321 files changed, 47258 insertions, 388 deletions
diff --git a/Documentation/arm/cluster-pm-race-avoidance.txt b/Documentation/arm/cluster-pm-race-avoidance.txt new file mode 100644 index 00000000000..750b6fc24af --- /dev/null +++ b/Documentation/arm/cluster-pm-race-avoidance.txt @@ -0,0 +1,498 @@ +Cluster-wide Power-up/power-down race avoidance algorithm +========================================================= + +This file documents the algorithm which is used to coordinate CPU and +cluster setup and teardown operations and to manage hardware coherency +controls safely. + +The section "Rationale" explains what the algorithm is for and why it is +needed. "Basic model" explains general concepts using a simplified view +of the system. The other sections explain the actual details of the +algorithm in use. + + +Rationale +--------- + +In a system containing multiple CPUs, it is desirable to have the +ability to turn off individual CPUs when the system is idle, reducing +power consumption and thermal dissipation. + +In a system containing multiple clusters of CPUs, it is also desirable +to have the ability to turn off entire clusters. + +Turning entire clusters off and on is a risky business, because it +involves performing potentially destructive operations affecting a group +of independently running CPUs, while the OS continues to run. This +means that we need some coordination in order to ensure that critical +cluster-level operations are only performed when it is truly safe to do +so. + +Simple locking may not be sufficient to solve this problem, because +mechanisms like Linux spinlocks may rely on coherency mechanisms which +are not immediately enabled when a cluster powers up. Since enabling or +disabling those mechanisms may itself be a non-atomic operation (such as +writing some hardware registers and invalidating large caches), other +methods of coordination are required in order to guarantee safe +power-down and power-up at the cluster level. + +The mechanism presented in this document describes a coherent memory +based protocol for performing the needed coordination. It aims to be as +lightweight as possible, while providing the required safety properties. + + +Basic model +----------- + +Each cluster and CPU is assigned a state, as follows: + + DOWN + COMING_UP + UP + GOING_DOWN + + +---------> UP ----------+ + | v + + COMING_UP GOING_DOWN + + ^ | + +--------- DOWN <--------+ + + +DOWN: The CPU or cluster is not coherent, and is either powered off or + suspended, or is ready to be powered off or suspended. + +COMING_UP: The CPU or cluster has committed to moving to the UP state. + It may be part way through the process of initialisation and + enabling coherency. + +UP: The CPU or cluster is active and coherent at the hardware + level. A CPU in this state is not necessarily being used + actively by the kernel. + +GOING_DOWN: The CPU or cluster has committed to moving to the DOWN + state. It may be part way through the process of teardown and + coherency exit. + + +Each CPU has one of these states assigned to it at any point in time. +The CPU states are described in the "CPU state" section, below. + +Each cluster is also assigned a state, but it is necessary to split the +state value into two parts (the "cluster" state and "inbound" state) and +to introduce additional states in order to avoid races between different +CPUs in the cluster simultaneously modifying the state. The cluster- +level states are described in the "Cluster state" section. 
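As a minimal illustration of this basic model (a sketch written for this description only; the names and helper below are not the kernel's actual definitions), each CPU or cluster carries one four-valued state, and the only legal transitions are the ones forming the cycle shown above:

	/* Illustrative only: the state names and encoding are hypothetical. */
	enum basic_state {
		STATE_DOWN,		/* not coherent; powered off, suspended, or ready to be */
		STATE_COMING_UP,	/* committed to reaching UP */
		STATE_UP,		/* active and coherent at the hardware level */
		STATE_GOING_DOWN,	/* committed to reaching DOWN */
	};

	/* A transition is legal only if it follows the cycle above. */
	static int basic_transition_valid(enum basic_state from, enum basic_state to)
	{
		switch (from) {
		case STATE_DOWN:	return to == STATE_COMING_UP;
		case STATE_COMING_UP:	return to == STATE_UP;
		case STATE_UP:		return to == STATE_GOING_DOWN;
		case STATE_GOING_DOWN:	return to == STATE_DOWN;
		}
		return 0;
	}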
+ +To help distinguish the CPU states from cluster states in this +discussion, the state names are given a CPU_ prefix for the CPU states, +and a CLUSTER_ or INBOUND_ prefix for the cluster states. + + +CPU state +--------- + +In this algorithm, each individual core in a multi-core processor is +referred to as a "CPU". CPUs are assumed to be single-threaded: +therefore, a CPU can only be doing one thing at a single point in time. + +This means that CPUs fit the basic model closely. + +The algorithm defines the following states for each CPU in the system: + + CPU_DOWN + CPU_COMING_UP + CPU_UP + CPU_GOING_DOWN + + cluster setup and + CPU setup complete policy decision + +-----------> CPU_UP ------------+ + | v + + CPU_COMING_UP CPU_GOING_DOWN + + ^ | + +----------- CPU_DOWN <----------+ + policy decision CPU teardown complete + or hardware event + + +The definitions of the four states correspond closely to the states of +the basic model. + +Transitions between states occur as follows. + +A trigger event (spontaneous) means that the CPU can transition to the +next state as a result of making local progress only, with no +requirement for any external event to happen. + + +CPU_DOWN: + + A CPU reaches the CPU_DOWN state when it is ready for + power-down. On reaching this state, the CPU will typically + power itself down or suspend itself, via a WFI instruction or a + firmware call. + + Next state: CPU_COMING_UP + Conditions: none + + Trigger events: + + a) an explicit hardware power-up operation, resulting + from a policy decision on another CPU; + + b) a hardware event, such as an interrupt. + + +CPU_COMING_UP: + + A CPU cannot start participating in hardware coherency until the + cluster is set up and coherent. If the cluster is not ready, + then the CPU will wait in the CPU_COMING_UP state until the + cluster has been set up. + + Next state: CPU_UP + Conditions: The CPU's parent cluster must be in CLUSTER_UP. + Trigger events: Transition of the parent cluster to CLUSTER_UP. + + Refer to the "Cluster state" section for a description of the + CLUSTER_UP state. + + +CPU_UP: + When a CPU reaches the CPU_UP state, it is safe for the CPU to + start participating in local coherency. + + This is done by jumping to the kernel's CPU resume code. + + Note that the definition of this state is slightly different + from the basic model definition: CPU_UP does not mean that the + CPU is coherent yet, but it does mean that it is safe to resume + the kernel. The kernel handles the rest of the resume + procedure, so the remaining steps are not visible as part of the + race avoidance algorithm. + + The CPU remains in this state until an explicit policy decision + is made to shut down or suspend the CPU. + + Next state: CPU_GOING_DOWN + Conditions: none + Trigger events: explicit policy decision + + +CPU_GOING_DOWN: + + While in this state, the CPU exits coherency, including any + operations required to achieve this (such as cleaning data + caches). + + Next state: CPU_DOWN + Conditions: local CPU teardown complete + Trigger events: (spontaneous) + + +Cluster state +------------- + +A cluster is a group of connected CPUs with some common resources. +Because a cluster contains multiple CPUs, it can be doing multiple +things at the same time. This has some implications. In particular, a +CPU can start up while another CPU is tearing the cluster down. + +In this discussion, the "outbound side" is the view of the cluster state +as seen by a CPU tearing the cluster down. 
The "inbound side" is the +view of the cluster state as seen by a CPU setting the CPU up. + +In order to enable safe coordination in such situations, it is important +that a CPU which is setting up the cluster can advertise its state +independently of the CPU which is tearing down the cluster. For this +reason, the cluster state is split into two parts: + + "cluster" state: The global state of the cluster; or the state + on the outbound side: + + CLUSTER_DOWN + CLUSTER_UP + CLUSTER_GOING_DOWN + + "inbound" state: The state of the cluster on the inbound side. + + INBOUND_NOT_COMING_UP + INBOUND_COMING_UP + + + The different pairings of these states results in six possible + states for the cluster as a whole: + + CLUSTER_UP + +==========> INBOUND_NOT_COMING_UP -------------+ + # | + | + CLUSTER_UP <----+ | + INBOUND_COMING_UP | v + + ^ CLUSTER_GOING_DOWN CLUSTER_GOING_DOWN + # INBOUND_COMING_UP <=== INBOUND_NOT_COMING_UP + + CLUSTER_DOWN | | + INBOUND_COMING_UP <----+ | + | + ^ | + +=========== CLUSTER_DOWN <------------+ + INBOUND_NOT_COMING_UP + + Transitions -----> can only be made by the outbound CPU, and + only involve changes to the "cluster" state. + + Transitions ===##> can only be made by the inbound CPU, and only + involve changes to the "inbound" state, except where there is no + further transition possible on the outbound side (i.e., the + outbound CPU has put the cluster into the CLUSTER_DOWN state). + + The race avoidance algorithm does not provide a way to determine + which exact CPUs within the cluster play these roles. This must + be decided in advance by some other means. Refer to the section + "Last man and first man selection" for more explanation. + + + CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the + cluster can actually be powered down. + + The parallelism of the inbound and outbound CPUs is observed by + the existence of two different paths from CLUSTER_GOING_DOWN/ + INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic + model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to + COMING_UP in the basic model). The second path avoids cluster + teardown completely. + + CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic + model. The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP + is trivial and merely resets the state machine ready for the + next cycle. + + Details of the allowable transitions follow. + + The next state in each case is notated + + <cluster state>/<inbound state> (<transitioner>) + + where the <transitioner> is the side on which the transition + can occur; either the inbound or the outbound side. + + +CLUSTER_DOWN/INBOUND_NOT_COMING_UP: + + Next state: CLUSTER_DOWN/INBOUND_COMING_UP (inbound) + Conditions: none + Trigger events: + + a) an explicit hardware power-up operation, resulting + from a policy decision on another CPU; + + b) a hardware event, such as an interrupt. + + +CLUSTER_DOWN/INBOUND_COMING_UP: + + In this state, an inbound CPU sets up the cluster, including + enabling of hardware coherency at the cluster level and any + other operations (such as cache invalidation) which are required + in order to achieve this. + + The purpose of this state is to do sufficient cluster-level + setup to enable other CPUs in the cluster to enter coherency + safely. 
+
+	Next state: CLUSTER_UP/INBOUND_COMING_UP (inbound)
+	Conditions: cluster-level setup and hardware coherency complete
+	Trigger events: (spontaneous)
+
+
+CLUSTER_UP/INBOUND_COMING_UP:
+
+	Cluster-level setup is complete and hardware coherency is
+	enabled for the cluster.  Other CPUs in the cluster can safely
+	enter coherency.
+
+	This is a transient state, leading immediately to
+	CLUSTER_UP/INBOUND_NOT_COMING_UP.  All other CPUs on the cluster
+	should treat these two states as equivalent.
+
+	Next state: CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
+	Conditions: none
+	Trigger events: (spontaneous)
+
+
+CLUSTER_UP/INBOUND_NOT_COMING_UP:
+
+	Cluster-level setup is complete and hardware coherency is
+	enabled for the cluster.  Other CPUs in the cluster can safely
+	enter coherency.
+
+	The cluster will remain in this state until a policy decision is
+	made to power the cluster down.
+
+	Next state: CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
+	Conditions: none
+	Trigger events: policy decision to power down the cluster
+
+
+CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
+
+	An outbound CPU is tearing the cluster down.  The selected CPU
+	must wait in this state until all CPUs in the cluster are in the
+	CPU_DOWN state.
+
+	When all CPUs are in the CPU_DOWN state, the cluster can be torn
+	down, for example by cleaning data caches and exiting
+	cluster-level coherency.
+
+	To avoid wasteful unnecessary teardown operations, the outbound
+	CPU should check the inbound cluster state for asynchronous
+	transitions to INBOUND_COMING_UP.  Alternatively, individual
+	CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.
+
+
+	Next states:
+
+	CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
+		Conditions: cluster torn down and ready to power off
+		Trigger events: (spontaneous)
+
+	CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
+		Conditions: none
+		Trigger events:
+
+			a) an explicit hardware power-up operation,
+			   resulting from a policy decision on another
+			   CPU;
+
+			b) a hardware event, such as an interrupt.
+
+
+CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
+
+	The cluster is (or was) being torn down, but another CPU has
+	come online in the meantime and is trying to set up the cluster
+	again.
+
+	If the outbound CPU observes this state, it has two choices:
+
+		a) back out of teardown, restoring the cluster to the
+		   CLUSTER_UP state;
+
+		b) finish tearing the cluster down and put the cluster
+		   in the CLUSTER_DOWN state; the inbound CPU will
+		   set up the cluster again from there.
+
+	Choice (a) permits the removal of some latency by avoiding
+	unnecessary teardown and setup operations in situations where
+	the cluster is not really going to be powered down.
+
+
+	Next states:
+
+	CLUSTER_UP/INBOUND_COMING_UP (outbound)
+		Conditions: cluster-level setup and hardware
+			coherency complete
+		Trigger events: (spontaneous)
+
+	CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
+		Conditions: cluster torn down and ready to power off
+		Trigger events: (spontaneous)
+
+
+Last man and First man selection
+--------------------------------
+
+The CPU which performs cluster tear-down operations on the outbound side
+is commonly referred to as the "last man".
+
+The CPU which performs cluster setup on the inbound side is commonly
+referred to as the "first man".
+
+The race avoidance algorithm documented above does not provide a
+mechanism to choose which CPUs should play these roles.
+
+
+Last man:
+
+When shutting down the cluster, all the CPUs involved are initially
+executing Linux and hence coherent.  Therefore, ordinary spinlocks can
+be used to select a last man safely, before the CPUs become
+non-coherent.
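As a rough sketch of how such a selection might look (the helper below is hypothetical and is not the actual MCPM or platform code), a per-cluster count of running CPUs can be maintained under an ordinary spinlock while the CPUs are still coherent; the CPU that takes the count to zero becomes the last man:

	#include <linux/spinlock.h>
	#include <linux/types.h>

	#define NR_CLUSTERS	2		/* illustrative value */

	static DEFINE_SPINLOCK(last_man_lock);
	static int cpus_alive[NR_CLUSTERS];	/* initialised to the number of running CPUs per cluster */

	/* Called by each CPU on its way down, while it is still coherent. */
	static bool i_am_last_man(unsigned int cluster)
	{
		bool last;

		spin_lock(&last_man_lock);
		last = (--cpus_alive[cluster] == 0);
		spin_unlock(&last_man_lock);

		return last;	/* only the last man goes on to tear the cluster down */
	}

The only property that matters is that exactly one CPU sees the count reach zero; which CPU that turns out to be does not matter.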
+
+
+First man:
+
+Because CPUs may power up asynchronously in response to external wake-up
+events, a dynamic mechanism is needed to make sure that only one CPU
+attempts to play the first man role and do the cluster-level
+initialisation: any other CPUs must wait for this to complete before
+proceeding.
+
+Cluster-level initialisation may involve actions such as configuring
+coherency controls in the bus fabric.
+
+The current implementation in mcpm_head.S uses a separate mutual exclusion
+mechanism to do this arbitration.  This mechanism is documented in
+detail in vlocks.txt.
+
+
+Features and Limitations
+------------------------
+
+Implementation:
+
+	The current ARM-based implementation is split between
+	arch/arm/common/mcpm_head.S (low-level inbound CPU operations) and
+	arch/arm/common/mcpm_entry.c (everything else):
+
+	__mcpm_cpu_going_down() signals the transition of a CPU to the
+		CPU_GOING_DOWN state.
+
+	__mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
+		state.
+
+	A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
+		low-level power-up code in mcpm_head.S.  This could
+		involve CPU-specific setup code, but in the current
+		implementation it does not.
+
+	__mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
+		handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
+		and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
+		the case of an aborted cluster power-down).
+
+		These functions are more complex than the __mcpm_cpu_*()
+		functions due to the extra inter-CPU coordination which
+		is needed for safe transitions at the cluster level.
+
+	A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
+		the low-level power-up code in mcpm_head.S.  This
+		typically involves platform-specific setup code,
+		provided by the platform-specific power_up_setup
+		function registered via mcpm_sync_init.
+
+Deep topologies:
+
+	As currently described and implemented, the algorithm does not
+	support CPU topologies involving more than two levels (i.e.,
+	clusters of clusters are not supported).  The algorithm could be
+	extended by replicating the cluster-level states for the
+	additional topological levels, and modifying the transition
+	rules for the intermediate (non-outermost) cluster levels.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, in
+collaboration with Nicolas Pitre and Achin Gupta.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
diff --git a/Documentation/arm/vlocks.txt b/Documentation/arm/vlocks.txt
new file mode 100644
index 00000000000..415960a9bab
--- /dev/null
+++ b/Documentation/arm/vlocks.txt
@@ -0,0 +1,211 @@
+vlocks for Bare-Metal Mutual Exclusion
+======================================
+
+Voting Locks, or "vlocks", provide a simple low-level mutual exclusion
+mechanism, with reasonable but minimal requirements on the memory
+system.
+
+These are intended to be used to coordinate critical activity among CPUs
+which are otherwise non-coherent, in situations where the hardware
+provides no other mechanism to support this and ordinary spinlocks
+cannot be used.
+
+
+vlocks make use of the atomicity provided by the memory system for
+writes to a single memory location.  To arbitrate, every CPU "votes for
+itself", by storing a unique number to a common memory location.
The +final value seen in that memory location when all the votes have been +cast identifies the winner. + +In order to make sure that the election produces an unambiguous result +in finite time, a CPU will only enter the election in the first place if +no winner has been chosen and the election does not appear to have +started yet. + + +Algorithm +--------- + +The easiest way to explain the vlocks algorithm is with some pseudo-code: + + + int currently_voting[NR_CPUS] = { 0, }; + int last_vote = -1; /* no votes yet */ + + bool vlock_trylock(int this_cpu) + { + /* signal our desire to vote */ + currently_voting[this_cpu] = 1; + if (last_vote != -1) { + /* someone already volunteered himself */ + currently_voting[this_cpu] = 0; + return false; /* not ourself */ + } + + /* let's suggest ourself */ + last_vote = this_cpu; + currently_voting[this_cpu] = 0; + + /* then wait until everyone else is done voting */ + for_each_cpu(i) { + while (currently_voting[i] != 0) + /* wait */; + } + + /* result */ + if (last_vote == this_cpu) + return true; /* we won */ + return false; + } + + bool vlock_unlock(void) + { + last_vote = -1; + } + + +The currently_voting[] array provides a way for the CPUs to determine +whether an election is in progress, and plays a role analogous to the +"entering" array in Lamport's bakery algorithm [1]. + +However, once the election has started, the underlying memory system +atomicity is used to pick the winner. This avoids the need for a static +priority rule to act as a tie-breaker, or any counters which could +overflow. + +As long as the last_vote variable is globally visible to all CPUs, it +will contain only one value that won't change once every CPU has cleared +its currently_voting flag. + + +Features and limitations +------------------------ + + * vlocks are not intended to be fair. In the contended case, it is the + _last_ CPU which attempts to get the lock which will be most likely + to win. + + vlocks are therefore best suited to situations where it is necessary + to pick a unique winner, but it does not matter which CPU actually + wins. + + * Like other similar mechanisms, vlocks will not scale well to a large + number of CPUs. + + vlocks can be cascaded in a voting hierarchy to permit better scaling + if necessary, as in the following hypothetical example for 4096 CPUs: + + /* first level: local election */ + my_town = towns[(this_cpu >> 4) & 0xf]; + I_won = vlock_trylock(my_town, this_cpu & 0xf); + if (I_won) { + /* we won the town election, let's go for the state */ + my_state = states[(this_cpu >> 8) & 0xf]; + I_won = vlock_lock(my_state, this_cpu & 0xf)); + if (I_won) { + /* and so on */ + I_won = vlock_lock(the_whole_country, this_cpu & 0xf]; + if (I_won) { + /* ... */ + } + vlock_unlock(the_whole_country); + } + vlock_unlock(my_state); + } + vlock_unlock(my_town); + + +ARM implementation +------------------ + +The current ARM implementation [2] contains some optimisations beyond +the basic algorithm: + + * By packing the members of the currently_voting array close together, + we can read the whole array in one transaction (providing the number + of CPUs potentially contending the lock is small enough). This + reduces the number of round-trips required to external memory. 
+
+
+Features and limitations
+------------------------
+
+ * vlocks are not intended to be fair.  In the contended case, it is the
+   _last_ CPU which attempts to get the lock which will be most likely
+   to win.
+
+   vlocks are therefore best suited to situations where it is necessary
+   to pick a unique winner, but it does not matter which CPU actually
+   wins.
+
+ * Like other similar mechanisms, vlocks will not scale well to a large
+   number of CPUs.
+
+   vlocks can be cascaded in a voting hierarchy to permit better scaling
+   if necessary, as in the following hypothetical example for 4096 CPUs:
+
+	/* first level: local election */
+	my_town = towns[(this_cpu >> 4) & 0xf];
+	I_won = vlock_trylock(my_town, this_cpu & 0xf);
+	if (I_won) {
+		/* we won the town election, let's go for the state */
+		my_state = states[(this_cpu >> 8) & 0xf];
+		I_won = vlock_trylock(my_state, this_cpu & 0xf);
+		if (I_won) {
+			/* and so on */
+			I_won = vlock_trylock(the_whole_country, this_cpu & 0xf);
+			if (I_won) {
+				/* ... */
+			}
+			vlock_unlock(the_whole_country);
+		}
+		vlock_unlock(my_state);
+	}
+	vlock_unlock(my_town);
+
+
+ARM implementation
+------------------
+
+The current ARM implementation [2] contains some optimisations beyond
+the basic algorithm:
+
+ * By packing the members of the currently_voting array close together,
+   we can read the whole array in one transaction (providing the number
+   of CPUs potentially contending the lock is small enough).  This
+   reduces the number of round-trips required to external memory.
+
+   In the ARM implementation, this means that we can use a single load
+   and comparison:
+
+	LDR	Rt, [Rn]
+	CMP	Rt, #0
+
+   ...in place of code equivalent to:
+
+	LDRB	Rt, [Rn]
+	CMP	Rt, #0
+	LDRBEQ	Rt, [Rn, #1]
+	CMPEQ	Rt, #0
+	LDRBEQ	Rt, [Rn, #2]
+	CMPEQ	Rt, #0
+	LDRBEQ	Rt, [Rn, #3]
+	CMPEQ	Rt, #0
+
+   This cuts down on the fast-path latency, as well as potentially
+   reducing bus contention in contended cases.
+
+   The optimisation relies on the fact that the ARM memory system
+   guarantees coherency between overlapping memory accesses of
+   different sizes, similarly to many other architectures.  Note that
+   we do not care which element of currently_voting appears in which
+   bits of Rt, so there is no need to worry about endianness in this
+   optimisation.
+
+   If there are too many CPUs to read the currently_voting array in
+   one transaction then multiple transactions are still required.  The
+   implementation uses a simple loop of word-sized loads for this
+   case.  The number of transactions is still fewer than would be
+   required if bytes were loaded individually.
+
+   In principle, we could aggregate further by using LDRD or LDM, but
+   to keep the code simple this was not attempted in the initial
+   implementation.
+
+ * vlocks are currently only used to coordinate between CPUs which are
+   unable to enable their caches yet.  This means that the
+   implementation removes many of the barriers which would be required
+   when executing the algorithm in cached memory.
+
+   Packing of the currently_voting array does not work with cached
+   memory unless all CPUs contending the lock are cache-coherent, due
+   to cache writebacks from one CPU clobbering values written by other
+   CPUs.  (Though if all the CPUs are cache-coherent, you should
+   probably be using proper spinlocks instead anyway.)
+
+ * The "no votes yet" value used for the last_vote variable is 0 (not
+   -1 as in the pseudocode).  This allows statically-allocated vlocks
+   to be implicitly initialised to an unlocked state simply by putting
+   them in .bss.
+
+   An offset is added to each CPU's ID for the purpose of setting this
+   variable, so that no CPU uses the value 0 for its ID.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, for
+use in ARM-based big.LITTLE platforms, with review and input gratefully
+received from Nicolas Pitre and Achin Gupta.  Thanks to Nicolas for
+grabbing most of this text out of the relevant mail thread and writing
+up the pseudocode.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
+
+
+References
+----------
+
+[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
+    Problem", Communications of the ACM 17, 8 (August 1974), 453-455.
+
+    http://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
+
+[2] linux/arch/arm/common/vlock.S, www.kernel.org.
diff --git a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
new file mode 100644
index 00000000000..3b8fbf3c00c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
@@ -0,0 +1,19 @@
+ARM Dual Cluster System Configuration Block
+-------------------------------------------
+
+The Dual Cluster System Configuration Block (DCSCB) provides basic
+functionality for controlling clocks, resets and configuration pins in
+the Dual Cluster System implemented by the Real-Time System Model (RTSM).
+ +Required properties: + +- compatible : should be "arm,rtsm,dcscb" + +- reg : physical base address and the size of the registers window + +Example: + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0x60000000 0x1000>; + }; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fbd3a07f20a..b67e45a9830 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1691,6 +1691,21 @@ config HAVE_ARM_TWD help This options enables support for the ARM timer and watchdog unit +config MCPM + bool "Multi-Cluster Power Management" + depends on CPU_V7 && SMP + help + This option provides the common power management infrastructure + for (multi-)cluster based systems, such as big.LITTLE based + systems. + +config BIG_LITTLE + bool "big.LITTLE support (Experimental)" + depends on CPU_V7 && SMP + select MCPM + help + This option enables support for the big.LITTLE architecture. + choice prompt "Memory split" default VMSPLIT_3G diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 9c6255884cb..2fc92bf10cc 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -169,7 +169,14 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2p-ca5s.dtb \ vexpress-v2p-ca9.dtb \ vexpress-v2p-ca15-tc1.dtb \ vexpress-v2p-ca15_a7.dtb \ - xenvm-4.2.dtb + xenvm-4.2.dtb \ + rtsm_ve-cortex_a9x2.dtb \ + rtsm_ve-cortex_a9x4.dtb \ + rtsm_ve-cortex_a15x1.dtb \ + rtsm_ve-cortex_a15x2.dtb \ + rtsm_ve-cortex_a15x4.dtb \ + rtsm_ve-v2p-ca15x1-ca7x1.dtb \ + rtsm_ve-v2p-ca15x4-ca7x4.dtb dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \ wm8505-ref.dtb \ wm8650-mid.dtb \ diff --git a/arch/arm/boot/dts/clcd-panels.dtsi b/arch/arm/boot/dts/clcd-panels.dtsi new file mode 100644 index 00000000000..0b0ff6ead4b --- /dev/null +++ b/arch/arm/boot/dts/clcd-panels.dtsi @@ -0,0 +1,52 @@ +/* + * ARM Ltd. Versatile Express + * + */ + +/ { + panels { + panel@0 { + compatible = "panel"; + mode = "VGA"; + refresh = <60>; + xres = <640>; + yres = <480>; + pixclock = <39721>; + left_margin = <40>; + right_margin = <24>; + upper_margin = <32>; + lower_margin = <11>; + hsync_len = <96>; + vsync_len = <2>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + + panel@1 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; +}; diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts new file mode 100644 index 00000000000..c9eee916aa7 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts @@ -0,0 +1,159 @@ +/* + * ARM Ltd. 
Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x1CT + * + * RTSM_VE_Cortex_A15x1.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts new file 
mode 100644 index 00000000000..853a166e3c3 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts @@ -0,0 +1,165 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x2CT + * + * RTSM_VE_Cortex_A15x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 
&gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts new file mode 100644 index 00000000000..c1947a3a5c8 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts @@ -0,0 +1,177 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * + * RTSM_VE_Cortex_A15x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 
24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts new file mode 100644 index 00000000000..fca6b2f7967 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts @@ -0,0 +1,171 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx2CT + * + * RTSM_VE_Cortex_A9x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 
0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts new file mode 100644 index 00000000000..fd8a6ed97a0 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts @@ -0,0 +1,183 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx4CT + * + * RTSM_VE_Cortex_A9x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 
0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi new file mode 100644 index 00000000000..6d125662612 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi @@ -0,0 +1,224 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * Motherboard component + * + * VEMotherBoard.lisa + */ + + motherboard { + compatible = "arm,vexpress,v2m-p1", "simple-bus"; + arm,hbi = <0x190>; + arm,vexpress,site = <0>; + arm,v2m-memory-map = "rs1"; + #address-cells = <2>; /* SMB chipselect number and offset */ + #size-cells = <1>; + #interrupt-cells = <1>; + ranges; + + flash@0,00000000 { + compatible = "arm,vexpress-flash", "cfi-flash"; + reg = <0 0x00000000 0x04000000>, + <4 0x00000000 0x04000000>; + bank-width = <4>; + }; + + vram@2,00000000 { + compatible = "arm,vexpress-vram"; + reg = <2 0x00000000 0x00800000>; + }; + + ethernet@2,02000000 { + compatible = "smsc,lan91c111"; + reg = <2 0x02000000 0x10000>; + interrupts = <15>; + }; + + iofpga@3,00000000 { + compatible = "arm,amba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 3 0 0x200000>; + + v2m_sysreg: sysreg@010000 { + compatible = "arm,vexpress-sysreg"; + reg = <0x010000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + }; + + v2m_sysctl: sysctl@020000 { + compatible = "arm,sp810", "arm,primecell"; + reg = <0x020000 0x1000>; + clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>; + clock-names = "refclk", "timclk", "apb_pclk"; + #clock-cells = <1>; + clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3"; + }; + + aaci@040000 { + compatible = "arm,pl041", "arm,primecell"; + reg = <0x040000 0x1000>; + interrupts = <11>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + mmci@050000 { + compatible = "arm,pl180", "arm,primecell"; + reg = <0x050000 0x1000>; + interrupts = <9 10>; + cd-gpios = <&v2m_sysreg 0 0>; + wp-gpios = <&v2m_sysreg 1 0>; + max-frequency = <12000000>; + vmmc-supply = <&v2m_fixed_3v3>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "mclk", "apb_pclk"; + }; + + kmi@060000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x060000 0x1000>; + interrupts = <12>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + 
clock-names = "KMIREFCLK", "apb_pclk"; + }; + + kmi@070000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x070000 0x1000>; + interrupts = <13>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + v2m_serial0: uart@090000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x090000 0x1000>; + interrupts = <5>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial1: uart@0a0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0a0000 0x1000>; + interrupts = <6>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial2: uart@0b0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0b0000 0x1000>; + interrupts = <7>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial3: uart@0c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0c0000 0x1000>; + interrupts = <8>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + wdt@0f0000 { + compatible = "arm,sp805", "arm,primecell"; + reg = <0x0f0000 0x1000>; + interrupts = <0>; + clocks = <&v2m_refclk32khz>, <&smbclk>; + clock-names = "wdogclk", "apb_pclk"; + }; + + v2m_timer01: timer@110000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x110000 0x1000>; + interrupts = <2>; + clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + v2m_timer23: timer@120000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x120000 0x1000>; + interrupts = <3>; + clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + rtc@170000 { + compatible = "arm,pl031", "arm,primecell"; + reg = <0x170000 0x1000>; + interrupts = <4>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + clcd@1f0000 { + compatible = "arm,pl111", "arm,primecell"; + reg = <0x1f0000 0x1000>; + interrupts = <14>; + clocks = <&v2m_oscclk1>, <&smbclk>; + clock-names = "v2m:oscclk1", "apb_pclk"; + mode = "VGA"; + use_dma = <0>; + framebuffer = <0x18000000 0x00180000>; + }; + }; + + v2m_fixed_3v3: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + v2m_clk24mhz: clk24mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + clock-output-names = "v2m:clk24mhz"; + }; + + v2m_refclk1mhz: refclk1mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "v2m:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "v2m:refclk32khz"; + }; + + mcc { + compatible = "simple-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + v2m_oscclk1: osc@1 { + /* CLCD clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <23750000 63500000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk1"; + }; + + muxfpga@0 { + compatible = "arm,vexpress-muxfpga"; + arm,vexpress-sysreg,func = <7 0>; + }; + + shutdown@0 { + compatible = "arm,vexpress-shutdown"; + arm,vexpress-sysreg,func = <8 0>; + }; + }; + }; diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts new file mode 100644 index 00000000000..55d4f5ce019 --- /dev/null +++ 
b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts @@ -0,0 +1,227 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x1_A7x1.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x1-A7x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1_a7x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core1: core@0 { + reg = <0>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core1>; +// clock-frequency = <800000000>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci"; + reg = <0 0x2c090000 0 0x8000>; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, 
+ <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts new file mode 100644 index 00000000000..a2d4441568a --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts @@ -0,0 +1,335 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x4_A7x4.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x4-A7x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4_a7x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + core2: core@2 { + reg = <2>; + }; + + core3: core@3 { + reg = <3>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core4: core@0 { + reg = <0>; + }; + + core5: core@1 { + reg = <1>; + }; + + core6: core@2 { + reg = <2>; + }; + + core7: core@3 { + reg = <3>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + cluster = <&cluster0>; + core = <&core1>; +// clock-frequency = <1000000000>; + }; + + cpu2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + cluster = <&cluster0>; + core = <&core2>; +// clock-frequency = <1000000000>; + }; + + cpu3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + cluster = <&cluster0>; + core = <&core3>; +// clock-frequency = <1000000000>; + }; + + cpu4: cpu@4 { + 
device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core4>; +// clock-frequency = <800000000>; + }; + + cpu5: cpu@5 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + cluster = <&cluster1>; + core = <&core5>; +// clock-frequency = <800000000>; + }; + + cpu6: cpu@6 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x102>; + cluster = <&cluster1>; + core = <&core6>; +// clock-frequency = <800000000>; + }; + + cpu7: cpu@7 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x103>; + cluster = <&cluster1>; + core = <&core7>; +// clock-frequency = <800000000>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci"; + reg = <0 0x2c090000 0 0x8000>; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + gic-cpuif@2 { + compatible = "arm,gic-cpuif"; + cpuif-id = <2>; + cpu = <&cpu2>; + }; + gic-cpuif@3 { + compatible = "arm,gic-cpuif"; + cpuif-id = <3>; + cpu = <&cpu3>; + }; + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu4>; + }; + gic-cpuif@5 { + compatible = "arm,gic-cpuif"; + cpuif-id = <5>; + cpu = <&cpu5>; + }; + gic-cpuif@6 { + compatible = "arm,gic-cpuif"; + cpuif-id = <6>; + cpu = <&cpu6>; + }; + gic-cpuif@7 { + compatible = "arm,gic-cpuif"; + cpuif-id = <7>; + cpu = <&cpu7>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 
0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index ac870fb3fa0..9584232ee6b 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -228,6 +228,7 @@ }; clcd@1f0000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f0000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi index f1420368355..6593398c11a 100644 --- a/arch/arm/boot/dts/vexpress-v2m.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m.dtsi @@ -227,6 +227,7 @@ }; clcd@1f000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts index 73187173117..cc6a8c0cfe3 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA15"; arm,hbi = <0x237>; @@ -57,6 +59,8 @@ interrupts = <0 85 4>; clocks = <&oscclk5>; clock-names = "pxlclk"; + mode = "1024x768-16@60"; + framebuffer = <0 0xff000000 0 0x01000000>; }; memory-controller@2b0a0000 { @@ -117,7 +121,7 @@ }; pmu { - compatible = "arm,cortex-a15-pmu", "arm,cortex-a9-pmu"; + compatible = "arm,cortex-a15-pmu"; interrupts = <0 68 4>, <0 69 4>; }; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index dfe371ec274..b37fdd8c146 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -9,11 +9,13 @@ /dts-v1/; +/memreserve/ 0xff000000 0x01000000; + / { model = "V2P-CA15_CA7"; arm,hbi = <0x249>; arm,vexpress,site = <0xf>; - compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress"; + compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress", "arm,generic"; interrupt-parent = <&gic>; #address-cells = <2>; #size-cells = <2>; @@ -29,6 +31,48 @@ i2c1 = &v2m_i2c_pcie; }; + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core2: core@0 { + reg = <0>; + }; + + core3: core@1 { + reg = <1>; + }; + + core4: core@2 { + reg = <2>; + }; + }; + }; + }; + cpus { #address-cells = <1>; #size-cells = <0>; @@ -37,36 +81,51 @@ device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <0>; + cluster = <&cluster0>; + core = <&core0>; + clock-frequency = 
<1000000000>; }; cpu1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <1>; + cluster = <&cluster0>; + core = <&core1>; + clock-frequency = <1000000000>; }; cpu2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x100>; + cluster = <&cluster1>; + core = <&core2>; + clock-frequency = <800000000>; }; cpu3: cpu@3 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x101>; + cluster = <&cluster1>; + core = <&core3>; + clock-frequency = <800000000>; }; cpu4: cpu@4 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x102>; + cluster = <&cluster1>; + core = <&core4>; + clock-frequency = <800000000>; }; }; memory@80000000 { device_type = "memory"; - reg = <0 0x80000000 0 0x40000000>; + reg = <0 0x80000000 0 0x80000000>; }; wdt@2a490000 { @@ -81,6 +140,8 @@ compatible = "arm,hdlcd"; reg = <0 0x2b000000 0 0x1000>; interrupts = <0 85 4>; + mode = "1024x768-16@60"; + framebuffer = <0 0xff000000 0 0x01000000>; clocks = <&oscclk5>; clock-names = "pxlclk"; }; @@ -102,6 +163,44 @@ <0 0x2c004000 0 0x2000>, <0 0x2c006000 0 0x2000>; interrupts = <1 9 0xf04>; + + gic-cpuif@0 { + compatible = "arm,gic-cpuif"; + cpuif-id = <0>; + cpu = <&cpu0>; + }; + gic-cpuif@1 { + compatible = "arm,gic-cpuif"; + cpuif-id = <1>; + cpu = <&cpu1>; + }; + gic-cpuif@2 { + compatible = "arm,gic-cpuif"; + cpuif-id = <2>; + cpu = <&cpu2>; + }; + + gic-cpuif@3 { + compatible = "arm,gic-cpuif"; + cpuif-id = <3>; + cpu = <&cpu3>; + }; + + gic-cpuif@4 { + compatible = "arm,gic-cpuif"; + cpuif-id = <4>; + cpu = <&cpu4>; + }; + }; + + cci@2c090000 { + compatible = "arm,cci"; + reg = <0 0x2c090000 0 0x10000>; + interrupts = <0 101 4>, + <0 102 4>, + <0 103 4>, + <0 104 4>, + <0 105 4>; }; memory-controller@7ffd0000 { @@ -125,6 +224,12 @@ clock-names = "apb_pclk"; }; + spc@7fff0000 { + compatible = "arm,spc"; + reg = <0 0x7fff0000 0 0x1000>; + interrupts = <0 95 4>; + }; + timer { compatible = "arm,armv7-timer"; interrupts = <1 13 0xf08>, @@ -133,12 +238,21 @@ <1 10 0xf08>; }; - pmu { - compatible = "arm,cortex-a15-pmu", "arm,cortex-a9-pmu"; + pmu_a15 { + compatible = "arm,cortex-a15-pmu"; + cluster = <&cluster0>; interrupts = <0 68 4>, <0 69 4>; }; + pmu_a7 { + compatible = "arm,cortex-a7-pmu"; + cluster = <&cluster1>; + interrupts = <0 128 4>, + <0 129 4>, + <0 130 4>; + }; + oscclk6a: oscclk6a { /* Reference 24MHz clock */ compatible = "fixed-clock"; @@ -147,6 +261,15 @@ clock-output-names = "oscclk6a"; }; + psci { + compatible = "arm,psci"; + method = "smc"; + cpu_suspend = <0x80100001>; + cpu_off = <0x80100002>; + cpu_on = <0x80100003>; + migrate = <0x80100004>; + }; + dcc { compatible = "arm,vexpress,config-bus"; arm,vexpress,config-bridge = <&v2m_sysreg>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts index 6328cbc71d3..cf633ed6a1b 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA5s"; arm,hbi = <0x225>; @@ -59,6 +61,8 @@ interrupts = <0 85 4>; clocks = <&oscclk3>; clock-names = "pxlclk"; + mode = "640x480-16@60"; + framebuffer = <0xbf000000 0x01000000>; }; memory-controller@2a150000 { @@ -111,7 +115,7 @@ }; pmu { - compatible = "arm,cortex-a5-pmu", "arm,cortex-a9-pmu"; + compatible = "arm,cortex-a5-pmu"; interrupts = <0 68 4>, <0 69 4>; }; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts index 1420bb14d95..663fa5927e7 100644 --- 
a/arch/arm/boot/dts/vexpress-v2p-ca9.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts @@ -9,6 +9,8 @@ /dts-v1/; +/include/ "clcd-panels.dtsi" + / { model = "V2P-CA9"; arm,hbi = <0x191>; @@ -73,6 +75,8 @@ interrupts = <0 44 4>; clocks = <&oscclk1>, <&oscclk2>; clock-names = "clcdclk", "apb_pclk"; + mode = "XVGA"; + use_dma = <1>; }; memory-controller@100e0000 { diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index dc8dd0de5c0..bd48ab52544 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -11,3 +11,5 @@ obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o +obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o +CFLAGS_REMOVE_mcpm_entry.o = -pg diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c new file mode 100644 index 00000000000..370236dd1a0 --- /dev/null +++ b/arch/arm/common/mcpm_entry.c @@ -0,0 +1,263 @@ +/* + * arch/arm/common/mcpm_entry.c -- entry point for multi-cluster PM + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irqflags.h> + +#include <asm/mcpm.h> +#include <asm/cacheflush.h> +#include <asm/idmap.h> +#include <asm/cputype.h> + +extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; + +void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr) +{ + unsigned long val = ptr ? virt_to_phys(ptr) : 0; + mcpm_entry_vectors[cluster][cpu] = val; + sync_cache_w(&mcpm_entry_vectors[cluster][cpu]); +} + +static const struct mcpm_platform_ops *platform_ops; + +int __init mcpm_platform_register(const struct mcpm_platform_ops *ops) +{ + if (platform_ops) + return -EBUSY; + platform_ops = ops; + return 0; +} + +int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster) +{ + if (!platform_ops) + return -EUNATCH; /* try not to shadow power_up errors */ + might_sleep(); + return platform_ops->power_up(cpu, cluster); +} + +typedef void (*phys_reset_t)(unsigned long); + +void mcpm_cpu_power_down(void) +{ + phys_reset_t phys_reset; + + BUG_ON(!platform_ops); + BUG_ON(!irqs_disabled()); + + /* + * Do this before calling into the power_down method, + * as it might not always be safe to do afterwards. + */ + setup_mm_for_reboot(); + + platform_ops->power_down(); + + /* + * It is possible for a power_up request to happen concurrently + * with a power_down request for the same CPU. In this case the + * power_down method might not be able to actually enter a + * powered down state with the WFI instruction if the power_up + * method has removed the required reset condition. The + * power_down method is then allowed to return. We must perform + * a re-entry in the kernel as if the power_up method just had + * deasserted reset on the CPU. + * + * To simplify race issues, the platform specific implementation + * must accommodate for the possibility of unordered calls to + * power_down and power_up with a usage count. 
Therefore, if a + * call to power_up is issued for a CPU that is not down, then + * the next call to power_down must not attempt a full shutdown + * but only do the minimum (normally disabling L1 cache and CPU + * coherency) and return just as if a concurrent power_up request + * had happened as described above. + */ + + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset(virt_to_phys(mcpm_entry_point)); + + /* should never get here */ + BUG(); +} + +void mcpm_cpu_suspend(u64 expected_residency) +{ + phys_reset_t phys_reset; + + BUG_ON(!platform_ops); + BUG_ON(!irqs_disabled()); + + /* Very similar to mcpm_cpu_power_down() */ + setup_mm_for_reboot(); + platform_ops->suspend(expected_residency); + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset(virt_to_phys(mcpm_entry_point)); + BUG(); +} + +int mcpm_cpu_powered_up(void) +{ + if (!platform_ops) + return -EUNATCH; + if (platform_ops->powered_up) + platform_ops->powered_up(); + return 0; +} + +struct sync_struct mcpm_sync; + +/* + * __mcpm_cpu_going_down: Indicates that the cpu is being torn down. + * This must be called at the point of committing to teardown of a CPU. + * The CPU cache (SCTRL.C bit) is expected to still be active. + */ +void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster) +{ + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_GOING_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); +} + +/* + * __mcpm_cpu_down: Indicates that cpu teardown is complete and that the + * cluster can be torn down without disrupting this CPU. + * To avoid deadlocks, this must be called before a CPU is powered down. + * The CPU cache (SCTRL.C bit) is expected to be off. + * However L2 cache might or might not be active. + */ +void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster) +{ + dmb(); + mcpm_sync.clusters[cluster].cpus[cpu].cpu = CPU_DOWN; + sync_cache_w(&mcpm_sync.clusters[cluster].cpus[cpu].cpu); + dsb_sev(); +} + +/* + * __mcpm_outbound_leave_critical: Leave the cluster teardown critical section. + * @state: the final state of the cluster: + * CLUSTER_UP: no destructive teardown was done and the cluster has been + * restored to the previous state (CPU cache still active); or + * CLUSTER_DOWN: the cluster has been torn-down, ready for power-off + * (CPU cache disabled, L2 cache either enabled or disabled). + */ +void __mcpm_outbound_leave_critical(unsigned int cluster, int state) +{ + dmb(); + mcpm_sync.clusters[cluster].cluster = state; + sync_cache_w(&mcpm_sync.clusters[cluster].cluster); + dsb_sev(); +} + +/* + * __mcpm_outbound_enter_critical: Enter the cluster teardown critical section. + * This function should be called by the last man, after local CPU teardown + * is complete. CPU cache expected to be active. + * + * Returns: + * false: the critical section was not entered because an inbound CPU was + * observed, or the cluster is already being set up; + * true: the critical section was entered: it is now safe to tear down the + * cluster. 
+ */ +bool __mcpm_outbound_enter_critical(unsigned int cpu, unsigned int cluster) +{ + unsigned int i; + struct mcpm_sync_struct *c = &mcpm_sync.clusters[cluster]; + + /* Warn inbound CPUs that the cluster is being torn down: */ + c->cluster = CLUSTER_GOING_DOWN; + sync_cache_w(&c->cluster); + + /* Back out if the inbound cluster is already in the critical region: */ + sync_cache_r(&c->inbound); + if (c->inbound == INBOUND_COMING_UP) + goto abort; + + /* + * Wait for all CPUs to get out of the GOING_DOWN state, so that local + * teardown is complete on each CPU before tearing down the cluster. + * + * If any CPU has been woken up again from the DOWN state, then we + * shouldn't be taking the cluster down at all: abort in that case. + */ + sync_cache_r(&c->cpus); + for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++) { + int cpustate; + + if (i == cpu) + continue; + + while (1) { + cpustate = c->cpus[i].cpu; + if (cpustate != CPU_GOING_DOWN) + break; + + wfe(); + sync_cache_r(&c->cpus[i].cpu); + } + + switch (cpustate) { + case CPU_DOWN: + continue; + + default: + goto abort; + } + } + + return true; + +abort: + __mcpm_outbound_leave_critical(cluster, CLUSTER_UP); + return false; +} + +int __mcpm_cluster_state(unsigned int cluster) +{ + sync_cache_r(&mcpm_sync.clusters[cluster].cluster); + return mcpm_sync.clusters[cluster].cluster; +} + +extern unsigned long mcpm_power_up_setup_phys; + +int __init mcpm_sync_init( + void (*power_up_setup)(unsigned int affinity_level)) +{ + unsigned int i, j, mpidr, this_cluster; + + BUILD_BUG_ON(MCPM_SYNC_CLUSTER_SIZE * MAX_NR_CLUSTERS != sizeof mcpm_sync); + BUG_ON((unsigned long)&mcpm_sync & (__CACHE_WRITEBACK_GRANULE - 1)); + + /* + * Set initial CPU and cluster states. + * Only one cluster is assumed to be active at this point. + */ + for (i = 0; i < MAX_NR_CLUSTERS; i++) { + mcpm_sync.clusters[i].cluster = CLUSTER_DOWN; + mcpm_sync.clusters[i].inbound = INBOUND_NOT_COMING_UP; + for (j = 0; j < MAX_CPUS_PER_CLUSTER; j++) + mcpm_sync.clusters[i].cpus[j].cpu = CPU_DOWN; + } + mpidr = read_cpuid_mpidr(); + this_cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + for_each_online_cpu(i) + mcpm_sync.clusters[this_cluster].cpus[i].cpu = CPU_UP; + mcpm_sync.clusters[this_cluster].cluster = CLUSTER_UP; + sync_cache_w(&mcpm_sync); + + if (power_up_setup) { + mcpm_power_up_setup_phys = virt_to_phys(power_up_setup); + sync_cache_w(&mcpm_power_up_setup_phys); + } + + return 0; +} diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S new file mode 100644 index 00000000000..8178705c4b2 --- /dev/null +++ b/arch/arm/common/mcpm_head.S @@ -0,0 +1,219 @@ +/* + * arch/arm/common/mcpm_head.S -- kernel entry point for multi-cluster PM + * + * Created by: Nicolas Pitre, March 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * Refer to Documentation/arm/cluster-pm-race-avoidance.txt + * for details of the synchronisation algorithms used here. 
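The kernel-doc comments above spell out the contract for these low-level synchronisation helpers. As a usage illustration (not part of this patch), the teardown sequence a platform backend's power_down method is expected to follow looks roughly like the sketch below; how the backend decides it is the last man, and the cache and coherency operations themselves, are platform specific and only hinted at in comments.

#include <linux/types.h>
#include <asm/mcpm.h>	/* helpers above and CLUSTER_DOWN */

/*
 * Sketch only: shape of a backend power_down built on the helpers in
 * mcpm_entry.c above.  Last-man bookkeeping, locking and all cache and
 * coherency maintenance are elided.
 */
static void my_power_down_sketch(unsigned int cpu, unsigned int cluster,
				 bool last_man)
{
	__mcpm_cpu_going_down(cpu, cluster);	/* commit to CPU teardown */

	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
		/*
		 * Whole-cluster teardown: exit coherency, clean L1 and L2,
		 * then publish the final cluster state.
		 */
		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
	} else {
		/* CPU-only teardown: clean L1, leave the coherency domain. */
	}

	__mcpm_cpu_down(cpu, cluster);	/* CPU cache must be off by now */
	/* The caller then executes WFI; re-entry is via mcpm_entry_point. */
}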
+ */ + +#include <linux/linkage.h> +#include <asm/mcpm.h> + +#include "vlock.h" + +.if MCPM_SYNC_CLUSTER_CPUS +.error "cpus must be the first member of struct mcpm_sync_struct" +.endif + + .macro pr_dbg string +#if defined(CONFIG_DEBUG_LL) && defined(DEBUG) + b 1901f +1902: .asciz "CPU" +1903: .asciz " cluster" +1904: .asciz ": \string" + .align +1901: adr r0, 1902b + bl printascii + mov r0, r9 + bl printhex8 + adr r0, 1903b + bl printascii + mov r0, r10 + bl printhex8 + adr r0, 1904b + bl printascii +#endif + .endm + + .arm + .align + +ENTRY(mcpm_entry_point) + + THUMB( adr r12, BSYM(1f) ) + THUMB( bx r12 ) + THUMB( .thumb ) +1: + mrc p15, 0, r0, c0, c0, 5 @ MPIDR + ubfx r9, r0, #0, #8 @ r9 = cpu + ubfx r10, r0, #8, #8 @ r10 = cluster + mov r3, #MAX_CPUS_PER_CLUSTER + mla r4, r3, r10, r9 @ r4 = canonical CPU index + cmp r4, #(MAX_CPUS_PER_CLUSTER * MAX_NR_CLUSTERS) + blo 2f + + /* We didn't expect this CPU. Try to cheaply make it quiet. */ +1: wfi + wfe + b 1b + +2: pr_dbg "kernel mcpm_entry_point\n" + + /* + * MMU is off so we need to get to various variables in a + * position independent way. + */ + adr r5, 3f + ldmia r5, {r6, r7, r8, r11} + add r6, r5, r6 @ r6 = mcpm_entry_vectors + ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys + add r8, r5, r8 @ r8 = mcpm_sync + add r11, r5, r11 @ r11 = first_man_locks + + mov r0, #MCPM_SYNC_CLUSTER_SIZE + mla r8, r0, r10, r8 @ r8 = sync cluster base + + @ Signal that this CPU is coming UP: + mov r0, #CPU_COMING_UP + mov r5, #MCPM_SYNC_CPU_SIZE + mla r5, r9, r5, r8 @ r5 = sync cpu address + strb r0, [r5] + + @ At this point, the cluster cannot unexpectedly enter the GOING_DOWN + @ state, because there is at least one active CPU (this CPU). + + mov r0, #VLOCK_SIZE + mla r11, r0, r10, r11 @ r11 = cluster first man lock + mov r0, r11 + mov r1, r9 @ cpu + bl vlock_trylock @ implies DMB + + cmp r0, #0 @ failed to get the lock? + bne mcpm_setup_wait @ wait for cluster setup if so + + ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + cmp r0, #CLUSTER_UP @ cluster already up? + bne mcpm_setup @ if not, set up the cluster + + @ Otherwise, release the first man lock and skip setup: + mov r0, r11 + bl vlock_unlock + b mcpm_setup_complete + +mcpm_setup: + @ Control dependency implies strb not observable before previous ldrb. + + @ Signal that the cluster is being brought up: + mov r0, #INBOUND_COMING_UP + strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] + dmb + + @ Any CPU trying to take the cluster into CLUSTER_GOING_DOWN from this + @ point onwards will observe INBOUND_COMING_UP and abort. 
+ + @ Wait for any previously-pending cluster teardown operations to abort + @ or complete: +mcpm_teardown_wait: + ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + cmp r0, #CLUSTER_GOING_DOWN + bne first_man_setup + wfe + b mcpm_teardown_wait + +first_man_setup: + dmb + + @ If the outbound gave up before teardown started, skip cluster setup: + + cmp r0, #CLUSTER_UP + beq mcpm_setup_leave + + @ power_up_setup is now responsible for setting up the cluster: + + cmp r7, #0 + mov r0, #1 @ second (cluster) affinity level + blxne r7 @ Call power_up_setup if defined + dmb + + mov r0, #CLUSTER_UP + strb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + dmb + +mcpm_setup_leave: + @ Leave the cluster setup critical section: + + mov r0, #INBOUND_NOT_COMING_UP + strb r0, [r8, #MCPM_SYNC_CLUSTER_INBOUND] + dsb + sev + + mov r0, r11 + bl vlock_unlock @ implies DMB + b mcpm_setup_complete + + @ In the contended case, non-first men wait here for cluster setup + @ to complete: +mcpm_setup_wait: + ldrb r0, [r8, #MCPM_SYNC_CLUSTER_CLUSTER] + cmp r0, #CLUSTER_UP + wfene + bne mcpm_setup_wait + dmb + +mcpm_setup_complete: + @ If a platform-specific CPU setup hook is needed, it is + @ called from here. + + cmp r7, #0 + mov r0, #0 @ first (CPU) affinity level + blxne r7 @ Call power_up_setup if defined + dmb + + @ Mark the CPU as up: + + mov r0, #CPU_UP + strb r0, [r5] + + @ Observability order of CPU_UP and opening of the gate does not matter. + +mcpm_entry_gated: + ldr r5, [r6, r4, lsl #2] @ r5 = CPU entry vector + cmp r5, #0 + wfeeq + beq mcpm_entry_gated + dmb + + pr_dbg "released\n" + bx r5 + + .align 2 + +3: .word mcpm_entry_vectors - . + .word mcpm_power_up_setup_phys - 3b + .word mcpm_sync - 3b + .word first_man_locks - 3b + +ENDPROC(mcpm_entry_point) + + .bss + + .align CACHE_WRITEBACK_ORDER + .type first_man_locks, #object +first_man_locks: + .space VLOCK_SIZE * MAX_NR_CLUSTERS + .align CACHE_WRITEBACK_ORDER + + .type mcpm_entry_vectors, #object +ENTRY(mcpm_entry_vectors) + .space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER + + .type mcpm_power_up_setup_phys, #object +ENTRY(mcpm_power_up_setup_phys) + .space 4 @ set by mcpm_sync_init() diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c new file mode 100644 index 00000000000..3caed0db698 --- /dev/null +++ b/arch/arm/common/mcpm_platsmp.c @@ -0,0 +1,89 @@ +/* + * linux/arch/arm/mach-vexpress/mcpm_platsmp.c + * + * Created by: Nicolas Pitre, November 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Code to handle secondary CPU bringup and hotplug for the cluster power API. 
+ */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> + +#include <asm/mcpm.h> +#include <asm/smp.h> +#include <asm/smp_plat.h> + +static void __init simple_smp_init_cpus(void) +{ +} + +static int __cpuinit mcpm_boot_secondary(unsigned int cpu, struct task_struct *idle) +{ + unsigned int mpidr, pcpu, pcluster, ret; + extern void secondary_startup(void); + + mpidr = cpu_logical_map(cpu); + pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + pr_debug("%s: logical CPU %d is physical CPU %d cluster %d\n", + __func__, cpu, pcpu, pcluster); + + mcpm_set_entry_vector(pcpu, pcluster, NULL); + ret = mcpm_cpu_power_up(pcpu, pcluster); + if (ret) + return ret; + mcpm_set_entry_vector(pcpu, pcluster, secondary_startup); + arch_send_wakeup_ipi_mask(cpumask_of(cpu)); + dsb_sev(); + return 0; +} + +static void __cpuinit mcpm_secondary_init(unsigned int cpu) +{ + mcpm_cpu_powered_up(); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int mcpm_cpu_disable(unsigned int cpu) +{ + /* + * We assume all CPUs may be shut down. + * This would be the hook to use for eventual Secure + * OS migration requests as described in the PSCI spec. + */ + return 0; +} + +static void mcpm_cpu_die(unsigned int cpu) +{ + unsigned int mpidr, pcpu, pcluster; + mpidr = read_cpuid_mpidr(); + pcpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + pcluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + mcpm_set_entry_vector(pcpu, pcluster, NULL); + mcpm_cpu_power_down(); +} + +#endif + +static struct smp_operations __initdata mcpm_smp_ops = { + .smp_init_cpus = simple_smp_init_cpus, + .smp_boot_secondary = mcpm_boot_secondary, + .smp_secondary_init = mcpm_secondary_init, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = mcpm_cpu_disable, + .cpu_die = mcpm_cpu_die, +#endif +}; + +void __init mcpm_smp_set_ops(void) +{ + smp_set_ops(&mcpm_smp_ops); +} diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index 9d2d3ba339f..2c64c4bc6d5 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -29,13 +29,14 @@ #include <asm/sched_clock.h> #include <asm/hardware/arm_timer.h> -static long __init sp804_get_clock_rate(const char *name) +static long __init sp804_get_clock_rate(struct clk *clk, + const char *name) { - struct clk *clk; long rate; int err; - clk = clk_get_sys("sp804", name); + if (!clk) + clk = clk_get_sys("sp804", name); if (IS_ERR(clk)) { pr_err("sp804: %s clock not found: %d\n", name, (int)PTR_ERR(clk)); @@ -77,9 +78,10 @@ static u32 sp804_read(void) void __init __sp804_clocksource_and_sched_clock_init(void __iomem *base, const char *name, + struct clk *clk, int use_sched_clock) { - long rate = sp804_get_clock_rate(name); + long rate = sp804_get_clock_rate(clk, name); if (rate < 0) return; @@ -172,10 +174,10 @@ static struct irqaction sp804_timer_irq = { }; void __init sp804_clockevents_init(void __iomem *base, unsigned int irq, - const char *name) + const char *name, struct clk *clk) { struct clock_event_device *evt = &sp804_clockevent; - long rate = sp804_get_clock_rate(name); + long rate = sp804_get_clock_rate(clk, name); if (rate < 0) return; diff --git a/arch/arm/common/vlock.S b/arch/arm/common/vlock.S new file mode 100644 index 00000000000..ff198583f68 --- /dev/null +++ b/arch/arm/common/vlock.S @@ -0,0 +1,108 @@ +/* + * vlock.S - simple voting lock implementation for ARM + * + * Created by: Dave Martin, 2012-08-16 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of 
the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * This algorithm is described in more detail in + * Documentation/arm/vlocks.txt. + */ + +#include <linux/linkage.h> +#include "vlock.h" + +/* Select different code if voting flags can fit in a single word. */ +#if VLOCK_VOTING_SIZE > 4 +#define FEW(x...) +#define MANY(x...) x +#else +#define FEW(x...) x +#define MANY(x...) +#endif + +@ voting lock for first-man coordination + +.macro voting_begin rbase:req, rcpu:req, rscratch:req + mov \rscratch, #1 + strb \rscratch, [\rbase, \rcpu] + dmb +.endm + +.macro voting_end rbase:req, rcpu:req, rscratch:req + dmb + mov \rscratch, #0 + strb \rscratch, [\rbase, \rcpu] + dsb + sev +.endm + +/* + * The vlock structure must reside in Strongly-Ordered or Device memory. + * This implementation deliberately eliminates most of the barriers which + * would be required for other memory types, and assumes that independent + * writes to neighbouring locations within a cacheline do not interfere + * with one another. + */ + +@ r0: lock structure base +@ r1: CPU ID (0-based index within cluster) +ENTRY(vlock_trylock) + add r1, r1, #VLOCK_VOTING_OFFSET + + voting_begin r0, r1, r2 + + ldrb r2, [r0, #VLOCK_OWNER_OFFSET] @ check whether lock is held + cmp r2, #VLOCK_OWNER_NONE + bne trylock_fail @ fail if so + + @ Control dependency implies strb not observable before previous ldrb. + + strb r1, [r0, #VLOCK_OWNER_OFFSET] @ submit my vote + + voting_end r0, r1, r2 @ implies DMB + + @ Wait for the current round of voting to finish: + + MANY( mov r3, #VLOCK_VOTING_OFFSET ) +0: + MANY( ldr r2, [r0, r3] ) + FEW( ldr r2, [r0, #VLOCK_VOTING_OFFSET] ) + cmp r2, #0 + wfene + bne 0b + MANY( add r3, r3, #4 ) + MANY( cmp r3, #VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE ) + MANY( bne 0b ) + + @ Check who won: + + dmb + ldrb r2, [r0, #VLOCK_OWNER_OFFSET] + eor r0, r1, r2 @ zero if I won, else nonzero + bx lr + +trylock_fail: + voting_end r0, r1, r2 + mov r0, #1 @ nonzero indicates that I lost + bx lr +ENDPROC(vlock_trylock) + +@ r0: lock structure base +ENTRY(vlock_unlock) + dmb + mov r1, #VLOCK_OWNER_NONE + strb r1, [r0, #VLOCK_OWNER_OFFSET] + dsb + sev + bx lr +ENDPROC(vlock_unlock) diff --git a/arch/arm/common/vlock.h b/arch/arm/common/vlock.h new file mode 100644 index 00000000000..3b441475a59 --- /dev/null +++ b/arch/arm/common/vlock.h @@ -0,0 +1,29 @@ +/* + * vlock.h - simple voting lock implementation + * + * Created by: Dave Martin, 2012-08-16 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
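The assembly above is compact, so the same voting algorithm is sketched in C below as a readability aid (not part of the patch). The sketch ignores the memory-ordering and Strongly-Ordered-memory requirements that vlock.S handles with DMB/DSB/SEV, and it uses cpu+1 as the owner byte where the assembly reuses cpu plus VLOCK_VOTING_OFFSET for the same "never zero" purpose.

#include <asm/mcpm.h>	/* MAX_CPUS_PER_CLUSTER */

struct vlock_sketch {
	volatile unsigned char owner;				/* VLOCK_OWNER_OFFSET */
	volatile unsigned char voting[MAX_CPUS_PER_CLUSTER];	/* VLOCK_VOTING_OFFSET */
};

/* Returns 0 if this CPU won the election, nonzero otherwise. */
static int vlock_trylock_sketch(struct vlock_sketch *v, unsigned int cpu)
{
	unsigned int i, me = cpu + 1;	/* non-zero, so 0 can mean "no owner" */

	v->voting[cpu] = 1;		/* open my vote */
	if (v->owner != 0) {		/* a previous round already has a winner */
		v->voting[cpu] = 0;
		return 1;
	}
	v->owner = me;			/* cast my vote */
	v->voting[cpu] = 0;		/* close my vote */

	for (i = 0; i < MAX_CPUS_PER_CLUSTER; i++)	/* wait for the round to end */
		while (v->voting[i])
			/* spin (the assembly uses WFE here) */;

	return v->owner == me ? 0 : 1;	/* whichever vote survived wins */
}

static void vlock_unlock_sketch(struct vlock_sketch *v)
{
	v->owner = 0;
}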
+ */ + +#ifndef __VLOCK_H +#define __VLOCK_H + +#include <asm/mcpm.h> + +/* Offsets and sizes are rounded to a word (4 bytes) */ +#define VLOCK_OWNER_OFFSET 0 +#define VLOCK_VOTING_OFFSET 4 +#define VLOCK_VOTING_SIZE ((MAX_CPUS_PER_CLUSTER + 3) / 4 * 4) +#define VLOCK_SIZE (VLOCK_VOTING_OFFSET + VLOCK_VOTING_SIZE) +#define VLOCK_OWNER_NONE 0 + +#endif /* ! __VLOCK_H */ diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index e1489c54cd1..bff71388e72 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -363,4 +363,79 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) flush_cache_all(); } +/* + * Memory synchronization helpers for mixed cached vs non cached accesses. + * + * Some synchronization algorithms have to set states in memory with the + * cache enabled or disabled depending on the code path. It is crucial + * to always ensure proper cache maintenance to update main memory right + * away in that case. + * + * Any cached write must be followed by a cache clean operation. + * Any cached read must be preceded by a cache invalidate operation. + * Yet, in the read case, a cache flush i.e. atomic clean+invalidate + * operation is needed to avoid discarding possible concurrent writes to the + * accessed memory. + * + * Also, in order to prevent a cached writer from interfering with an + * adjacent non-cached writer, each state variable must be located to + * a separate cache line. + */ + +/* + * This needs to be >= the max cache writeback size of all + * supported platforms included in the current kernel configuration. + * This is used to align state variables to their own cache lines. + */ +#define __CACHE_WRITEBACK_ORDER 6 /* guessed from existing platforms */ +#define __CACHE_WRITEBACK_GRANULE (1 << __CACHE_WRITEBACK_ORDER) + +/* + * There is no __cpuc_clean_dcache_area but we use it anyway for + * code intent clarity, and alias it to __cpuc_flush_dcache_area. + */ +#define __cpuc_clean_dcache_area __cpuc_flush_dcache_area + +/* + * Ensure preceding writes to *p by this CPU are visible to + * subsequent reads by other CPUs: + */ +static inline void __sync_cache_range_w(volatile void *p, size_t size) +{ + char *_p = (char *)p; + + __cpuc_clean_dcache_area(_p, size); + outer_clean_range(__pa(_p), __pa(_p + size)); +} + +/* + * Ensure preceding writes to *p by other CPUs are visible to + * subsequent reads by this CPU. We must be careful not to + * discard data simultaneously written by another CPU, hence the + * usage of flush rather than invalidate operations. + */ +static inline void __sync_cache_range_r(volatile void *p, size_t size) +{ + char *_p = (char *)p; + +#ifdef CONFIG_OUTER_CACHE + if (outer_cache.flush_range) { + /* + * Ensure dirty data migrated from other CPUs into our cache + * are cleaned out safely before the outer cache is cleaned: + */ + __cpuc_clean_dcache_area(_p, size); + + /* Clean and invalidate stale data for *p from outer ... */ + outer_flush_range(__pa(_p), __pa(_p + size)); + } +#endif + + /* ... 
and inner cache: */ + __cpuc_flush_dcache_area(_p, size); +} + +#define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr)) +#define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr)) + #endif diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 5ef4d8015a6..ce4d01c03e6 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -59,6 +59,20 @@ static inline void set_cr(unsigned int val) isb(); } +static inline unsigned int get_auxcr(void) +{ + unsigned int val; + asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val)); + return val; +} + +static inline void set_auxcr(unsigned int val) +{ + asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR" + : : "r" (val)); + isb(); +} + #ifndef CONFIG_SMP extern void adjust_cr(unsigned long mask, unsigned long set); #endif diff --git a/arch/arm/include/asm/hardware/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h index 2dd9d3f83f2..4cba3e6d8ae 100644 --- a/arch/arm/include/asm/hardware/timer-sp.h +++ b/arch/arm/include/asm/hardware/timer-sp.h @@ -1,15 +1,18 @@ void __sp804_clocksource_and_sched_clock_init(void __iomem *, - const char *, int); + const char *, struct clk *, int); -static inline void sp804_clocksource_init(void __iomem *base, const char *name) +static inline void sp804_clocksource_init(void __iomem *base, const char *name, + struct clk *clk) { - __sp804_clocksource_and_sched_clock_init(base, name, 0); + __sp804_clocksource_and_sched_clock_init(base, name, clk, 0); } static inline void sp804_clocksource_and_sched_clock_init(void __iomem *base, - const char *name) + const char *name, + struct clk *clk) { - __sp804_clocksource_and_sched_clock_init(base, name, 1); + __sp804_clocksource_and_sched_clock_init(base, name, clk, 1); } -void sp804_clockevents_init(void __iomem *, unsigned int, const char *); +void sp804_clockevents_init(void __iomem *, unsigned int, const char *, + struct clk *); diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index 35c21c375d8..53c15dec7af 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -30,6 +30,11 @@ extern void asm_do_IRQ(unsigned int, struct pt_regs *); void handle_IRQ(unsigned int, struct pt_regs *); void init_IRQ(void); +#ifdef CONFIG_MULTI_IRQ_HANDLER +extern void (*handle_arch_irq)(struct pt_regs *); +extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); +#endif + #endif #endif diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 308ad7d6f98..75bf07910b8 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -8,6 +8,8 @@ * published by the Free Software Foundation. 
*/ +#include <linux/types.h> + #ifndef __ASSEMBLY__ struct tag; @@ -16,8 +18,10 @@ struct pt_regs; struct smp_operations; #ifdef CONFIG_SMP #define smp_ops(ops) (&(ops)) +#define smp_init_ops(ops) (&(ops)) #else #define smp_ops(ops) (struct smp_operations *)NULL +#define smp_init_ops(ops) (bool (*)(void))NULL #endif struct machine_desc { @@ -41,6 +45,7 @@ struct machine_desc { unsigned char reserve_lp2 :1; /* never has lp2 */ char restart_mode; /* default restart mode */ struct smp_operations *smp; /* SMP operations */ + bool (*smp_init)(void); void (*fixup)(struct tag *, char **, struct meminfo *); void (*reserve)(void);/* reserve mem blocks */ diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index 18c88302333..2092ee1e130 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -20,11 +20,6 @@ struct seq_file; extern void init_FIQ(int); extern int show_fiq_list(struct seq_file *, int); -#ifdef CONFIG_MULTI_IRQ_HANDLER -extern void (*handle_arch_irq)(struct pt_regs *); -extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); -#endif - /* * This is for easy migration, but should be changed in the source */ @@ -35,35 +30,4 @@ do { \ raw_spin_unlock(&desc->lock); \ } while(0) -#ifndef __ASSEMBLY__ -/* - * Entry/exit functions for chained handlers where the primary IRQ chip - * may implement either fasteoi or level-trigger flow control. - */ -static inline void chained_irq_enter(struct irq_chip *chip, - struct irq_desc *desc) -{ - /* FastEOI controllers require no action on entry. */ - if (chip->irq_eoi) - return; - - if (chip->irq_mask_ack) { - chip->irq_mask_ack(&desc->irq_data); - } else { - chip->irq_mask(&desc->irq_data); - if (chip->irq_ack) - chip->irq_ack(&desc->irq_data); - } -} - -static inline void chained_irq_exit(struct irq_chip *chip, - struct irq_desc *desc) -{ - if (chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - else - chip->irq_unmask(&desc->irq_data); -} -#endif - #endif diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h new file mode 100644 index 00000000000..0f7b7620e9a --- /dev/null +++ b/arch/arm/include/asm/mcpm.h @@ -0,0 +1,209 @@ +/* + * arch/arm/include/asm/mcpm.h + * + * Created by: Nicolas Pitre, April 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef MCPM_H +#define MCPM_H + +/* + * Maximum number of possible clusters / CPUs per cluster. + * + * This should be sufficient for quite a while, while keeping the + * (assembly) code simpler. When this starts to grow then we'll have + * to consider dynamic allocation. + */ +#define MAX_CPUS_PER_CLUSTER 4 +#define MAX_NR_CLUSTERS 2 + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <asm/cacheflush.h> + +/* + * Platform specific code should use this symbol to set up secondary + * entry location for processors to use when released from reset. + */ +extern void mcpm_entry_point(void); + +/* + * This is used to indicate where the given CPU from given cluster should + * branch once it is ready to re-enter the kernel using ptr, or NULL if it + * should be gated. A gated CPU is held in a WFE loop until its vector + * becomes non NULL. + */ +void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr); + +/* + * CPU/cluster power operations API for higher subsystems to use. 
+ */ + +/** + * mcpm_cpu_power_up - make given CPU in given cluster runable + * + * @cpu: CPU number within given cluster + * @cluster: cluster number for the CPU + * + * The identified CPU is brought out of reset. If the cluster was powered + * down then it is brought up as well, taking care not to let the other CPUs + * in the cluster run, and ensuring appropriate cluster setup. + * + * Caller must ensure the appropriate entry vector is initialized with + * mcpm_set_entry_vector() prior to calling this. + * + * This must be called in a sleepable context. However, the implementation + * is strongly encouraged to return early and let the operation happen + * asynchronously, especially when significant delays are expected. + * + * If the operation cannot be performed then an error code is returned. + */ +int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster); + +/** + * mcpm_cpu_power_down - power the calling CPU down + * + * The calling CPU is powered down. + * + * If this CPU is found to be the "last man standing" in the cluster + * then the cluster is prepared for power-down too. + * + * This must be called with interrupts disabled. + * + * This does not return. Re-entry in the kernel is expected via + * mcpm_entry_point. + */ +void mcpm_cpu_power_down(void); + +/** + * mcpm_cpu_suspend - bring the calling CPU in a suspended state + * + * @expected_residency: duration in microseconds the CPU is expected + * to remain suspended, or 0 if unknown/infinity. + * + * The calling CPU is suspended. The expected residency argument is used + * as a hint by the platform specific backend to implement the appropriate + * sleep state level according to the knowledge it has on wake-up latency + * for the given hardware. + * + * If this CPU is found to be the "last man standing" in the cluster + * then the cluster may be prepared for power-down too, if the expected + * residency makes it worthwhile. + * + * This must be called with interrupts disabled. + * + * This does not return. Re-entry in the kernel is expected via + * mcpm_entry_point. + */ +void mcpm_cpu_suspend(u64 expected_residency); + +/** + * mcpm_cpu_powered_up - housekeeping workafter a CPU has been powered up + * + * This lets the platform specific backend code perform needed housekeeping + * work. This must be called by the newly activated CPU as soon as it is + * fully operational in kernel space, before it enables interrupts. + * + * If the operation cannot be performed then an error code is returned. + */ +int mcpm_cpu_powered_up(void); + +/* + * Platform specific methods used in the implementation of the above API. + */ +struct mcpm_platform_ops { + int (*power_up)(unsigned int cpu, unsigned int cluster); + void (*power_down)(void); + void (*suspend)(u64); + void (*powered_up)(void); +}; + +/** + * mcpm_platform_register - register platform specific power methods + * + * @ops: mcpm_platform_ops structure to register + * + * An error is returned if the registration has been done previously. + */ +int __init mcpm_platform_register(const struct mcpm_platform_ops *ops); + +/* Synchronisation structures for coordinating safe cluster setup/teardown: */ + +/* + * When modifying this structure, make sure you update the MCPM_SYNC_ defines + * to match. 
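To make the API above concrete: a platform backend supplies a struct mcpm_platform_ops and registers it once at boot. The sketch below is not part of this patch; my_release_cpu() and my_assert_cpu_reset() are hypothetical stand-ins for the platform's power-controller accesses, and the per-CPU use counts (which the race note in mcpm_cpu_power_down()'s implementation requires a backend to keep) are left unserialised for brevity.

#include <linux/errno.h>
#include <linux/init.h>
#include <asm/cputype.h>
#include <asm/mcpm.h>

static int my_use_count[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];

static int my_power_up(unsigned int cpu, unsigned int cluster)
{
	if (cluster >= MAX_NR_CLUSTERS || cpu >= MAX_CPUS_PER_CLUSTER)
		return -EINVAL;
	if (++my_use_count[cluster][cpu] == 1)
		my_release_cpu(cpu, cluster);	/* hypothetical: deassert reset */
	return 0;
}

static void my_power_down(void)
{
	unsigned int mpidr = read_cpuid_mpidr();
	unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
	unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);

	if (--my_use_count[cluster][cpu] == 0) {
		/* Full teardown; my_assert_cpu_reset() is hypothetical. */
		my_assert_cpu_reset(cpu, cluster);
	}
	/* Otherwise a power_up raced with us: minimal teardown only. */
}

static const struct mcpm_platform_ops my_ops = {
	.power_up	= my_power_up,
	.power_down	= my_power_down,
	/* .suspend and .powered_up omitted in this sketch */
};

static int __init my_mcpm_init(void)
{
	int ret = mcpm_platform_register(&my_ops);	/* -EBUSY if already taken */

	if (!ret)
		ret = mcpm_sync_init(NULL);	/* no cluster setup hook here */
	return ret;
}
early_initcall(my_mcpm_init);

Installing the matching smp_operations is a separate step: mcpm_smp_set_ops(), typically invoked from the board's smp_init hook (a sketch of that hook appears further down, after the setup.c and sleep.S hunks). Callers then follow the pattern visible in mcpm_platsmp.c: set an entry vector with mcpm_set_entry_vector() and call mcpm_cpu_power_up() for the target CPU.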
+ */ +struct mcpm_sync_struct { + /* individual CPU states */ + struct { + s8 cpu __aligned(__CACHE_WRITEBACK_GRANULE); + } cpus[MAX_CPUS_PER_CLUSTER]; + + /* cluster state */ + s8 cluster __aligned(__CACHE_WRITEBACK_GRANULE); + + /* inbound-side state */ + s8 inbound __aligned(__CACHE_WRITEBACK_GRANULE); +}; + +struct sync_struct { + struct mcpm_sync_struct clusters[MAX_NR_CLUSTERS]; +}; + +extern unsigned long sync_phys; /* physical address of *mcpm_sync */ + +void __mcpm_cpu_going_down(unsigned int cpu, unsigned int cluster); +void __mcpm_cpu_down(unsigned int cpu, unsigned int cluster); +void __mcpm_outbound_leave_critical(unsigned int cluster, int state); +bool __mcpm_outbound_enter_critical(unsigned int this_cpu, unsigned int cluster); +int __mcpm_cluster_state(unsigned int cluster); + +int __init mcpm_sync_init( + void (*power_up_setup)(unsigned int affinity_level)); + +void __init mcpm_smp_set_ops(void); + +#else + +/* + * asm-offsets.h causes trouble when included in .c files, and cacheflush.h + * cannot be included in asm files. Let's work around the conflict like this. + */ +#include <asm/asm-offsets.h> +#define __CACHE_WRITEBACK_GRANULE CACHE_WRITEBACK_GRANULE + +#endif /* ! __ASSEMBLY__ */ + +/* Definitions for mcpm_sync_struct */ +#define CPU_DOWN 0x11 +#define CPU_COMING_UP 0x12 +#define CPU_UP 0x13 +#define CPU_GOING_DOWN 0x14 + +#define CLUSTER_DOWN 0x21 +#define CLUSTER_UP 0x22 +#define CLUSTER_GOING_DOWN 0x23 + +#define INBOUND_NOT_COMING_UP 0x31 +#define INBOUND_COMING_UP 0x32 + +/* + * Offsets for the mcpm_sync_struct members, for use in asm. + * We don't want to make them global to the kernel via asm-offsets.c. + */ +#define MCPM_SYNC_CLUSTER_CPUS 0 +#define MCPM_SYNC_CPU_SIZE __CACHE_WRITEBACK_GRANULE +#define MCPM_SYNC_CLUSTER_CLUSTER \ + (MCPM_SYNC_CLUSTER_CPUS + MCPM_SYNC_CPU_SIZE * MAX_CPUS_PER_CLUSTER) +#define MCPM_SYNC_CLUSTER_INBOUND \ + (MCPM_SYNC_CLUSTER_CLUSTER + __CACHE_WRITEBACK_GRANULE) +#define MCPM_SYNC_CLUSTER_SIZE \ + (MCPM_SYNC_CLUSTER_INBOUND + __CACHE_WRITEBACK_GRANULE) + +#endif diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index ce0dbe7c162..a079cbee427 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -16,6 +16,10 @@ #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL0 0 +#define PSCI_POWER_STATE_AFFINITY_LEVEL1 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL2 2 +#define PSCI_POWER_STATE_AFFINITY_LEVEL3 3 struct psci_power_state { u16 id; @@ -33,4 +37,12 @@ struct psci_operations { extern struct psci_operations psci_ops; +#ifdef CONFIG_ARM_PSCI +extern int __init psci_probe(void); +#else +static inline int psci_probe(void) +{ + return -ENODEV; +} +#endif #endif /* __ASM_ARM_PSCI_H */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 923eec7105c..3f088225e71 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -149,6 +149,10 @@ int main(void) DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); + BLANK(); + DEFINE(CACHE_WRITEBACK_ORDER, __CACHE_WRITEBACK_ORDER); + DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE); + BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 36531643cc2..1180801468d 100644 --- a/arch/arm/kernel/psci.c +++ 
b/arch/arm/kernel/psci.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/of.h> +#include <linux/string.h> #include <asm/compiler.h> #include <asm/errno.h> @@ -26,6 +27,11 @@ struct psci_operations psci_ops; +/* Type of psci support. Currently can only be enabled or disabled */ +#define PSCI_SUP_DISABLED 0 +#define PSCI_SUP_ENABLED 1 + +static unsigned int psci; static int (*invoke_psci_fn)(u32, u32, u32, u32); enum psci_function { @@ -42,6 +48,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; #define PSCI_RET_EOPNOTSUPP -1 #define PSCI_RET_EINVAL -2 #define PSCI_RET_EPERM -3 +#define PSCI_RET_EALREADYON -4 static int psci_to_linux_errno(int errno) { @@ -54,6 +61,8 @@ static int psci_to_linux_errno(int errno) return -EINVAL; case PSCI_RET_EPERM: return -EPERM; + case PSCI_RET_EALREADYON: + return -EAGAIN; }; return -EINVAL; @@ -164,6 +173,9 @@ static int __init psci_init(void) const char *method; u32 id; + if (psci == PSCI_SUP_DISABLED) + return 0; + np = of_find_matching_node(NULL, psci_of_match); if (!np) return 0; @@ -209,3 +221,33 @@ out_put_node: return 0; } early_initcall(psci_init); + +int __init psci_probe(void) +{ + struct device_node *np; + int ret = -ENODEV; + + if (psci == PSCI_SUP_ENABLED) { + np = of_find_matching_node(NULL, psci_of_match); + if (np) + ret = 0; + } + + of_node_put(np); + return ret; +} + +static int __init early_psci(char *val) +{ + int ret = 0; + + if (strcmp(val, "enable") == 0) + psci = PSCI_SUP_ENABLED; + else if (strcmp(val, "disable") == 0) + psci = PSCI_SUP_DISABLED; + else + ret = -EINVAL; + + return ret; +} +early_param("psci", early_psci); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 234e339196c..3f0d5e969ef 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -260,6 +260,19 @@ static int cpu_has_aliasing_icache(unsigned int arch) int aliasing_icache; unsigned int id_reg, num_sets, line_size; +#ifdef CONFIG_BIG_LITTLE + /* + * We expect a combination of Cortex-A15 and Cortex-A7 cores. + * A7 = VIPT aliasing I-cache + * A15 = PIPT (non-aliasing) I-cache + * To cater for this discrepancy, let's assume aliasing I-cache + * all the time. This means unneeded extra work on the A15 but + * only ptrace is affected which is not performance critical. + */ + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc0f0) + return 1; +#endif + /* PIPT caches never alias. */ if (icache_is_pipt()) return 0; @@ -389,7 +402,7 @@ static void __init feat_v6_fixup(void) * * cpu_init sets up the per-CPU stacks. 
*/ -void cpu_init(void) +void notrace cpu_init(void) { unsigned int cpu = smp_processor_id(); struct stack *stk = &stacks[cpu]; @@ -787,7 +800,10 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); #ifdef CONFIG_SMP if (is_smp()) { - smp_set_ops(mdesc->smp); + if (!mdesc->smp_init || !mdesc->smp_init()) { + if(mdesc->smp) + smp_set_ops(mdesc->smp); + } smp_init_cpus(); } #endif diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 987dcf33415..b5c1e636ed8 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -4,6 +4,7 @@ #include <asm/assembler.h> #include <asm/glue-cache.h> #include <asm/glue-proc.h> +#include "entry-header.S" .text /* @@ -30,9 +31,8 @@ ENTRY(__cpu_suspend) mov r2, r5 @ virtual SP ldr r3, =sleep_save_sp #ifdef CONFIG_SMP - ALT_SMP(mrc p15, 0, lr, c0, c0, 5) - ALT_UP(mov lr, #0) - and lr, lr, #15 + get_thread_info r5 + ldr lr, [r5, #TI_CPU] @ cpu logical index add r3, r3, lr, lsl #2 #endif bl __cpu_suspend_save @@ -82,10 +82,13 @@ ENDPROC(cpu_resume_after_mmu) .align ENTRY(cpu_resume) #ifdef CONFIG_SMP + mov r1, #0 @ fall-back logical index for UP + ALT_SMP(mrc p15, 0, r0, c0, c0, 5) + ALT_UP_B(1f) + bic r0, #0xff000000 + bl cpu_logical_index @ return logical index in r1 +1: adr r0, sleep_save_sp - ALT_SMP(mrc p15, 0, r1, c0, c0, 5) - ALT_UP(mov r1, #0) - and r1, r1, #15 ldr r0, [r0, r1, lsl #2] @ stack phys addr #else ldr r0, sleep_save_sp @ stack phys addr @@ -102,3 +105,20 @@ sleep_save_sp: .rept CONFIG_NR_CPUS .long 0 @ preserve stack phys ptr here .endr + +#ifdef CONFIG_SMP +cpu_logical_index: + adr r3, cpu_map_ptr + ldr r2, [r3] + add r3, r3, r2 @ virt_to_phys(__cpu_logical_map) + mov r1, #0 +1: + ldr r2, [r3, r1, lsl #2] + cmp r2, r0 + moveq pc, lr + add r1, r1, #1 + b 1b + +cpu_map_ptr: + .long __cpu_logical_map - . 
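Stepping back to the machine_desc.smp_init hook introduced in the asm/mach/arch.h and setup.c hunks above: it lets a board pick its SMP backend at run time, for example preferring firmware PSCI when it is advertised and falling back to MCPM otherwise. A hedged sketch of such a hook follows; my_psci_smp_ops is a hypothetical PSCI-backed smp_operations provided elsewhere by the platform, while everything else uses declarations added by this series. Note that with this series psci_probe() only succeeds when "psci=enable" is passed on the command line and a matching device-tree node exists.

#include <linux/init.h>
#include <linux/types.h>
#include <asm/mcpm.h>
#include <asm/psci.h>
#include <asm/smp.h>

extern struct smp_operations my_psci_smp_ops;	/* hypothetical PSCI-backed ops */

static bool __init my_smp_init(void)
{
	if (psci_probe() == 0)
		smp_set_ops(&my_psci_smp_ops);	/* firmware controls CPU power */
	else
		mcpm_smp_set_ops();		/* in-kernel MCPM state machine */

	return true;	/* tell setup_arch() not to install mdesc->smp */
}

/* In the board's machine descriptor: .smp_init = smp_init_ops(my_smp_init), */

Returning true is what makes the setup_arch() change shown above skip the ordinary smp_set_ops(mdesc->smp) path.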
+#endif diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index fa45fb43a62..8df74d630ca 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -13,6 +13,7 @@ #include <linux/cpu.h> #include <linux/cpumask.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/percpu.h> #include <linux/node.h> @@ -201,6 +202,7 @@ static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} * cpu topology table */ struct cputopo_arm cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); const struct cpumask *cpu_coregroup_mask(int cpu) { diff --git a/arch/arm/mach-at91/gpio.c b/arch/arm/mach-at91/gpio.c index c5d7e1e9d75..a5afcf76550 100644 --- a/arch/arm/mach-at91/gpio.c +++ b/arch/arm/mach-at91/gpio.c @@ -22,10 +22,9 @@ #include <linux/module.h> #include <linux/io.h> #include <linux/irqdomain.h> +#include <linux/irqchip/chained_irq.h> #include <linux/of_address.h> -#include <asm/mach/irq.h> - #include <mach/hardware.h> #include <mach/at91_pio.h> diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c index d63d399c7ba..7bc0f9aa8b3 100644 --- a/arch/arm/mach-exynos/common.c +++ b/arch/arm/mach-exynos/common.c @@ -26,6 +26,7 @@ #include <linux/irqchip.h> #include <linux/of_address.h> #include <linux/irqchip/arm-gic.h> +#include <linux/irqchip/chained_irq.h> #include <asm/proc-fns.h> #include <asm/exception.h> diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 60f7c5be057..95e04bd5813 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -20,7 +20,6 @@ #include <linux/jiffies.h> #include <linux/smp.h> #include <linux/io.h> -#include <linux/irqchip/arm-gic.h> #include <asm/cacheflush.h> #include <asm/smp_plat.h> @@ -76,13 +75,6 @@ static DEFINE_SPINLOCK(boot_lock); static void __cpuinit exynos_secondary_init(unsigned int cpu) { /* - * if any interrupts are already enabled for the primary - * core (e.g. 
timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - - /* * let the primary processor know we're out of the * pen, then head off into the C entry point */ diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index a4f9f50247d..53d0c79b543 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -116,8 +116,9 @@ static void __init highbank_timer_init(void) lookup.clk = of_clk_get(np, 0); clkdev_add(&lookup); - sp804_clocksource_and_sched_clock_init(timer_base + 0x20, "timer1"); - sp804_clockevents_init(timer_base, irq, "timer0"); + sp804_clocksource_and_sched_clock_init(timer_base + 0x20, "timer1", + NULL); + sp804_clockevents_init(timer_base, irq, "timer0", NULL); twd_local_timer_of_register(); diff --git a/arch/arm/mach-highbank/platsmp.c b/arch/arm/mach-highbank/platsmp.c index 8797a700172..a984573e0d0 100644 --- a/arch/arm/mach-highbank/platsmp.c +++ b/arch/arm/mach-highbank/platsmp.c @@ -17,7 +17,6 @@ #include <linux/init.h> #include <linux/smp.h> #include <linux/io.h> -#include <linux/irqchip/arm-gic.h> #include <asm/smp_scu.h> @@ -25,11 +24,6 @@ extern void secondary_startup(void); -static void __cpuinit highbank_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit highbank_boot_secondary(unsigned int cpu, struct task_struct *idle) { highbank_set_cpu_jump(cpu, secondary_startup); @@ -67,7 +61,6 @@ static void __init highbank_smp_prepare_cpus(unsigned int max_cpus) struct smp_operations highbank_smp_ops __initdata = { .smp_init_cpus = highbank_smp_init_cpus, .smp_prepare_cpus = highbank_smp_prepare_cpus, - .smp_secondary_init = highbank_secondary_init, .smp_boot_secondary = highbank_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = highbank_cpu_die, diff --git a/arch/arm/mach-imx/platsmp.c b/arch/arm/mach-imx/platsmp.c index 7c0b03f67b0..77e9a25ed0f 100644 --- a/arch/arm/mach-imx/platsmp.c +++ b/arch/arm/mach-imx/platsmp.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/smp.h> -#include <linux/irqchip/arm-gic.h> #include <asm/page.h> #include <asm/smp_scu.h> #include <asm/mach/map.h> @@ -52,16 +51,6 @@ void imx_scu_standby_enable(void) writel_relaxed(val, scu_base); } -static void __cpuinit imx_secondary_init(unsigned int cpu) -{ - /* - * if any interrupts are already enabled for the primary - * core (e.g. 
timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); -} - static int __cpuinit imx_boot_secondary(unsigned int cpu, struct task_struct *idle) { imx_set_cpu_jump(cpu, v7_secondary_startup); @@ -96,7 +85,6 @@ static void __init imx_smp_prepare_cpus(unsigned int max_cpus) struct smp_operations imx_smp_ops __initdata = { .smp_init_cpus = imx_smp_init_cpus, .smp_prepare_cpus = imx_smp_prepare_cpus, - .smp_secondary_init = imx_secondary_init, .smp_boot_secondary = imx_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = imx_cpu_die, diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index 2b0db82a538..5976d3465a3 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -268,7 +268,7 @@ static void __init cp_of_timer_init(void) if (WARN_ON(!base)) return; writel(0, base + TIMER_CTRL); - sp804_clocksource_init(base, node->name); + sp804_clocksource_init(base, node->name, NULL); err = of_property_read_string(of_aliases, "arm,timer-secondary", &path); @@ -280,7 +280,7 @@ static void __init cp_of_timer_init(void) return; irq = irq_of_parse_and_map(node, 0); writel(0, base + TIMER_CTRL); - sp804_clockevents_init(base, irq, node->name); + sp804_clockevents_init(base, irq, node->name, NULL); } static const struct of_device_id fpga_irq_of_match[] __initconst = { @@ -514,8 +514,8 @@ static void __init cp_timer_init(void) writel(0, TIMER1_VA_BASE + TIMER_CTRL); writel(0, TIMER2_VA_BASE + TIMER_CTRL); - sp804_clocksource_init(TIMER2_VA_BASE, "timer2"); - sp804_clockevents_init(TIMER1_VA_BASE, IRQ_TIMERINT1, "timer1"); + sp804_clocksource_init(TIMER2_VA_BASE, "timer2", NULL); + sp804_clockevents_init(TIMER1_VA_BASE, IRQ_TIMERINT1, "timer1", NULL); } #define INTEGRATOR_CP_MMC_IRQS { IRQ_CP_MMCIINT0, IRQ_CP_MMCIINT1 } diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c index 42932865416..00cdb0a5dac 100644 --- a/arch/arm/mach-msm/platsmp.c +++ b/arch/arm/mach-msm/platsmp.c @@ -15,7 +15,6 @@ #include <linux/jiffies.h> #include <linux/smp.h> #include <linux/io.h> -#include <linux/irqchip/arm-gic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> @@ -42,13 +41,6 @@ static inline int get_core_count(void) static void __cpuinit msm_secondary_init(unsigned int cpu) { /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - - /* * let the primary processor know we're out of the * pen, then head off into the C entry point */ diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index d9727218dd0..e7a449758ab 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -67,13 +67,6 @@ static void __cpuinit omap4_secondary_init(unsigned int cpu) 4, 0, 0, 0, 0, 0); /* - * If any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - - /* * Synchronise with the boot thread. 
*/ spin_lock(&boot_lock); diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 4b788310f6a..c7c92e78f0c 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -11,7 +11,6 @@ #include <linux/delay.h> #include <linux/of.h> #include <linux/of_address.h> -#include <linux/irqchip/arm-gic.h> #include <asm/page.h> #include <asm/mach/map.h> #include <asm/smp_plat.h> @@ -49,13 +48,6 @@ void __init sirfsoc_map_scu(void) static void __cpuinit sirfsoc_secondary_init(unsigned int cpu) { /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - - /* * let the primary processor know we're out of the * pen, then head off into the C entry point */ diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 1d5ee5c9a1d..a4cf86fe591 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -363,8 +363,8 @@ void __init realview_timer_init(unsigned int timer_irq) writel(0, timer2_va_base + TIMER_CTRL); writel(0, timer3_va_base + TIMER_CTRL); - sp804_clocksource_init(timer3_va_base, "timer3"); - sp804_clockevents_init(timer0_va_base, timer_irq, "timer0"); + sp804_clocksource_init(timer3_va_base, "timer3", NULL); + sp804_clockevents_init(timer0_va_base, timer_irq, "timer0", NULL); } /* diff --git a/arch/arm/mach-s3c24xx/irq.c b/arch/arm/mach-s3c24xx/irq.c index d8ba9bee4c7..6b0b6047785 100644 --- a/arch/arm/mach-s3c24xx/irq.c +++ b/arch/arm/mach-s3c24xx/irq.c @@ -25,6 +25,7 @@ #include <linux/ioport.h> #include <linux/device.h> #include <linux/irqdomain.h> +#include <linux/irqchip/chained_irq.h> #include <asm/mach/irq.h> diff --git a/arch/arm/mach-shmobile/smp-emev2.c b/arch/arm/mach-shmobile/smp-emev2.c index 953eb1f9388..384e27dd360 100644 --- a/arch/arm/mach-shmobile/smp-emev2.c +++ b/arch/arm/mach-shmobile/smp-emev2.c @@ -23,7 +23,6 @@ #include <linux/spinlock.h> #include <linux/io.h> #include <linux/delay.h> -#include <linux/irqchip/arm-gic.h> #include <mach/common.h> #include <mach/emev2.h> #include <asm/smp_plat.h> @@ -85,11 +84,6 @@ static int __maybe_unused emev2_cpu_kill(unsigned int cpu) } -static void __cpuinit emev2_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit emev2_boot_secondary(unsigned int cpu, struct task_struct *idle) { cpu = cpu_logical_map(cpu); @@ -124,7 +118,6 @@ static void __init emev2_smp_init_cpus(void) struct smp_operations emev2_smp_ops __initdata = { .smp_init_cpus = emev2_smp_init_cpus, .smp_prepare_cpus = emev2_smp_prepare_cpus, - .smp_secondary_init = emev2_secondary_init, .smp_boot_secondary = emev2_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = emev2_cpu_kill, diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c index 3a4acf23edc..994906560ed 100644 --- a/arch/arm/mach-shmobile/smp-r8a7779.c +++ b/arch/arm/mach-shmobile/smp-r8a7779.c @@ -23,7 +23,6 @@ #include <linux/spinlock.h> #include <linux/io.h> #include <linux/delay.h> -#include <linux/irqchip/arm-gic.h> #include <mach/common.h> #include <mach/r8a7779.h> #include <asm/smp_plat.h> @@ -132,11 +131,6 @@ static int __maybe_unused r8a7779_cpu_kill(unsigned int cpu) } -static void __cpuinit r8a7779_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit r8a7779_boot_secondary(unsigned int cpu, struct task_struct *idle) { struct r8a7779_pm_ch *ch = NULL; @@ -186,7 +180,6 @@ static void __init 
r8a7779_smp_init_cpus(void) struct smp_operations r8a7779_smp_ops __initdata = { .smp_init_cpus = r8a7779_smp_init_cpus, .smp_prepare_cpus = r8a7779_smp_prepare_cpus, - .smp_secondary_init = r8a7779_secondary_init, .smp_boot_secondary = r8a7779_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = r8a7779_cpu_kill, diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c index acb46a94ccd..d0f9aca2247 100644 --- a/arch/arm/mach-shmobile/smp-sh73a0.c +++ b/arch/arm/mach-shmobile/smp-sh73a0.c @@ -23,7 +23,6 @@ #include <linux/spinlock.h> #include <linux/io.h> #include <linux/delay.h> -#include <linux/irqchip/arm-gic.h> #include <mach/common.h> #include <asm/cacheflush.h> #include <asm/smp_plat.h> @@ -59,11 +58,6 @@ static unsigned int __init sh73a0_get_core_count(void) return scu_get_core_count(scu_base); } -static void __cpuinit sh73a0_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - static int __cpuinit sh73a0_boot_secondary(unsigned int cpu, struct task_struct *idle) { cpu = cpu_logical_map(cpu); @@ -138,7 +132,6 @@ static void sh73a0_cpu_die(unsigned int cpu) struct smp_operations sh73a0_smp_ops __initdata = { .smp_init_cpus = sh73a0_smp_init_cpus, .smp_prepare_cpus = sh73a0_smp_prepare_cpus, - .smp_secondary_init = sh73a0_secondary_init, .smp_boot_secondary = sh73a0_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = sh73a0_cpu_kill, diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index 84c60fa8daa..ca14d1d5ac7 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -22,7 +22,6 @@ #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> -#include <linux/irqchip/arm-gic.h> #include <asm/cacheflush.h> #include <asm/smp_scu.h> @@ -33,16 +32,6 @@ extern void __iomem *sys_manager_base_addr; extern void __iomem *rst_manager_base_addr; -static void __cpuinit socfpga_secondary_init(unsigned int cpu) -{ - /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); -} - static int __cpuinit socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) { int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; @@ -109,7 +98,6 @@ static void socfpga_cpu_die(unsigned int cpu) struct smp_operations socfpga_smp_ops __initdata = { .smp_init_cpus = socfpga_smp_init_cpus, .smp_prepare_cpus = socfpga_smp_prepare_cpus, - .smp_secondary_init = socfpga_secondary_init, .smp_boot_secondary = socfpga_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = socfpga_cpu_die, diff --git a/arch/arm/mach-spear13xx/platsmp.c b/arch/arm/mach-spear13xx/platsmp.c index af4ade61cd9..551c69c9a22 100644 --- a/arch/arm/mach-spear13xx/platsmp.c +++ b/arch/arm/mach-spear13xx/platsmp.c @@ -15,7 +15,6 @@ #include <linux/jiffies.h> #include <linux/io.h> #include <linux/smp.h> -#include <linux/irqchip/arm-gic.h> #include <asm/cacheflush.h> #include <asm/smp_scu.h> #include <mach/spear.h> @@ -28,13 +27,6 @@ static void __iomem *scu_base = IOMEM(VA_SCU_BASE); static void __cpuinit spear13xx_secondary_init(unsigned int cpu) { /* - * if any interrupts are already enabled for the primary - * core (e.g. 
timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - - /* * let the primary processor know we're out of the * pen, then head off into the C entry point */ diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c index 2c6b3d55213..9348d3c496a 100644 --- a/arch/arm/mach-tegra/platsmp.c +++ b/arch/arm/mach-tegra/platsmp.c @@ -18,7 +18,6 @@ #include <linux/jiffies.h> #include <linux/smp.h> #include <linux/io.h> -#include <linux/irqchip/arm-gic.h> #include <linux/clk/tegra.h> #include <asm/cacheflush.h> @@ -44,13 +43,6 @@ static cpumask_t tegra_cpu_init_mask; static void __cpuinit tegra_secondary_init(unsigned int cpu) { - /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - cpumask_set_cpu(cpu, &tegra_cpu_init_mask); } diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index 18f7af339dc..152b1309b9a 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -16,7 +16,6 @@ #include <linux/device.h> #include <linux/smp.h> #include <linux/io.h> -#include <linux/irqchip/arm-gic.h> #include <asm/cacheflush.h> #include <asm/smp_plat.h> @@ -58,13 +57,6 @@ static DEFINE_SPINLOCK(boot_lock); static void __cpuinit ux500_secondary_init(unsigned int cpu) { /* - * if any interrupts are already enabled for the primary - * core (e.g. timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - - /* * let the primary processor know we're out of the * pen, then head off into the C entry point */ diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 25160aeaa3b..9506688ef9a 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -807,6 +807,6 @@ void __init versatile_timer_init(void) writel(0, TIMER2_VA_BASE + TIMER_CTRL); writel(0, TIMER3_VA_BASE + TIMER_CTRL); - sp804_clocksource_init(TIMER3_VA_BASE, "timer3"); - sp804_clockevents_init(TIMER0_VA_BASE, IRQ_TIMERINT0_1, "timer0"); + sp804_clocksource_init(TIMER3_VA_BASE, "timer3", NULL); + sp804_clockevents_init(TIMER0_VA_BASE, IRQ_TIMERINT0_1, "timer0", NULL); } diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 52d315b792c..bdad7d49d4d 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -1,5 +1,6 @@ config ARCH_VEXPRESS bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7 + select ARCH_HAS_CPUFREQ select ARCH_REQUIRE_GPIOLIB select ARM_AMBA select ARM_GIC @@ -17,6 +18,9 @@ config ARCH_VEXPRESS select NO_IOPORT select PLAT_VERSATILE select PLAT_VERSATILE_CLCD + select POWER_RESET + select POWER_RESET_VEXPRESS + select POWER_SUPPLY select REGULATOR_FIXED_VOLTAGE if REGULATOR select VEXPRESS_CONFIG help @@ -52,4 +56,21 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA config ARCH_VEXPRESS_CA9X4 bool "Versatile Express Cortex-A9x4 tile" +config ARCH_VEXPRESS_DCSCB + bool "Dual Cluster System Control Block (DCSCB) support" + depends on MCPM + select ARM_CCI + help + Support for the Dual Cluster System Configuration Block (DCSCB). + This is needed to provide CPU and cluster power management + on RTSM. + +config ARCH_VEXPRESS_TC2 + bool "TC2 cluster management" + depends on MCPM + select ARM_SPC + select ARM_CCI + help + Support for CPU and cluster power management on TC2. 
+ endmenu diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 80b64971fbd..14193dc7e6e 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -4,7 +4,15 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \ -I$(srctree)/arch/arm/plat-versatile/include -obj-y := v2m.o reset.o +obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o +obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o +CFLAGS_REMOVE_dcscb.o = -pg +obj-$(CONFIG_ARCH_VEXPRESS_TC2) += tc2_pm.o tc2_pm_setup.o +CFLAGS_REMOVE_tc2_pm.o = -pg +ifeq ($(CONFIG_ARCH_VEXPRESS_TC2),y) +obj-$(CONFIG_ARM_PSCI) += tc2_pm_psci.o +CFLAGS_REMOVE_tc2_pm_psci.o = -pg +endif obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h index f134cd4a85f..bde4374ab6d 100644 --- a/arch/arm/mach-vexpress/core.h +++ b/arch/arm/mach-vexpress/core.h @@ -6,6 +6,8 @@ void vexpress_dt_smp_map_io(void); +bool vexpress_smp_init_ops(void); + extern struct smp_operations vexpress_smp_ops; extern void vexpress_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c new file mode 100644 index 00000000000..0dc3caca227 --- /dev/null +++ b/arch/arm/mach-vexpress/dcscb.c @@ -0,0 +1,256 @@ +/* + * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block + * + * Created by: Nicolas Pitre, May 2012 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/of_address.h> +#include <linux/vexpress.h> +#include <linux/arm-cci.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/cp15.h> +#include <asm/psci.h> + + +#define RST_HOLD0 0x0 +#define RST_HOLD1 0x4 +#define SYS_SWRESET 0x8 +#define RST_STAT0 0xc +#define RST_STAT1 0x10 +#define EAG_CFG_R 0x20 +#define EAG_CFG_W 0x24 +#define KFC_CFG_R 0x28 +#define KFC_CFG_W 0x2c +#define DCS_CFG_R 0x30 + +/* + * We can't use regular spinlocks. In the switcher case, it is possible + * for an outbound CPU to call power_down() after its inbound counterpart + * is already live using the same logical CPU number which trips lockdep + * debugging. + */ +static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +static void __iomem *dcscb_base; +static int dcscb_use_count[4][2]; +static int dcscb_mcpm_cpu_mask[2]; + +static int dcscb_power_up(unsigned int cpu, unsigned int cluster) +{ + unsigned int rst_hold, cpumask = (1 << cpu); + unsigned int mcpm_mask = dcscb_mcpm_cpu_mask[cluster]; + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + if (cpu >= 4 || cluster >= 2) + return -EINVAL; + + /* + * Since this is called with IRQs enabled, and no arch_spin_lock_irq + * variant exists, we need to disable IRQs manually here. 
+ */ + local_irq_disable(); + arch_spin_lock(&dcscb_lock); + + dcscb_use_count[cpu][cluster]++; + if (dcscb_use_count[cpu][cluster] == 1) { + rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4); + if (rst_hold & (1 << 8)) { + /* remove cluster reset and add individual CPU's reset */ + rst_hold &= ~(1 << 8); + rst_hold |= mcpm_mask; + } + rst_hold &= ~(cpumask | (cpumask << 4)); + writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4); + } else if (dcscb_use_count[cpu][cluster] != 2) { + /* + * The only possible values are: + * 0 = CPU down + * 1 = CPU (still) up + * 2 = CPU requested to be up before it had a chance + * to actually make itself down. + * Any other value is a bug. + */ + BUG(); + } + + arch_spin_unlock(&dcscb_lock); + local_irq_enable(); + + return 0; +} + +static void dcscb_power_down(void) +{ + unsigned int mpidr, cpu, cluster, rst_hold, cpumask, mcpm_mask; + bool last_man = false, skip_wfi = false; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpumask = (1 << cpu); + mcpm_mask = dcscb_mcpm_cpu_mask[cluster]; + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cpu >= 4 || cluster >= 2); + + __mcpm_cpu_going_down(cpu, cluster); + + arch_spin_lock(&dcscb_lock); + BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); + dcscb_use_count[cpu][cluster]--; + if (dcscb_use_count[cpu][cluster] == 0) { + rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4); + rst_hold |= cpumask; + if (((rst_hold | (rst_hold >> 4)) & mcpm_mask) == mcpm_mask) { + rst_hold |= (1 << 8); + last_man = true; + } + writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4); + } else if (dcscb_use_count[cpu][cluster] == 1) { + /* + * A power_up request went ahead of us. + * Even if we do not want to shut this CPU down, + * the caller expects a certain state as if the WFI + * was aborted. So let's continue with cache cleaning. + */ + skip_wfi = true; + } else + BUG(); + + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { + arch_spin_unlock(&dcscb_lock); + + /* + * Flush all cache levels for this cluster. + * + * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need + * a preliminary flush here for those CPUs. At least, that's + * the theory -- without the extra flush, Linux explodes on + * RTSM (maybe not needed anymore, to be investigated). + */ + flush_cache_all(); + set_cr(get_cr() & ~CR_C); + flush_cache_all(); + + /* + * This is a harmless no-op. On platforms with a real + * outer cache this might either be needed or not, + * depending on where the outer cache sits. + */ + outer_flush_all(); + + /* Disable local coherency by clearing the ACTLR "SMP" bit: */ + set_auxcr(get_auxcr() & ~(1 << 6)); + + /* + * Disable cluster-level coherency by masking + * incoming snoops and DVM messages: + */ + disable_cci(cluster); + + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + } else { + arch_spin_unlock(&dcscb_lock); + + /* + * Flush the local CPU cache. + * + * A15/A7 can hit in the cache with SCTLR.C=0, so we don't need + * a preliminary flush here for those CPUs. At least, that's + * the theory -- without the extra flush, Linux explodes on + * RTSM (maybe not needed anymore, to be investigated). 
+ */ + flush_cache_louis(); + set_cr(get_cr() & ~CR_C); + flush_cache_louis(); + + /* Disable local coherency by clearing the ACTLR "SMP" bit: */ + set_auxcr(get_auxcr() & ~(1 << 6)); + } + + __mcpm_cpu_down(cpu, cluster); + + /* Now we are prepared for power-down, do it: */ + if (!skip_wfi) { + dsb(); + wfi(); + } + + /* Not dead at this point? Let our caller cope. */ +} + +static const struct mcpm_platform_ops dcscb_power_ops = { + .power_up = dcscb_power_up, + .power_down = dcscb_power_down, +}; + +static void __init dcscb_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cpu >= 4 || cluster >= 2); + dcscb_use_count[cpu][cluster] = 1; +} + +extern void dcscb_power_up_setup(unsigned int affinity_level); + +static int __init dcscb_init(void) +{ + struct device_node *node; + unsigned int cfg; + int ret; + + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + + node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb"); + if (!node) + return -ENODEV; + dcscb_base = of_iomap(node, 0); + if (!dcscb_base) + return -EADDRNOTAVAIL; + cfg = readl_relaxed(dcscb_base + DCS_CFG_R); + dcscb_mcpm_cpu_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1; + dcscb_mcpm_cpu_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1; + dcscb_usage_count_init(); + + ret = mcpm_platform_register(&dcscb_power_ops); + if (!ret) + ret = mcpm_sync_init(dcscb_power_up_setup); + if (ret) { + iounmap(dcscb_base); + return ret; + } + + /* + * Future entries into the kernel can now go + * through the cluster entry vectors. + */ + vexpress_flags_set(virt_to_phys(mcpm_entry_point)); + + return 0; +} + +early_initcall(dcscb_init); diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S new file mode 100644 index 00000000000..93bd13f458a --- /dev/null +++ b/arch/arm/mach-vexpress/dcscb_setup.S @@ -0,0 +1,80 @@ +/* + * arch/arm/mach-vexpress/dcscb_setup.S + * + * Created by: Dave Martin, 2012-06-22 + * Copyright: (C) 2012-2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#include <linux/linkage.h> +#include <asm/mcpm.h> + + +#define SLAVE_SNOOPCTL_OFFSET 0 +#define SNOOPCTL_SNOOP_ENABLE (1 << 0) +#define SNOOPCTL_DVM_ENABLE (1 << 1) + +#define CCI_STATUS_OFFSET 0xc +#define STATUS_CHANGE_PENDING (1 << 0) + +#define CCI_SLAVE_OFFSET(n) (0x1000 + 0x1000 * (n)) + +#define RTSM_CCI_PHYS_BASE 0x2c090000 +#define RTSM_CCI_SLAVE_A15 3 +#define RTSM_CCI_SLAVE_A7 4 + +#define RTSM_CCI_A15_OFFSET CCI_SLAVE_OFFSET(RTSM_CCI_SLAVE_A15) +#define RTSM_CCI_A7_OFFSET CCI_SLAVE_OFFSET(RTSM_CCI_SLAVE_A7) + + +ENTRY(dcscb_power_up_setup) + + cmp r0, #0 @ check affinity level + beq 2f + +/* + * Enable cluster-level coherency, in preparation for turning on the MMU. + * The ACTLR SMP bit does not need to be set here, because cpu_resume() + * already restores that. + */ + + mrc p15, 0, r0, c0, c0, 5 @ MPIDR + ubfx r0, r0, #8, #4 @ cluster + + @ A15/A7 may not require explicit L2 invalidation on reset, dependent + @ on hardware integration decisions. + @ For now, this code assumes that L2 is either already invalidated, or + @ invalidation is not required. 
+ + ldr r3, =RTSM_CCI_PHYS_BASE + RTSM_CCI_A15_OFFSET + cmp r0, #0 @ A15 cluster? + addne r3, r3, #RTSM_CCI_A7_OFFSET - RTSM_CCI_A15_OFFSET + + @ r3 now points to the correct CCI slave register block + + ldr r0, [r3, #SLAVE_SNOOPCTL_OFFSET] + orr r0, r0, #SNOOPCTL_SNOOP_ENABLE | SNOOPCTL_DVM_ENABLE + str r0, [r3, #SLAVE_SNOOPCTL_OFFSET] @ enable CCI snoops + + @ Wait for snoop control change to complete: + + ldr r3, =RTSM_CCI_PHYS_BASE + +1: ldr r0, [r3, #CCI_STATUS_OFFSET] + tst r0, #STATUS_CHANGE_PENDING + bne 1b + + dsb @ Synchronise side-effects of enabling CCI + + bx lr + +2: @ Implementation-specific local CPU setup operations should go here, + @ if any. In this case, there is nothing to do. + + bx lr + +ENDPROC(dcscb_power_up_setup) diff --git a/arch/arm/mach-vexpress/include/mach/tc2.h b/arch/arm/mach-vexpress/include/mach/tc2.h new file mode 100644 index 00000000000..d3b5a2225a0 --- /dev/null +++ b/arch/arm/mach-vexpress/include/mach/tc2.h @@ -0,0 +1,10 @@ +#ifndef __MACH_TC2_H +#define __MACH_TC2_H + +/* + * cpu and cluster limits + */ +#define TC2_MAX_CPUS 3 +#define TC2_MAX_CLUSTERS 2 + +#endif diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c index dc1ace55d55..21368ba6ca2 100644 --- a/arch/arm/mach-vexpress/platsmp.c +++ b/arch/arm/mach-vexpress/platsmp.c @@ -12,9 +12,11 @@ #include <linux/errno.h> #include <linux/smp.h> #include <linux/io.h> +#include <linux/of.h> #include <linux/of_fdt.h> #include <linux/vexpress.h> +#include <asm/mcpm.h> #include <asm/smp_scu.h> #include <asm/mach/map.h> @@ -203,3 +205,14 @@ struct smp_operations __initdata vexpress_smp_ops = { .cpu_die = vexpress_cpu_die, #endif }; + +bool __init vexpress_smp_init_ops(void) +{ +#ifdef CONFIG_MCPM + if(of_find_compatible_node(NULL, NULL, "arm,cci")) { + mcpm_smp_set_ops(); + return true; + } +#endif + return false; +} diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c new file mode 100644 index 00000000000..f2e9959fb26 --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -0,0 +1,271 @@ +/* + * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support + * + * Created by: Nicolas Pitre, October 2012 + * Copyright: (C) 2012 Linaro Limited + * + * Some portions of this file were originally written by Achin Gupta + * Copyright: (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/irqchip/arm-gic.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/cp15.h> +#include <asm/psci.h> + +#include <mach/motherboard.h> +#include <mach/tc2.h> + +#include <linux/vexpress.h> +#include <linux/arm-cci.h> + +/* + * We can't use regular spinlocks. In the switcher case, it is possible + * for an outbound CPU to call power_down() after its inbound counterpart + * is already live using the same logical CPU number which trips lockdep + * debugging. 
+ */ +static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED; + +static int tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS]; + +static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster) +{ + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + if (cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)) + return -EINVAL; + + /* + * Since this is called with IRQs enabled, and no arch_spin_lock_irq + * variant exists, we need to disable IRQs manually here. + */ + local_irq_disable(); + arch_spin_lock(&tc2_pm_lock); + + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster]) + vexpress_spc_powerdown_enable(cluster, 0); + + tc2_pm_use_count[cpu][cluster]++; + if (tc2_pm_use_count[cpu][cluster] == 1) { + vexpress_spc_write_bxaddr_reg(cluster, cpu, + virt_to_phys(mcpm_entry_point)); + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1); + } else if (tc2_pm_use_count[cpu][cluster] != 2) { + /* + * The only possible values are: + * 0 = CPU down + * 1 = CPU (still) up + * 2 = CPU requested to be up before it had a chance + * to actually make itself down. + * Any other value is a bug. + */ + BUG(); + } + + arch_spin_unlock(&tc2_pm_lock); + local_irq_enable(); + + return 0; +} + +static void tc2_pm_down(u64 residency) +{ + unsigned int mpidr, cpu, cluster; + bool last_man = false, skip_wfi = false; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + __mcpm_cpu_going_down(cpu, cluster); + + arch_spin_lock(&tc2_pm_lock); + BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); + tc2_pm_use_count[cpu][cluster]--; + if (tc2_pm_use_count[cpu][cluster] == 0) { + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1); + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster] && + (!residency || residency > 5000)) { + vexpress_spc_powerdown_enable(cluster, 1); + vexpress_spc_set_global_wakeup_intr(1); + last_man = true; + } + } else if (tc2_pm_use_count[cpu][cluster] == 1) { + /* + * A power_up request went ahead of us. + * Even if we do not want to shut this CPU down, + * the caller expects a certain state as if the WFI + * was aborted. So let's continue with cache cleaning. + */ + skip_wfi = true; + } else + BUG(); + + gic_cpu_if_down(); + + if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { + arch_spin_unlock(&tc2_pm_lock); + + set_cr(get_cr() & ~CR_C); + flush_cache_all(); + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + + disable_cci(cluster); + + /* + * Ensure that both C & I bits are disabled in the SCTLR + * before disabling ACE snoops. This ensures that no + * coherency traffic will originate from this cpu after + * ACE snoops are turned off. + */ + cpu_proc_fin(); + + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + } else { + /* + * If last man then undo any setup done previously. + */ + if (last_man) { + vexpress_spc_powerdown_enable(cluster, 0); + vexpress_spc_set_global_wakeup_intr(0); + } + + arch_spin_unlock(&tc2_pm_lock); + + set_cr(get_cr() & ~CR_C); + flush_cache_louis(); + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + } + + __mcpm_cpu_down(cpu, cluster); + + /* Now we are prepared for power-down, do it: */ + if (!skip_wfi) + wfi(); + + /* Not dead at this point? Let our caller cope. 
*/ +} + +static void tc2_pm_power_down(void) +{ + tc2_pm_down(0); +} + +static void tc2_pm_suspend(u64 residency) +{ + extern void tc2_resume(void); + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + vexpress_spc_write_bxaddr_reg(cluster, cpu, + virt_to_phys(tc2_resume)); + + tc2_pm_down(residency); +} + +static void tc2_pm_powered_up(void) +{ + unsigned int mpidr, cpu, cluster; + unsigned long flags; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + local_irq_save(flags); + arch_spin_lock(&tc2_pm_lock); + + if (!tc2_pm_use_count[0][cluster] && + !tc2_pm_use_count[1][cluster] && + !tc2_pm_use_count[2][cluster]) { + vexpress_spc_powerdown_enable(cluster, 0); + vexpress_spc_set_global_wakeup_intr(0); + } + + if (!tc2_pm_use_count[cpu][cluster]) + tc2_pm_use_count[cpu][cluster] = 1; + + vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 0); + vexpress_spc_write_bxaddr_reg(cluster, cpu, 0); + + arch_spin_unlock(&tc2_pm_lock); + local_irq_restore(flags); +} + +static const struct mcpm_platform_ops tc2_pm_power_ops = { + .power_up = tc2_pm_power_up, + .power_down = tc2_pm_power_down, + .suspend = tc2_pm_suspend, + .powered_up = tc2_pm_powered_up, +}; + +static void __init tc2_pm_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + tc2_pm_use_count[cpu][cluster] = 1; +} + +extern void tc2_pm_power_up_setup(unsigned int affinity_level); + +static int __init tc2_pm_init(void) +{ + int ret; + + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + + if (!vexpress_spc_check_loaded()) + return -ENODEV; + + tc2_pm_usage_count_init(); + + ret = mcpm_platform_register(&tc2_pm_power_ops); + if (!ret) + ret = mcpm_sync_init(tc2_pm_power_up_setup); + if (!ret) + pr_info("TC2 power management initialized\n"); + return ret; +} + +early_initcall(tc2_pm_init); diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c new file mode 100644 index 00000000000..5a5e4f56849 --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm_psci.c @@ -0,0 +1,168 @@ +/* + * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support + * + * Created by: Achin Gupta, December 2012 + * Copyright: (C) 2012 ARM Limited + * + * Some portions of this file were originally written by Nicolas Pitre + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/errno.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/psci.h> +#include <asm/atomic.h> +#include <asm/cputype.h> +#include <asm/cp15.h> + +#include <mach/motherboard.h> +#include <mach/tc2.h> + +#include <linux/vexpress.h> + +/* + * Platform specific state id understood by the firmware and used to + * program the power controller + */ +#define PSCI_POWER_STATE_ID 0 + +static atomic_t tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS]; + +static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster) +{ + unsigned int mpidr = (cluster << 8) | cpu; + int ret = 0; + + BUG_ON(!psci_ops.cpu_on); + + switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * This is a request to power up a cpu that linux thinks has + * been powered down. Retries are needed if the firmware has + * not yet processed the power down request. + */ + do + ret = psci_ops.cpu_on(mpidr, + virt_to_phys(mcpm_entry_point)); + while (ret == -EAGAIN); + + return ret; + case 2: + /* This power up request has overtaken a power down request */ + return ret; + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_power_down(void) +{ + struct psci_power_state power_state; + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + BUG_ON(!psci_ops.cpu_off); + + switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * Overtaken by a power up. Flush caches, exit coherency, + * return & fake a reset + */ + set_cr(get_cr() & ~CR_C); + + flush_cache_louis(); + + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + + return; + case 0: + /* A normal request to possibly power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_off(power_state); + + /* On success this function never returns */ + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_suspend(u64 unused) +{ + struct psci_power_state power_state; + + BUG_ON(!psci_ops.cpu_suspend); + + /* On TC2 always attempt to power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_suspend(power_state, virt_to_phys(mcpm_entry_point)); + + /* On success this function never returns */ + BUG(); +} + +static const struct mcpm_platform_ops tc2_pm_power_ops = { + .power_up = tc2_pm_psci_power_up, + .power_down = tc2_pm_psci_power_down, + .suspend = tc2_pm_psci_suspend, +}; + +static void __init tc2_pm_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_MAX_CLUSTERS || + cpu >= vexpress_spc_get_nb_cpus(cluster)); + + atomic_set(&tc2_pm_use_count[cpu][cluster], 1); +} + +static int __init tc2_pm_psci_init(void) +{ + int ret; + + ret = psci_probe(); + if (ret) { + pr_debug("psci not found. 
Aborting psci init\n"); + return -ENODEV; + } + + tc2_pm_usage_count_init(); + + ret = mcpm_platform_register(&tc2_pm_power_ops); + if (!ret) + ret = mcpm_sync_init(NULL); + if (!ret) + pr_info("TC2 power management initialized\n"); + return ret; +} + +early_initcall(tc2_pm_psci_init); diff --git a/arch/arm/mach-vexpress/tc2_pm_setup.S b/arch/arm/mach-vexpress/tc2_pm_setup.S new file mode 100644 index 00000000000..4728f83731a --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm_setup.S @@ -0,0 +1,102 @@ +/* + * arch/arm/mach-vexpress/tc2_pm_setup.S + * + * Created by: Nicolas Pitre, October 2012 + * (based on dcscb_setup.S by Dave Martin) + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + + +#include <linux/linkage.h> +#include <asm/mcpm.h> + + +#define SPC_PHYS_BASE 0x7FFF0000 +#define SPC_WAKE_INT_STAT 0xb2c + +#define SNOOP_CTL_A15 0x404 +#define SNOOP_CTL_A7 0x504 + +#define A15_SNOOP_MASK (0x3 << 7) +#define A7_SNOOP_MASK (0x1 << 13) + +#define A15_BX_ADDR0 0xB68 + + +#define CCI_PHYS_BASE 0x2c090000 + +#define SLAVE_SNOOPCTL_OFFSET 0 +#define SNOOPCTL_SNOOP_ENABLE (1 << 0) +#define SNOOPCTL_DVM_ENABLE (1 << 1) + +#define CCI_STATUS_OFFSET 0xc +#define STATUS_CHANGE_PENDING (1 << 0) + +#define CCI_SLAVE_OFFSET(n) (0x1000 + 0x1000 * (n)) +#define CCI_SLAVE_A15 3 +#define CCI_SLAVE_A7 4 +#define CCI_A15_OFFSET CCI_SLAVE_OFFSET(CCI_SLAVE_A15) +#define CCI_A7_OFFSET CCI_SLAVE_OFFSET(CCI_SLAVE_A7) + + +ENTRY(tc2_resume) + mrc p15, 0, r0, c0, c0, 5 + ubfx r1, r0, #0, #4 @ r1 = cpu + ubfx r2, r0, #8, #4 @ r2 = cluster + add r1, r1, r2, lsl #2 @ r1 = index of CPU in WAKE_INT_STAT + ldr r3, =SPC_PHYS_BASE + SPC_WAKE_INT_STAT + ldr r3, [r3] + lsr r3, r1 + tst r3, #1 + wfieq @ if no pending IRQ reenters wfi + b mcpm_entry_point +ENDPROC(tc2_resume) + +/* + * Enable cluster-level coherency, in preparation for turning on the MMU. + * The ACTLR SMP bit does not need to be set here, because cpu_resume() + * already restores that. + */ + +ENTRY(tc2_pm_power_up_setup) + + cmp r0, #0 + beq 2f + + @ Enable CCI snoops + mrc p15, 0, r0, c0, c0, 5 @ MPIDR + ubfx r0, r0, #8, #4 @ cluster + ldr r3, =CCI_PHYS_BASE + CCI_A15_OFFSET + cmp r0, #0 @ A15 cluster? 
+ addne r3, r3, #CCI_A7_OFFSET - CCI_A15_OFFSET + + @ r3 now points to the correct CCI slave register block + ldr r0, [r3, #SLAVE_SNOOPCTL_OFFSET] + orr r0, r0, #SNOOPCTL_SNOOP_ENABLE | SNOOPCTL_DVM_ENABLE + str r0, [r3, #SLAVE_SNOOPCTL_OFFSET] @ enable CCI snoops + + @ Wait for snoop control change to complete: + ldr r3, =CCI_PHYS_BASE +1: ldr r0, [r3, #CCI_STATUS_OFFSET] + tst r0, #STATUS_CHANGE_PENDING + bne 1b + + bx lr + +2: @ Clear the BX addr register + ldr r3, =SPC_PHYS_BASE + A15_BX_ADDR0 + mrc p15, 0, r0, c0, c0, 5 @ MPIDR + ubfx r1, r0, #8, #4 @ cluster + ubfx r0, r0, #0, #4 @ cpu + add r3, r3, r1, lsl #4 + mov r1, #0 + str r1, [r3, r0, lsl #2] + dsb + + bx lr + +ENDPROC(tc2_pm_power_up_setup) diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 915683cb67d..421b186df2b 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -8,6 +8,7 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/irqchip.h> +#include <linux/memblock.h> #include <linux/of_address.h> #include <linux/of_fdt.h> #include <linux/of_irq.h> @@ -56,7 +57,8 @@ static struct map_desc v2m_io_desc[] __initdata = { }, }; -static void __init v2m_sp804_init(void __iomem *base, unsigned int irq) +static void __init v2m_sp804_init(void __iomem *base, unsigned int irq, + struct clk *clk1, struct clk *clk2) { if (WARN_ON(!base || irq == NO_IRQ)) return; @@ -64,8 +66,8 @@ static void __init v2m_sp804_init(void __iomem *base, unsigned int irq) writel(0, base + TIMER_1_BASE + TIMER_CTRL); writel(0, base + TIMER_2_BASE + TIMER_CTRL); - sp804_clocksource_init(base + TIMER_2_BASE, "v2m-timer1"); - sp804_clockevents_init(base + TIMER_1_BASE, irq, "v2m-timer0"); + sp804_clocksource_init(base + TIMER_2_BASE, "v2m-timer1", clk2); + sp804_clockevents_init(base + TIMER_1_BASE, irq, "v2m-timer0", clk1); } @@ -288,7 +290,7 @@ static struct amba_device *v2m_amba_devs[] __initdata = { static void __init v2m_timer_init(void) { vexpress_clk_init(ioremap(V2M_SYSCTL, SZ_4K)); - v2m_sp804_init(ioremap(V2M_TIMER01, SZ_4K), IRQ_V2M_TIMER0); + v2m_sp804_init(ioremap(V2M_TIMER01, SZ_4K), IRQ_V2M_TIMER0, NULL, NULL); } static void __init v2m_init_early(void) @@ -361,8 +363,6 @@ static void __init v2m_init(void) for (i = 0; i < ARRAY_SIZE(v2m_amba_devs); i++) amba_device_register(v2m_amba_devs[i], &iomem_resource); - pm_power_off = vexpress_power_off; - ct_desc->init_tile(); } @@ -374,9 +374,33 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express") .init_irq = v2m_init_irq, .init_time = v2m_timer_init, .init_machine = v2m_init, - .restart = vexpress_restart, MACHINE_END +static void __init v2m_dt_hdlcd_init(void) +{ + struct device_node *node; + int len, na, ns; + const __be32 *prop; + phys_addr_t fb_base, fb_size; + + node = of_find_compatible_node(NULL, NULL, "arm,hdlcd"); + if (!node) + return; + + na = of_n_addr_cells(node); + ns = of_n_size_cells(node); + + prop = of_get_property(node, "framebuffer", &len); + if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) + return; + + fb_base = of_read_number(prop, na); + fb_size = of_read_number(prop + na, ns); + + if (WARN_ON(memblock_remove(fb_base, fb_size))) + return; +}; + static struct map_desc v2m_rs1_io_desc __initdata = { .virtual = V2M_PERIPH, .pfn = __phys_to_pfn(0x1c000000), @@ -427,6 +451,8 @@ void __init v2m_dt_init_early(void) pr_warning("vexpress: DT HBI (%x) is not matching " "hardware (%x)!\n", dt_hbi, hbi); } + + v2m_dt_hdlcd_init(); } static void __init v2m_dt_timer_init(void) @@ -442,7 +468,9 @@ static void __init 
v2m_dt_timer_init(void) pr_info("Using SP804 '%s' as a clock & events source\n", node->full_name); v2m_sp804_init(of_iomap(node, 0), - irq_of_parse_and_map(node, 0)); + irq_of_parse_and_map(node, 0), + of_clk_get_by_name(node, "timclken1"), + of_clk_get_by_name(node, "timclken2")); } if (arch_timer_of_register() != 0) @@ -464,7 +492,6 @@ static void __init v2m_dt_init(void) { l2x0_of_init(0x00400000, 0xfe0fffff); of_platform_populate(NULL, v2m_dt_bus_match, NULL, NULL); - pm_power_off = vexpress_power_off; } static const char * const v2m_dt_match[] __initconst = { @@ -476,10 +503,10 @@ static const char * const v2m_dt_match[] __initconst = { DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express") .dt_compat = v2m_dt_match, .smp = smp_ops(vexpress_smp_ops), + .smp_init = smp_init_ops(vexpress_smp_init_ops), .map_io = v2m_dt_map_io, .init_early = v2m_dt_init_early, .init_irq = irqchip_init, .init_time = v2m_dt_timer_init, .init_machine = v2m_dt_init, - .restart = vexpress_restart, MACHINE_END diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c index 8badaabe70a..f4143f5bfa5 100644 --- a/arch/arm/mach-virt/platsmp.c +++ b/arch/arm/mach-virt/platsmp.c @@ -21,8 +21,6 @@ #include <linux/smp.h> #include <linux/of.h> -#include <linux/irqchip/arm-gic.h> - #include <asm/psci.h> #include <asm/smp_plat.h> @@ -45,14 +43,8 @@ static int __cpuinit virt_boot_secondary(unsigned int cpu, return -ENODEV; } -static void __cpuinit virt_secondary_init(unsigned int cpu) -{ - gic_secondary_init(0); -} - struct smp_operations __initdata virt_smp_ops = { .smp_init_cpus = virt_smp_init_cpus, .smp_prepare_cpus = virt_smp_prepare_cpus, - .smp_secondary_init = virt_secondary_init, .smp_boot_secondary = virt_boot_secondary, }; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 5dbf13f954f..e207aa5f846 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -446,8 +446,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (pud_none(*pud_k)) goto bad_area; - if (!pud_present(*pud)) + if (!pud_present(*pud)) { set_pud(pud, *pud_k); + /* + * There is a small window during free_pgtables() where the + * user *pud entry is 0 but the TLB has not been invalidated + * and we get a level 2 (pmd) translation fault caused by the + * intermediate TLB caching of the old level 1 (pud) entry. + */ + flush_tlb_kernel_page(addr); + } pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); @@ -470,8 +478,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, #endif if (pmd_none(pmd_k[index])) goto bad_area; + if (!pmd_present(pmd[index])) + copy_pmd(pmd, pmd_k); - copy_pmd(pmd, pmd_k); return 0; bad_area: diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 78f520bc0e9..2f76880060d 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -110,7 +110,9 @@ ENTRY(cpu_v7_set_pte_ext) ARM( str r3, [r0, #2048]! 
) THUMB( add r0, r0, #2048 ) THUMB( str r3, [r0] ) - mcr p15, 0, r0, c7, c10, 1 @ flush_pte + mrc p15, 0, r3, c0, c1, 7 @ read ID_MMFR3 + tst r3, #0xf << 20 @ check the coherent walk bits + mcreq p15, 0, r0, c7, c10, 1 @ clean D-cache to PoU #endif mov pc, lr ENDPROC(cpu_v7_set_pte_ext) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 6ffd78c0f9a..2098e026632 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -73,7 +73,9 @@ ENTRY(cpu_v7_set_pte_ext) tst r3, #1 << (55 - 32) @ L_PTE_DIRTY orreq r2, #L_PTE_RDONLY 1: strd r2, r3, [r0] - mcr p15, 0, r0, c7, c10, 1 @ flush_pte + mrc p15, 0, r3, c0, c1, 7 @ read ID_MMFR3 + tst r3, #0xf << 20 @ check the coherent walk bits + mcreq p15, 0, r0, c7, c10, 1 @ clean D-cache to PoU #endif mov pc, lr ENDPROC(cpu_v7_set_pte_ext) diff --git a/arch/arm/plat-samsung/irq-vic-timer.c b/arch/arm/plat-samsung/irq-vic-timer.c index f980cf3d2ba..5d205e74e49 100644 --- a/arch/arm/plat-samsung/irq-vic-timer.c +++ b/arch/arm/plat-samsung/irq-vic-timer.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/irqchip/chained_irq.h> #include <linux/io.h> #include <mach/map.h> @@ -23,8 +24,6 @@ #include <plat/irq-vic-timer.h> #include <plat/regs-timer.h> -#include <asm/mach/irq.h> - static void s3c_irq_demux_vic_timer(unsigned int irq, struct irq_desc *desc) { struct irq_chip *chip = irq_get_chip(irq); diff --git a/arch/arm/plat-samsung/s5p-irq-gpioint.c b/arch/arm/plat-samsung/s5p-irq-gpioint.c index bae56131a50..fafdb059043 100644 --- a/arch/arm/plat-samsung/s5p-irq-gpioint.c +++ b/arch/arm/plat-samsung/s5p-irq-gpioint.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/irqchip/chained_irq.h> #include <linux/io.h> #include <linux/gpio.h> #include <linux/slab.h> @@ -22,8 +23,6 @@ #include <plat/gpio-core.h> #include <plat/gpio-cfg.h> -#include <asm/mach/irq.h> - #define GPIO_BASE(chip) ((void __iomem *)((unsigned long)((chip)->base) & 0xFFFFF000u)) #define CON_OFFSET 0x700 diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index f2ac1556177..1e1b2d76974 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -14,7 +14,6 @@ #include <linux/device.h> #include <linux/jiffies.h> #include <linux/smp.h> -#include <linux/irqchip/arm-gic.h> #include <asm/cacheflush.h> #include <asm/smp_plat.h> @@ -37,13 +36,6 @@ static DEFINE_SPINLOCK(boot_lock); void __cpuinit versatile_secondary_init(unsigned int cpu) { /* - * if any interrupts are already enabled for the primary - * core (e.g. 
timer irq), then they will not have been enabled - * for us: do so - */ - gic_secondary_init(0); - - /* * let the primary processor know we're out of the * pen, then head off into the C entry point */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9b6d19f7407..43b0e9f10a3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -6,6 +6,7 @@ config ARM64 select ARCH_WANT_FRAME_POINTERS select ARM_AMBA select ARM_ARCH_TIMER + select ARM_GIC select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS @@ -31,6 +32,8 @@ config ARM64 select OF select OF_EARLY_FLATTREE select PERF_USE_VMALLOC + select POWER_RESET + select POWER_SUPPLY select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE @@ -99,7 +102,17 @@ source "init/Kconfig" source "kernel/Kconfig.freezer" -menu "System Type" +menu "Platform selection" + +config ARCH_VEXPRESS + bool "ARMv8 software model (Versatile Express)" + select ARCH_REQUIRE_GPIOLIB + select COMMON_CLK_VERSATILE + select POWER_RESET_VEXPRESS + select VEXPRESS_CONFIG + help + This enables support for the ARMv8 software model (Versatile + Express). endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212f..e1d64f58001 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,8 +15,6 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only @@ -38,7 +36,6 @@ export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ libs-y := arch/arm64/lib/ $(libs-y) -libs-y += $(LIBGCC) # Default target when executing plain make KBUILD_IMAGE := Image.gz diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 32ac0aef006..68457e9e097 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -1,3 +1,5 @@ +dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb + targets += dtbs targets += $(dtb-y) diff --git a/arch/arm64/boot/dts/foundation-v8.dts b/arch/arm64/boot/dts/foundation-v8.dts new file mode 100644 index 00000000000..fe9f1ecf679 --- /dev/null +++ b/arch/arm64/boot/dts/foundation-v8.dts @@ -0,0 +1,230 @@ +/* + * ARM Ltd. 
+ * + * ARMv8 Foundation model DTS + */ + +/dts-v1/; + +/ { + model = "Foundation-v8A"; + compatible = "arm,foundation-aarch64", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + /* chosen */ + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@1 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <1>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@2 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <2>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@3 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <3>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x00000000 0x80000000 0 0x80000000>, + <0x00000008 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x2c001000 0 0x1000>, + <0x0 0x2c002000 0 0x1000>, + <0x0 0x2c004000 0 0x2000>, + <0x0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 13 0xff01>, + <1 14 0xff01>, + <1 11 0xff01>, + <1 10 0xff01>; + clock-frequency = <100000000>; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = <0 60 4>, + <0 61 4>, + <0 62 4>, + <0 63 4>; + }; + + smb { + compatible = "arm,vexpress,v2m-p1", "simple-bus"; + arm,v2m-memory-map = "rs1"; + #address-cells = <2>; /* SMB chipselect number and offset */ + #size-cells = <1>; + + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + ethernet@2,02000000 { + compatible = "smsc,lan91c111"; + reg = <2 0x02000000 0x10000>; + interrupts = <15>; + }; + + v2m_clk24mhz: clk24mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + clock-output-names = "v2m:clk24mhz"; + }; + + v2m_refclk1mhz: refclk1mhz { + 
compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "v2m:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "v2m:refclk32khz"; + }; + + iofpga@3,00000000 { + compatible = "arm,amba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 3 0 0x200000>; + + v2m_sysreg: sysreg@010000 { + compatible = "arm,vexpress-sysreg"; + reg = <0x010000 0x1000>; + }; + + v2m_serial0: uart@090000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x090000 0x1000>; + interrupts = <5>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial1: uart@0a0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0a0000 0x1000>; + interrupts = <6>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial2: uart@0b0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0b0000 0x1000>; + interrupts = <7>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial3: uart@0c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0c0000 0x1000>; + interrupts = <8>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + virtio_block@0130000 { + compatible = "virtio,mmio"; + reg = <0x130000 0x1000>; + interrupts = <42>; + }; + }; + }; +}; diff --git a/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts new file mode 100644 index 00000000000..c296b014ed1 --- /dev/null +++ b/arch/arm64/boot/dts/rtsm_ve-aemv8a.dts @@ -0,0 +1,206 @@ +/* + * ARM Ltd. Fast Models + * + * Architecture Envelope Model (AEM) ARMv8-A + * ARMAEMv8AMPCT + * + * RTSM_VE_AEMv8A.lisa + */ + +/dts-v1/; + +/memreserve/ 0x80000000 0x00010000; + +/ { +}; + +/ { + model = "RTSM_VE_AEMv8A"; + compatible = "arm,rtsm_ve,aemv8a", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + /* chosen */ + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@1 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <1>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@2 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <2>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@3 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <3>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x00000000 0x80000000 0 0x80000000>, + <0x00000008 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x2c001000 0 0x1000>, + <0x0 0x2c002000 0 0x1000>, + <0x0 0x2c004000 0 0x2000>, + <0x0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 13 0xff01>, + <1 14 0xff01>, + <1 11 0xff01>, + <1 10 0xff01>; + clock-frequency = <100000000>; + }; + + pmu { + compatible = 
"arm,armv8-pmuv3"; + interrupts = <0 60 4>, + <0 61 4>, + <0 62 4>, + <0 63 4>; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; + + panels { + panel@0 { + compatible = "panel"; + mode = "VGA"; + refresh = <60>; + xres = <640>; + yres = <480>; + pixclock = <39721>; + left_margin = <40>; + right_margin = <24>; + upper_margin = <32>; + lower_margin = <11>; + hsync_len = <96>; + vsync_len = <2>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + + panel@1 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; +}; diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi new file mode 100644 index 00000000000..f5363d68116 --- /dev/null +++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi @@ -0,0 +1,237 @@ +/* + * ARM Ltd. 
Fast Models + * + * Versatile Express (VE) system model + * Motherboard component + * + * VEMotherBoard.lisa + */ + + motherboard { + arm,v2m-memory-map = "rs1"; + compatible = "arm,vexpress,v2m-p1", "simple-bus"; + #address-cells = <2>; /* SMB chipselect number and offset */ + #size-cells = <1>; + #interrupt-cells = <1>; + ranges; + + flash@0,00000000 { + compatible = "arm,vexpress-flash", "cfi-flash"; + reg = <0 0x00000000 0x04000000>, + <4 0x00000000 0x04000000>; + bank-width = <4>; + }; + + vram@2,00000000 { + compatible = "arm,vexpress-vram"; + reg = <2 0x00000000 0x00800000>; + }; + + ethernet@2,02000000 { + compatible = "smsc,lan91c111"; + reg = <2 0x02000000 0x10000>; + interrupts = <15>; + }; + + v2m_clk24mhz: clk24mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + clock-output-names = "v2m:clk24mhz"; + }; + + v2m_refclk1mhz: refclk1mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "v2m:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "v2m:refclk32khz"; + }; + + iofpga@3,00000000 { + compatible = "arm,amba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 3 0 0x200000>; + + v2m_sysreg: sysreg@010000 { + compatible = "arm,vexpress-sysreg"; + reg = <0x010000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + }; + + v2m_sysctl: sysctl@020000 { + compatible = "arm,sp810", "arm,primecell"; + reg = <0x020000 0x1000>; + clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&v2m_clk24mhz>; + clock-names = "refclk", "timclk", "apb_pclk"; + #clock-cells = <1>; + clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3"; + }; + + aaci@040000 { + compatible = "arm,pl041", "arm,primecell"; + reg = <0x040000 0x1000>; + interrupts = <11>; + clocks = <&v2m_clk24mhz>; + clock-names = "apb_pclk"; + }; + + mmci@050000 { + compatible = "arm,pl180", "arm,primecell"; + reg = <0x050000 0x1000>; + interrupts = <9 10>; + cd-gpios = <&v2m_sysreg 0 0>; + wp-gpios = <&v2m_sysreg 1 0>; + max-frequency = <12000000>; + vmmc-supply = <&v2m_fixed_3v3>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "mclk", "apb_pclk"; + }; + + kmi@060000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x060000 0x1000>; + interrupts = <12>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + kmi@070000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x070000 0x1000>; + interrupts = <13>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + v2m_serial0: uart@090000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x090000 0x1000>; + interrupts = <5>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial1: uart@0a0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0a0000 0x1000>; + interrupts = <6>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial2: uart@0b0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0b0000 0x1000>; + interrupts = <7>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial3: uart@0c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0c0000 0x1000>; + interrupts = <8>; + clocks = <&v2m_clk24mhz>, <&v2m_clk24mhz>; + clock-names = "uartclk", "apb_pclk"; + 
}; + + wdt@0f0000 { + compatible = "arm,sp805", "arm,primecell"; + reg = <0x0f0000 0x1000>; + interrupts = <0>; + clocks = <&v2m_refclk32khz>, <&v2m_clk24mhz>; + clock-names = "wdogclk", "apb_pclk"; + }; + + v2m_timer01: timer@110000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x110000 0x1000>; + interrupts = <2>; + clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&v2m_clk24mhz>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + v2m_timer23: timer@120000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x120000 0x1000>; + interrupts = <3>; + clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&v2m_clk24mhz>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + rtc@170000 { + compatible = "arm,pl031", "arm,primecell"; + reg = <0x170000 0x1000>; + interrupts = <4>; + clocks = <&v2m_clk24mhz>; + clock-names = "apb_pclk"; + }; + + clcd@1f0000 { + compatible = "arm,pl111", "arm,primecell"; + reg = <0x1f0000 0x1000>; + interrupts = <14>; + clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>; + clock-names = "clcdclk", "apb_pclk"; + mode = "VGA"; + use_dma = <0>; + framebuffer = <0x18000000 0x00180000>; + }; + }; + + v2m_fixed_3v3: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + mcc { + compatible = "arm,vexpress,config-bus", "simple-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + v2m_oscclk1: osc@1 { + /* CLCD clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <23750000 63500000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk1"; + }; + + reset@0 { + compatible = "arm,vexpress-reset"; + arm,vexpress-sysreg,func = <5 0>; + }; + + muxfpga@0 { + compatible = "arm,vexpress-muxfpga"; + arm,vexpress-sysreg,func = <7 0>; + }; + + shutdown@0 { + compatible = "arm,vexpress-shutdown"; + arm,vexpress-sysreg,func = <8 0>; + }; + + reboot@0 { + compatible = "arm,vexpress-reboot"; + arm,vexpress-sysreg,func = <9 0>; + }; + + dvimode@0 { + compatible = "arm,vexpress-dvimode"; + arm,vexpress-sysreg,func = <11 0>; + }; + }; + }; diff --git a/arch/arm64/boot/dts/skeleton.dtsi b/arch/arm64/boot/dts/skeleton.dtsi new file mode 100644 index 00000000000..38ead821bb4 --- /dev/null +++ b/arch/arm64/boot/dts/skeleton.dtsi @@ -0,0 +1,13 @@ +/* + * Skeleton device tree; the bare minimum needed to boot; just include and + * add a compatible value. The bootloader will typically populate the memory + * node. + */ + +/ { + #address-cells = <2>; + #size-cells = <1>; + chosen { }; + aliases { }; + memory { device_type = "memory"; reg = <0 0 0>; }; +}; diff --git a/arch/arm64/boot/dts/vexpress-v2p-aarch64.dts b/arch/arm64/boot/dts/vexpress-v2p-aarch64.dts new file mode 100644 index 00000000000..7175a32f35a --- /dev/null +++ b/arch/arm64/boot/dts/vexpress-v2p-aarch64.dts @@ -0,0 +1,206 @@ +/* + * ARM Ltd. 
Fast Models + * + * Architecture Envelope Model (AEM) ARMv8-A + * ARMAEMv8AMPCT + * + * RTSM_VE_AEMv8A.lisa + */ + +/dts-v1/; + +/memreserve/ 0x80000000 0x00010000; + +/ { +}; + +/ { + model = "V2P-AARCH64"; + compatible = "arm,vexpress,v2p-aarch64", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + /* chosen */ + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <0>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@1 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <1>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@2 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <2>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + cpu@3 { + device_type = "cpu"; + compatible = "arm,armv8"; + reg = <3>; + enable-method = "spin-table"; + cpu-release-addr = <0x0 0x8000fff8>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x00000000 0x80000000 0 0x80000000>, + <0x00000008 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x0 0x2c001000 0 0x1000>, + <0x0 0x2c002000 0 0x1000>, + <0x0 0x2c004000 0 0x2000>, + <0x0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 13 0xff01>, + <1 14 0xff01>, + <1 11 0xff01>, + <1 10 0xff01>; + clock-frequency = <100000000>; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = <0 60 4>, + <0 61 4>, + <0 62 4>, + <0 63 4>; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; + + panels { + panel@0 { + compatible = "panel"; + mode = "VGA"; + refresh = <60>; + xres = <640>; + yres = <480>; + pixclock = <39721>; + left_margin = <40>; + right_margin = <24>; + upper_margin = <32>; + lower_margin = <11>; + hsync_len 
= <96>; + vsync_len = <2>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + + panel@1 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; +}; diff --git a/arch/arm64/configs/vexpress-android_defconfig b/arch/arm64/configs/vexpress-android_defconfig new file mode 100644 index 00000000000..bf122d041ba --- /dev/null +++ b/arch/arm64/configs/vexpress-android_defconfig @@ -0,0 +1,119 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_EMBEDDED=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_SMP=y +CONFIG_PREEMPT_VOLUNTARY=y +# CONFIG_COMPACTION is not set +CONFIG_CMDLINE="console=ttyAMA0 androidboot.console=ttyAMA0 video=ambafb loglevel=8" +CONFIG_CMDLINE_FORCE=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_INET_LRO is not set +CONFIG_IPV6=y +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y +CONFIG_SW_SYNC_USER=y +# CONFIG_BLK_DEV is not set +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_NETDEVICES=y +CONFIG_SMC91X=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_GPIO=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_AMBAKMI=y +CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_PDA_POWER=y +# CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_FB=y +CONFIG_FB_ARMCLCD=y +# CONFIG_VGA_CONSOLE is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y +CONFIG_SWITCH=y +CONFIG_RTC_CLASS=y +CONFIG_STAGING=y +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ASHMEM=y +CONFIG_ANDROID_LOGGER=y +CONFIG_ANDROID_TIMED_GPIO=y +CONFIG_ANDROID_LOW_MEMORY_KILLER=y +CONFIG_ANDROID_INTF_ALARM_DEV=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y 
+CONFIG_NLS_ISO8859_1=y +CONFIG_DEFAULT_MESSAGE_LOGLEVEL=7 +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_ERRORS=y +CONFIG_XZ_DEC=y +CONFIG_XZ_DEC_X86=y +CONFIG_XZ_DEC_POWERPC=y +CONFIG_XZ_DEC_IA64=y +CONFIG_XZ_DEC_ARM=y +CONFIG_XZ_DEC_ARMTHUMB=y +CONFIG_XZ_DEC_SPARC=y diff --git a/arch/arm64/configs/vexpress_defconfig b/arch/arm64/configs/vexpress_defconfig new file mode 100644 index 00000000000..789c8723749 --- /dev/null +++ b/arch/arm64/configs/vexpress_defconfig @@ -0,0 +1,94 @@ +# CONFIG_LOCALVERSION_AUTO is not set +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +CONFIG_SCHED_AUTOGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_COMPAT_BRK is not set +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +CONFIG_ARCH_VEXPRESS=y +CONFIG_SMP=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_CMDLINE="console=ttyAMA0" +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_COMPAT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_INET_LRO is not set +# CONFIG_IPV6 is not set +# CONFIG_WIRELESS is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +# CONFIG_BLK_DEV is not set +CONFIG_SCSI=y +# CONFIG_SCSI_PROC_FS is not set +CONFIG_BLK_DEV_SD=y +# CONFIG_SCSI_LOWLEVEL is not set +CONFIG_NETDEVICES=y +# CONFIG_NET_CADENCE is not set +CONFIG_SMC91X=y +# CONFIG_WLAN is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIO_AMBAKMI=y +CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +CONFIG_FB=y +CONFIG_FB_ARMCLCD=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +# CONFIG_EXT3_FS_XATTR is not set +CONFIG_FUSE_FS=y +CONFIG_CUSE=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_SCHED_DEBUG is not set +CONFIG_DEBUG_INFO=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +# CONFIG_FTRACE is not set +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_DEBUG_ERRORS=y diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ef54125e6c1..7a317029e73 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -17,6 +17,7 @@ #define __ASM_CPUTYPE_H #define ID_MIDR_EL1 "midr_el1" +#define ID_MPIDR_EL1 "mpidr_el1" #define ID_CTR_EL0 "ctr_el0" #define ID_AA64PFR0_EL1 "id_aa64pfr0_el1" @@ -31,6 +32,12 @@ __val; \ }) +#define ARM_CPU_IMP_ARM 0x41 + +#define ARM_CPU_PART_AEM_V8 0xD0F0 +#define ARM_CPU_PART_FOUNDATION 
0xD000 +#define ARM_CPU_PART_CORTEX_A57 0xD070 + /* * The CPU ID never changes at run time, so we might as well tell the * compiler that it's constant. Use this function to read the CPU ID @@ -41,6 +48,21 @@ static inline u32 __attribute_const__ read_cpuid_id(void) return read_cpuid(ID_MIDR_EL1); } +static inline u64 __attribute_const__ read_cpuid_mpidr(void) +{ + return read_cpuid(ID_MPIDR_EL1); +} + +static inline unsigned int __attribute_const__ read_cpuid_implementor(void) +{ + return (read_cpuid_id() & 0xFF000000) >> 24; +} + +static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +{ + return (read_cpuid_id() & 0xFFF0); +} + static inline u32 __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(ID_CTR_EL0); diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index ac63519b7b9..0303705fcad 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -19,5 +19,6 @@ #define __ASM_EXCEPTION_H #define __exception __attribute__((section(".exception.text"))) +#define __exception_irq_entry __exception #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 507546353d6..990c051e782 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -49,4 +49,9 @@ static inline void ack_bad_irq(unsigned int irq) extern void handle_IRQ(unsigned int, struct pt_regs *); +/* + * No arch-specific IRQ flags. + */ +#define set_irq_flags(irq, flags) + #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index a4e1cad3202..0332fc077f6 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -4,5 +4,6 @@ #include <asm-generic/irq.h> extern void (*handle_arch_irq)(struct pt_regs *); +extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); #endif diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h new file mode 100644 index 00000000000..7d7a45f0fdd --- /dev/null +++ b/arch/arm64/include/asm/smp_plat.h @@ -0,0 +1,27 @@ +/* + * Definitions specific to SMP platforms. + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_SMP_PLAT_H +#define __ASM_SMP_PLAT_H + +/* + * Logical CPU mapping. 
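+ * For now this is the identity mapping: cpu_logical_map(n) is simply n.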
+ */ +#define cpu_logical_map(cpu) (cpu) + +#endif /* __ASM_SMP_PLAT_H */ diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 95e40725534..a6e1750369e 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -41,7 +41,7 @@ extern void show_pte(struct mm_struct *mm, unsigned long addr); extern void __show_regs(struct pt_regs *); void soft_restart(unsigned long); -extern void (*pm_restart)(const char *cmd); +extern void (*arm_pm_restart)(char str, const char *cmd); #define UDBG_UNDEFINED (1 << 0) #define UDBG_SYSCALL (1 << 1) diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c index 7e320a2edb9..ac974f48a7a 100644 --- a/arch/arm64/kernel/early_printk.c +++ b/arch/arm64/kernel/early_printk.c @@ -24,6 +24,7 @@ #include <linux/io.h> #include <linux/amba/serial.h> +#include <linux/serial_reg.h> static void __iomem *early_base; static void (*printch)(char ch); @@ -40,6 +41,37 @@ static void pl011_printch(char ch) ; } +/* + * Semihosting-based debug console + */ +static void smh_printch(char ch) +{ + asm volatile("mov x1, %0\n" + "mov x0, #3\n" + "hlt 0xf000\n" + : : "r" (&ch) : "x0", "x1", "memory"); +} + +/* + * 8250/16550 (8-bit aligned registers) single character TX. + */ +static void uart8250_8bit_printch(char ch) +{ + while (!(readb_relaxed(early_base + UART_LSR) & UART_LSR_THRE)) + ; + writeb_relaxed(ch, early_base + UART_TX); +} + +/* + * 8250/16550 (32-bit aligned registers) single character TX. + */ +static void uart8250_32bit_printch(char ch) +{ + while (!(readl_relaxed(early_base + (UART_LSR << 2)) & UART_LSR_THRE)) + ; + writel_relaxed(ch, early_base + (UART_TX << 2)); +} + struct earlycon_match { const char *name; void (*printch)(char ch); @@ -47,6 +79,9 @@ struct earlycon_match { static const struct earlycon_match earlycon_match[] __initconst = { { .name = "pl011", .printch = pl011_printch, }, + { .name = "smh", .printch = smh_printch, }, + { .name = "uart8250-8bit", .printch = uart8250_8bit_printch, }, + { .name = "uart8250-32bit", .printch = uart8250_32bit_printch, }, {} }; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 0373c6609ea..ecb3354292e 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -25,7 +25,7 @@ #include <linux/irq.h> #include <linux/smp.h> #include <linux/init.h> -#include <linux/of_irq.h> +#include <linux/irqchip.h> #include <linux/seq_file.h> #include <linux/ratelimit.h> @@ -67,18 +67,17 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs) set_irq_regs(old_regs); } -/* - * Interrupt controllers supported by the kernel. 
- */ -static const struct of_device_id intctrl_of_match[] __initconst = { - /* IRQ controllers { .compatible, .data } info to go here */ - {} -}; +void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq) + return; + + handle_arch_irq = handle_irq; +} void __init init_IRQ(void) { - of_irq_init(intctrl_of_match); - + irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 0337cdb0667..3fd745104ec 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -81,8 +81,8 @@ void soft_restart(unsigned long addr) void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); -void (*pm_restart)(const char *cmd); -EXPORT_SYMBOL_GPL(pm_restart); +void (*arm_pm_restart)(char str, const char *cmd); +EXPORT_SYMBOL_GPL(arm_pm_restart); /* @@ -164,8 +164,8 @@ void machine_restart(char *cmd) local_fiq_disable(); /* Now call the architecture specific reboot code. */ - if (pm_restart) - pm_restart(cmd); + if (arm_pm_restart) + arm_pm_restart('h', cmd); /* * Whoops - the architecture was unable to reboot. diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 113db863f83..9c023d714f4 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -32,6 +32,7 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/root_dev.h> +#include <linux/clk-provider.h> #include <linux/cpu.h> #include <linux/interrupt.h> #include <linux/smp.h> @@ -277,6 +278,13 @@ void __init setup_arch(char **cmdline_p) #endif } +static int __init arm64_of_clk_init(void) +{ + of_clk_init(NULL); + return 0; +} +arch_initcall(arm64_of_clk_init); + static DEFINE_PER_CPU(struct cpu, cpu_data); static int __init topology_init(void) diff --git a/drivers/Kconfig b/drivers/Kconfig index 202fa6d051b..6a87332bf42 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -162,4 +162,6 @@ source "drivers/irqchip/Kconfig" source "drivers/ipack/Kconfig" +source "drivers/gator/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index dce39a95fa7..b825af2f4d6 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -148,3 +148,5 @@ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_IPACK_BUS) += ipack/ obj-$(CONFIG_NTB) += ntb/ + +obj-$(CONFIG_GATOR) += gator/ diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 0f51ed687dc..cd4ac9f001f 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -19,4 +19,9 @@ config OMAP_INTERCONNECT help Driver to enable OMAP interconnect error handling driver. + +config ARM_CCI + bool "ARM CCI driver support" + depends on ARM + endmenu diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile index 45d997c8545..55aac809e5b 100644 --- a/drivers/bus/Makefile +++ b/drivers/bus/Makefile @@ -6,3 +6,5 @@ obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o # Interconnect bus driver for OMAP SoCs. obj-$(CONFIG_OMAP_INTERCONNECT) += omap_l3_smx.o omap_l3_noc.o + +obj-$(CONFIG_ARM_CCI) += arm-cci.o diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c new file mode 100644 index 00000000000..b110645bc56 --- /dev/null +++ b/drivers/bus/arm-cci.c @@ -0,0 +1,509 @@ +/* + * ARM Cache Coherency Interconnect (CCI400) support + * + * Copyright (C) 2012-2013 ARM Ltd. 
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/arm-cci.h> + +#include <asm/cacheflush.h> +#include <asm/memory.h> +#include <asm/outercache.h> + +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +#define CCI_STATUS_OFFSET 0xc +#define STATUS_CHANGE_PENDING (1 << 0) + +#define CCI400_PMCR 0x0100 + +#define CCI_SLAVE_OFFSET(n) (0x1000 + 0x1000 * (n)) +#define CCI400_EAG_OFFSET CCI_SLAVE_OFFSET(3) +#define CCI400_KF_OFFSET CCI_SLAVE_OFFSET(4) + +#define DRIVER_NAME "CCI" +struct cci_drvdata { + void __iomem *baseaddr; +}; + +static struct cci_drvdata *info; + +#ifdef CONFIG_HW_PERF_EVENTS + +#define CCI400_PMU_CYCLE_CNTR_BASE 0x9000 +#define CCI400_PMU_CNTR_BASE(idx) (CCI400_PMU_CYCLE_CNTR_BASE + (idx) * 0x1000) + +#define CCI400_PMCR_CEN 0x00000001 +#define CCI400_PMCR_RST 0x00000002 +#define CCI400_PMCR_CCR 0x00000004 +#define CCI400_PMCR_CCD 0x00000008 +#define CCI400_PMCR_EX 0x00000010 +#define CCI400_PMCR_DP 0x00000020 +#define CCI400_PMCR_NCNT_MASK 0x0000F800 +#define CCI400_PMCR_NCNT_SHIFT 11 + +#define CCI400_PMU_EVT_SEL 0x000 +#define CCI400_PMU_CNTR 0x004 +#define CCI400_PMU_CNTR_CTRL 0x008 +#define CCI400_PMU_OVERFLOW 0x00C + +#define CCI400_PMU_OVERFLOW_FLAG 1 + +enum cci400_perf_events { + CCI400_PMU_CYCLES = 0xFF +}; + +#define CCI400_PMU_EVENT_MASK 0xff +#define CCI400_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7) +#define CCI400_PMU_EVENT_CODE(event) (event & 0x1f) + +#define CCI400_PMU_EVENT_SOURCE_S0 0 +#define CCI400_PMU_EVENT_SOURCE_S4 4 +#define CCI400_PMU_EVENT_SOURCE_M0 5 +#define CCI400_PMU_EVENT_SOURCE_M2 7 + +#define CCI400_PMU_EVENT_SLAVE_MIN 0x0 +#define CCI400_PMU_EVENT_SLAVE_MAX 0x13 + +#define CCI400_PMU_EVENT_MASTER_MIN 0x14 +#define CCI400_PMU_EVENT_MASTER_MAX 0x1A + +#define CCI400_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */ + +#define CCI400_PMU_CYCLE_COUNTER_IDX 0 +#define CCI400_PMU_COUNTER0_IDX 1 +#define CCI400_PMU_COUNTER_LAST(cci_pmu) (CCI400_PMU_CYCLE_COUNTER_IDX + cci_pmu->num_events - 1) + + +static struct perf_event *events[CCI400_PMU_MAX_HW_EVENTS]; +static unsigned long used_mask[BITS_TO_LONGS(CCI400_PMU_MAX_HW_EVENTS)]; +static struct pmu_hw_events cci_hw_events = { + .events = events, + .used_mask = used_mask, +}; + +static int cci_pmu_validate_hw_event(u8 hw_event) +{ + u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event); + u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event); + + if (ev_source <= CCI400_PMU_EVENT_SOURCE_S4 && + ev_code <= CCI400_PMU_EVENT_SLAVE_MAX) + return hw_event; + else if (CCI400_PMU_EVENT_SOURCE_M0 <= ev_source && + ev_source <= CCI400_PMU_EVENT_SOURCE_M2 && + CCI400_PMU_EVENT_MASTER_MIN <= ev_code && + ev_code <= CCI400_PMU_EVENT_MASTER_MAX) + return hw_event; + + return -EINVAL; +} + +static inline int cci_pmu_counter_is_valid(struct arm_pmu *cci_pmu, int idx) +{ + return CCI400_PMU_CYCLE_COUNTER_IDX <= idx && + idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); +} + +static inline u32 
cci_pmu_read_register(int idx, unsigned int offset) +{ + return readl_relaxed(info->baseaddr + CCI400_PMU_CNTR_BASE(idx) + offset); +} + +static inline void cci_pmu_write_register(u32 value, int idx, unsigned int offset) +{ + return writel_relaxed(value, info->baseaddr + CCI400_PMU_CNTR_BASE(idx) + offset); +} + +static inline void cci_pmu_disable_counter(int idx) +{ + cci_pmu_write_register(0, idx, CCI400_PMU_CNTR_CTRL); +} + +static inline void cci_pmu_enable_counter(int idx) +{ + cci_pmu_write_register(1, idx, CCI400_PMU_CNTR_CTRL); +} + +static inline void cci_pmu_select_event(int idx, unsigned long event) +{ + event &= CCI400_PMU_EVENT_MASK; + cci_pmu_write_register(event, idx, CCI400_PMU_EVT_SEL); +} + +static u32 cci_pmu_get_max_counters(void) +{ + u32 n_cnts = (readl_relaxed(info->baseaddr + CCI400_PMCR) & + CCI400_PMCR_NCNT_MASK) >> CCI400_PMCR_NCNT_SHIFT; + + /* add 1 for cycle counter */ + return n_cnts + 1; +} + +static struct pmu_hw_events *cci_pmu_get_hw_events(void) +{ + return &cci_hw_events; +} + +static int cci_pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_event = &event->hw; + unsigned long cci_event = hw_event->config_base & CCI400_PMU_EVENT_MASK; + int idx; + + if (cci_event == CCI400_PMU_CYCLES) { + if (test_and_set_bit(CCI400_PMU_CYCLE_COUNTER_IDX, hw->used_mask)) + return -EAGAIN; + + return CCI400_PMU_CYCLE_COUNTER_IDX; + } + + for (idx = CCI400_PMU_COUNTER0_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); ++idx) { + if (!test_and_set_bit(idx, hw->used_mask)) + return idx; + } + + /* No counters available */ + return -EAGAIN; +} + +static int cci_pmu_map_event(struct perf_event *event) +{ + int mapping; + u8 config = event->attr.config & CCI400_PMU_EVENT_MASK; + + if (event->attr.type < PERF_TYPE_MAX) + return -ENOENT; + + /* 0xff is used to represent CCI Cycles */ + if (config == 0xff) + mapping = config; + else + mapping = cci_pmu_validate_hw_event(config); + + return mapping; +} + +static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler) +{ + int irq, err, i = 0; + struct platform_device *pmu_device = cci_pmu->plat_device; + + if (unlikely(!pmu_device)) + return -ENODEV; + + /* CCI exports 6 interrupts - 1 nERRORIRQ + 5 nEVNTCNTOVERFLOW (PMU) + nERRORIRQ will be handled by secure firmware on TC2. So we + assume that all CCI interrupts listed in the linux device + tree are PMU interrupts. + + The following code should then be able to handle different routing + of the CCI PMU interrupts. + */ + while ((irq = platform_get_irq(pmu_device, i)) > 0) { + err = request_irq(irq, handler, 0, "arm-cci-pmu", cci_pmu); + if (err) { + dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n", + irq); + return err; + } + i++; + } + + return 0; +} + +static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev) +{ + struct arm_pmu *cci_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct perf_sample_data data; + struct pt_regs *regs; + int idx; + + regs = get_irq_regs(); + + /* Iterate over counters and update the corresponding perf events. + This should work regardless of whether we have per-counter overflow + interrupt or a combined overflow interrupt. 
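+ Each overflowed counter has its overflow flag cleared before the event is updated and a new sample period is programmed, so one handler invocation can service several counters.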
*/ + for (idx = CCI400_PMU_CYCLE_COUNTER_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); idx++) { + struct perf_event *event = events->events[idx]; + struct hw_perf_event *hw_counter; + + if (!event) + continue; + + hw_counter = &event->hw; + + /* Did this counter overflow? */ + if (!(cci_pmu_read_register(idx, CCI400_PMU_OVERFLOW) & CCI400_PMU_OVERFLOW_FLAG)) + continue; + cci_pmu_write_register(CCI400_PMU_OVERFLOW_FLAG, idx, CCI400_PMU_OVERFLOW); + + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hw_counter->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cci_pmu->disable(event); + } + + irq_work_run(); + return IRQ_HANDLED; +} + +static void cci_pmu_free_irq(struct arm_pmu *cci_pmu) +{ + int irq, i = 0; + struct platform_device *pmu_device = cci_pmu->plat_device; + + while ((irq = platform_get_irq(pmu_device, i)) > 0) { + free_irq(irq, cci_pmu); + i++; + } +} + +static void cci_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Configure the event to count, unless you are counting cycles */ + if (idx != CCI400_PMU_CYCLE_COUNTER_IDX) + cci_pmu_select_event(idx, hw_counter->config_base); + + cci_pmu_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + cci_pmu_disable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_start(struct arm_pmu *cci_pmu) +{ + u32 val; + unsigned long flags; + struct cci_drvdata *info = platform_get_drvdata(cci_pmu->plat_device); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Enable all the PMU counters. */ + val = readl(info->baseaddr + CCI400_PMCR) | CCI400_PMCR_CEN; + writel(val, info->baseaddr + CCI400_PMCR); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void cci_pmu_stop(struct arm_pmu *cci_pmu) +{ + u32 val; + unsigned long flags; + struct cci_drvdata *info = platform_get_drvdata(cci_pmu->plat_device); + struct pmu_hw_events *events = cci_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable all the PMU counters. 
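+ Only the global CEN bit in the CCI PMCR is cleared here; the per-counter enable bits are left untouched.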
*/ + val = readl(info->baseaddr + CCI400_PMCR) & ~CCI400_PMCR_CEN; + writel(val, info->baseaddr + CCI400_PMCR); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static u32 cci_pmu_read_counter(struct perf_event *event) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + u32 value; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) { + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + return 0; + } + value = cci_pmu_read_register(idx, CCI400_PMU_CNTR); + + return value; +} + +static void cci_pmu_write_counter(struct perf_event *event, u32 value) +{ + struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hw_counter = &event->hw; + int idx = hw_counter->idx; + + if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) + dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx); + else + cci_pmu_write_register(value, idx, CCI400_PMU_CNTR); +} + +static struct arm_pmu cci_pmu = { + .name = DRIVER_NAME, + .max_period = (1LLU << 32) - 1, + .get_hw_events = cci_pmu_get_hw_events, + .get_event_idx = cci_pmu_get_event_idx, + .map_event = cci_pmu_map_event, + .request_irq = cci_pmu_request_irq, + .handle_irq = cci_pmu_handle_irq, + .free_irq = cci_pmu_free_irq, + .enable = cci_pmu_enable_event, + .disable = cci_pmu_disable_event, + .start = cci_pmu_start, + .stop = cci_pmu_stop, + .read_counter = cci_pmu_read_counter, + .write_counter = cci_pmu_write_counter, +}; + +static int cci_pmu_init(struct platform_device *pdev) +{ + cci_pmu.plat_device = pdev; + cci_pmu.num_events = cci_pmu_get_max_counters(); + raw_spin_lock_init(&cci_hw_events.pmu_lock); + cpumask_setall(&cci_pmu.valid_cpus); + + return armpmu_register(&cci_pmu, -1); +} + +#else + +static int cci_pmu_init(struct platform_device *pdev) +{ + return 0; +} + +#endif /* CONFIG_HW_PERF_EVENTS */ + +void notrace disable_cci(int cluster) +{ + u32 slave_reg = cluster ? CCI400_KF_OFFSET : CCI400_EAG_OFFSET; + writel_relaxed(0x0, info->baseaddr + slave_reg); + + while (readl_relaxed(info->baseaddr + CCI_STATUS_OFFSET) + & STATUS_CHANGE_PENDING) + barrier(); +} +EXPORT_SYMBOL_GPL(disable_cci); + +static int cci_driver_probe(struct platform_device *pdev) +{ + struct resource *res; + int ret = 0; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + dev_err(&pdev->dev, "unable to allocate mem\n"); + return -ENOMEM; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "No memory resource\n"); + ret = -EINVAL; + goto mem_free; + } + + if (!request_mem_region(res->start, resource_size(res), + dev_name(&pdev->dev))) { + dev_err(&pdev->dev, "address 0x%x in use\n", (u32) res->start); + ret = -EBUSY; + goto mem_free; + } + + info->baseaddr = ioremap(res->start, resource_size(res)); + if (!info->baseaddr) { + ret = -EADDRNOTAVAIL; + goto ioremap_err; + } + + /* + * Multi-cluster systems may need this data when non-coherent, during + * cluster power-up/power-down. 
Make sure it reaches main memory: + */ + __cpuc_flush_dcache_area(info, sizeof *info); + __cpuc_flush_dcache_area(&info, sizeof info); + outer_clean_range(virt_to_phys(info), virt_to_phys(info + 1)); + outer_clean_range(virt_to_phys(&info), virt_to_phys(&info + 1)); + + platform_set_drvdata(pdev, info); + + if (cci_pmu_init(pdev) < 0) + pr_info("CCI PMU initialisation failed.\n"); + + pr_info("CCI loaded at %p\n", info->baseaddr); + return ret; + +ioremap_err: + release_region(res->start, resource_size(res)); +mem_free: + kfree(info); + + return ret; +} + +static const struct of_device_id arm_cci_matches[] = { + {.compatible = "arm,cci"}, + {}, +}; + +static struct platform_driver cci_platform_driver = { + .driver = { + .name = DRIVER_NAME, + .of_match_table = arm_cci_matches, + }, + .probe = cci_driver_probe, +}; + +static int __init cci_init(void) +{ + return platform_driver_register(&cci_platform_driver); +} + +core_initcall(cci_init); diff --git a/drivers/clk/versatile/Makefile b/drivers/clk/versatile/Makefile index ec3b88fe3e6..6e76bf87ca8 100644 --- a/drivers/clk/versatile/Makefile +++ b/drivers/clk/versatile/Makefile @@ -3,5 +3,5 @@ obj-$(CONFIG_ICST) += clk-icst.o obj-$(CONFIG_ARCH_INTEGRATOR) += clk-integrator.o obj-$(CONFIG_INTEGRATOR_IMPD1) += clk-impd1.o obj-$(CONFIG_ARCH_REALVIEW) += clk-realview.o -obj-$(CONFIG_ARCH_VEXPRESS) += clk-vexpress.o -obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o +obj-$(CONFIG_ARCH_VEXPRESS) += clk-vexpress.o clk-sp810.o +obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o clk-vexpress-spc.o diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c new file mode 100644 index 00000000000..20399f9c1c5 --- /dev/null +++ b/drivers/clk/versatile/clk-sp810.c @@ -0,0 +1,172 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Copyright (C) 2012 ARM Limited + */ + +#include <linux/amba/sp810.h> +#include <linux/clkdev.h> +#include <linux/clk-provider.h> +#include <linux/err.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#define to_clk_sp810_timerclken(_hw) \ + container_of(_hw, struct clk_sp810_timerclken, hw) + +struct clk_sp810; + +struct clk_sp810_timerclken { + struct clk_hw hw; + struct clk *clk; + struct clk_sp810 *sp810; + int channel; +}; + +struct clk_sp810 { + struct device_node *node; + int refclk_index, timclk_index; + void __iomem *base; + spinlock_t lock; + struct clk_sp810_timerclken timerclken[4]; +}; + +static u8 clk_sp810_timerclken_get_parent(struct clk_hw *hw) +{ + struct clk_sp810_timerclken *timerclken = to_clk_sp810_timerclken(hw); + u32 val = readl(timerclken->sp810->base + SCCTRL); + + return !!(val & (1 << SCCTRL_TIMERENnSEL_SHIFT(timerclken->channel))); +} + +static int clk_sp810_timerclken_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_sp810_timerclken *timerclken = to_clk_sp810_timerclken(hw); + struct clk_sp810 *sp810 = timerclken->sp810; + u32 val, shift = SCCTRL_TIMERENnSEL_SHIFT(timerclken->channel); + unsigned long flags = 0; + + if (WARN_ON(index > 1)) + return -EINVAL; + + spin_lock_irqsave(&sp810->lock, flags); + + val = readl(sp810->base + SCCTRL); + val &= ~(1 << shift); + val |= index << shift; + writel(val, sp810->base + SCCTRL); + + spin_unlock_irqrestore(&sp810->lock, flags); + + return 0; +} + +static int clk_sp810_timerclken_prepare(struct clk_hw *hw) +{ + struct clk_sp810_timerclken *timerclken = to_clk_sp810_timerclken(hw); + struct clk_sp810 *sp810 = timerclken->sp810; + struct clk *refclk = of_clk_get(sp810->node, sp810->refclk_index); + struct clk *timclk = of_clk_get(sp810->node, sp810->timclk_index); + struct clk *old_parent = __clk_get_parent(hw->clk); + struct clk *new_parent = old_parent; + int new_parent_index; + + if (WARN_ON(IS_ERR(refclk) || IS_ERR(timclk))) + return -ENOENT; + + /* Select "better" (faster) parent */ + if (__clk_get_rate(refclk) > __clk_get_rate(timclk)) { + new_parent = refclk; + new_parent_index = 0; + } else { + new_parent = timclk; + new_parent_index = 1; + } + + /* Switch the parent if necessary */ + if (old_parent != new_parent) { + __clk_prepare(new_parent); + clk_sp810_timerclken_set_parent(hw, new_parent_index); + __clk_reparent(hw->clk, new_parent); + __clk_unprepare(old_parent); + } + + return 0; +} + +static const struct clk_ops clk_sp810_timerclken_ops = { + .prepare = clk_sp810_timerclken_prepare, + .get_parent = clk_sp810_timerclken_get_parent, + .set_parent = clk_sp810_timerclken_set_parent, +}; + +struct clk *clk_sp810_timerclken_of_get(struct of_phandle_args *clkspec, + void *data) +{ + struct clk_sp810 *sp810 = data; + + if (WARN_ON(clkspec->args_count != 1 || clkspec->args[0] > + ARRAY_SIZE(sp810->timerclken))) + return NULL; + + return sp810->timerclken[clkspec->args[0]].clk; +} + +void __init clk_sp810_of_setup(struct device_node *node) +{ + struct clk_sp810 *sp810 = kzalloc(sizeof(*sp810), GFP_KERNEL); + const char *parent_names[2]; + char name[12]; + struct clk_init_data init; + int i; + + if (!sp810) { + pr_err("Failed to allocate memory for SP810!\n"); + return; + } + + sp810->refclk_index = of_property_match_string(node, "clock-names", + "refclk"); + parent_names[0] = of_clk_get_parent_name(node, sp810->refclk_index); + + sp810->timclk_index = of_property_match_string(node, "clock-names", + "timclk"); + parent_names[1] = of_clk_get_parent_name(node, sp810->timclk_index); + + if 
(!parent_names[0] || !parent_names[1]) { + pr_warn("Failed to obtain parent clocks for SP810!\n"); + return; + } + + sp810->node = node; + sp810->base = of_iomap(node, 0); + spin_lock_init(&sp810->lock); + + init.name = name; + init.ops = &clk_sp810_timerclken_ops; + init.flags = CLK_IS_BASIC; + init.parent_names = parent_names; + init.num_parents = ARRAY_SIZE(parent_names); + + for (i = 0; i < ARRAY_SIZE(sp810->timerclken); i++) { + snprintf(name, ARRAY_SIZE(name), "timerclken%d", i); + + sp810->timerclken[i].sp810 = sp810; + sp810->timerclken[i].channel = i; + sp810->timerclken[i].hw.init = &init; + + sp810->timerclken[i].clk = clk_register(NULL, + &sp810->timerclken[i].hw); + WARN_ON(IS_ERR(sp810->timerclken[i].clk)); + } + + of_clk_add_provider(node, clk_sp810_timerclken_of_get, sp810); +} +CLK_OF_DECLARE(vexpress_soc, "arm,sp810", clk_sp810_of_setup); diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c new file mode 100644 index 00000000000..d3f8fb44cca --- /dev/null +++ b/drivers/clk/versatile/clk-vexpress-spc.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2012 Linaro + * + * Author: Viresh Kumar <viresh.kumar@linaro.org> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +/* SPC clock programming interface for Vexpress cpus */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/vexpress.h> + +struct clk_spc { + struct clk_hw hw; + spinlock_t *lock; + int cluster; +}; + +#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw) + +static unsigned long spc_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + u32 freq; + + if (vexpress_spc_get_performance(spc->cluster, &freq)) { + return -EIO; + pr_err("%s: Failed", __func__); + } + + return freq * 1000; +} + +static long spc_round_rate(struct clk_hw *hw, unsigned long drate, + unsigned long *parent_rate) +{ + return drate; +} + +static int spc_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_spc *spc = to_clk_spc(hw); + + return vexpress_spc_set_performance(spc->cluster, rate / 1000); +} + +static struct clk_ops clk_spc_ops = { + .recalc_rate = spc_recalc_rate, + .round_rate = spc_round_rate, + .set_rate = spc_set_rate, +}; + +struct clk *vexpress_clk_register_spc(const char *name, int cluster_id) +{ + struct clk_init_data init; + struct clk_spc *spc; + struct clk *clk; + + if (!name) { + pr_err("Invalid name passed"); + return ERR_PTR(-EINVAL); + } + + spc = kzalloc(sizeof(*spc), GFP_KERNEL); + if (!spc) { + pr_err("could not allocate spc clk\n"); + return ERR_PTR(-ENOMEM); + } + + spc->hw.init = &init; + spc->cluster = cluster_id; + + init.name = name; + init.ops = &clk_spc_ops; + init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.num_parents = 0; + + clk = clk_register(NULL, &spc->hw); + if (!IS_ERR_OR_NULL(clk)) + return clk; + + pr_err("clk register failed\n"); + kfree(spc); + + return NULL; +} + +#if defined(CONFIG_OF) +void __init vexpress_clk_of_register_spc(void) +{ + char name[9] = "cluster"; + struct device_node *node = NULL; + struct clk *clk; + const u32 *val; + int cluster_id = 0, len; + + if 
(!of_find_compatible_node(NULL, NULL, "arm,spc")) { + pr_debug("%s: No SPC found, Exiting!!\n", __func__); + return; + } + + while ((node = of_find_node_by_name(node, "cluster"))) { + val = of_get_property(node, "reg", &len); + if (val && len == 4) + cluster_id = be32_to_cpup(val); + + name[7] = cluster_id + '0'; + clk = vexpress_clk_register_spc(name, cluster_id); + if (IS_ERR(clk)) + return; + + pr_debug("Registered clock '%s'\n", name); + clk_register_clkdev(clk, name, NULL); + } +} +CLK_OF_DECLARE(spc, "arm,spc", vexpress_clk_of_register_spc); +#endif diff --git a/drivers/clk/versatile/clk-vexpress.c b/drivers/clk/versatile/clk-vexpress.c index 82b45aad8cc..187306e104c 100644 --- a/drivers/clk/versatile/clk-vexpress.c +++ b/drivers/clk/versatile/clk-vexpress.c @@ -16,39 +16,10 @@ #include <linux/clk-provider.h> #include <linux/err.h> #include <linux/of.h> -#include <linux/of_address.h> #include <linux/vexpress.h> -static struct clk *vexpress_sp810_timerclken[4]; static DEFINE_SPINLOCK(vexpress_sp810_lock); -static void __init vexpress_sp810_init(void __iomem *base) -{ - int i; - - if (WARN_ON(!base)) - return; - - for (i = 0; i < ARRAY_SIZE(vexpress_sp810_timerclken); i++) { - char name[12]; - const char *parents[] = { - "v2m:refclk32khz", /* REFCLK */ - "v2m:refclk1mhz" /* TIMCLK */ - }; - - snprintf(name, ARRAY_SIZE(name), "timerclken%d", i); - - vexpress_sp810_timerclken[i] = clk_register_mux(NULL, name, - parents, 2, 0, base + SCCTRL, - SCCTRL_TIMERENnSEL_SHIFT(i), 1, - 0, &vexpress_sp810_lock); - - if (WARN_ON(IS_ERR(vexpress_sp810_timerclken[i]))) - break; - } -} - - static const char * const vexpress_clk_24mhz_periphs[] __initconst = { "mb:uart0", "mb:uart1", "mb:uart2", "mb:uart3", "mb:mmci", "mb:kmi0", "mb:kmi1" @@ -57,6 +28,11 @@ static const char * const vexpress_clk_24mhz_periphs[] __initconst = { void __init vexpress_clk_init(void __iomem *sp810_base) { struct clk *clk; + const char *sp810_parent_names[] = { + "v2m:refclk32khz", /* REFCLK */ + "v2m:refclk1mhz" /* TIMCLK */ + }; + struct clk *sp810_parent; int i; clk = clk_register_fixed_rate(NULL, "dummy_apb_pclk", NULL, @@ -73,63 +49,29 @@ void __init vexpress_clk_init(void __iomem *sp810_base) CLK_IS_ROOT, 32768); WARN_ON(clk_register_clkdev(clk, NULL, "v2m:wdt")); - clk = clk_register_fixed_rate(NULL, "v2m:refclk1mhz", NULL, + sp810_parent = clk_register_fixed_rate(NULL, "v2m:refclk1mhz", NULL, CLK_IS_ROOT, 1000000); - vexpress_sp810_init(sp810_base); - - for (i = 0; i < ARRAY_SIZE(vexpress_sp810_timerclken); i++) - WARN_ON(clk_set_parent(vexpress_sp810_timerclken[i], clk)); - - WARN_ON(clk_register_clkdev(vexpress_sp810_timerclken[0], - "v2m-timer0", "sp804")); - WARN_ON(clk_register_clkdev(vexpress_sp810_timerclken[1], - "v2m-timer1", "sp804")); + clk = clk_register_mux(NULL, "timerclken0", + sp810_parent_names, 2, 0, sp810_base + SCCTRL, + SCCTRL_TIMERENnSEL_SHIFT(0), 1, + 0, &vexpress_sp810_lock); + WARN_ON(clk_set_parent(clk, sp810_parent)); + WARN_ON(clk_register_clkdev(clk, "v2m-timer0", "sp804")); + + clk = clk_register_mux(NULL, "timerclken1", + sp810_parent_names, 2, 0, sp810_base + SCCTRL, + SCCTRL_TIMERENnSEL_SHIFT(1), 1, + 0, &vexpress_sp810_lock); + WARN_ON(clk_set_parent(clk, sp810_parent)); + WARN_ON(clk_register_clkdev(clk, "v2m-timer1", "sp804")); } #if defined(CONFIG_OF) -struct clk *vexpress_sp810_of_get(struct of_phandle_args *clkspec, void *data) -{ - if (WARN_ON(clkspec->args_count != 1 || clkspec->args[0] > - ARRAY_SIZE(vexpress_sp810_timerclken))) - return NULL; - - return 
vexpress_sp810_timerclken[clkspec->args[0]]; -} - void __init vexpress_clk_of_init(void) { - struct device_node *node; - struct clk *clk; - struct clk *refclk, *timclk; - of_clk_init(NULL); - - node = of_find_compatible_node(NULL, NULL, "arm,sp810"); - vexpress_sp810_init(of_iomap(node, 0)); - of_clk_add_provider(node, vexpress_sp810_of_get, NULL); - - /* Select "better" (faster) parent for SP804 timers */ - refclk = of_clk_get_by_name(node, "refclk"); - timclk = of_clk_get_by_name(node, "timclk"); - if (!WARN_ON(IS_ERR(refclk) || IS_ERR(timclk))) { - int i = 0; - - if (clk_get_rate(refclk) > clk_get_rate(timclk)) - clk = refclk; - else - clk = timclk; - - for (i = 0; i < ARRAY_SIZE(vexpress_sp810_timerclken); i++) - WARN_ON(clk_set_parent(vexpress_sp810_timerclken[i], - clk)); - } - - WARN_ON(clk_register_clkdev(vexpress_sp810_timerclken[0], - "v2m-timer0", "sp804")); - WARN_ON(clk_register_clkdev(vexpress_sp810_timerclken[1], - "v2m-timer1", "sp804")); } #endif diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 030ddf6dd3f..6ef9c7b0691 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -113,3 +113,24 @@ config ARM_HIGHBANK_CPUFREQ based boards. If in doubt, say N. + +config ARM_BIG_LITTLE_CPUFREQ + tristate + depends on ARM_CPU_TOPOLOGY + +config ARM_DT_BL_CPUFREQ + tristate "Generic ARM big LITTLE CPUfreq driver probed via DT" + select ARM_BIG_LITTLE_CPUFREQ + depends on OF && BIG_LITTLE + default y + help + This enables the Generic CPUfreq driver for ARM big.LITTLE platform. + This gets frequency tables from DT. + +config ARM_VEXPRESS_BL_CPUFREQ + tristate "ARM Vexpress big LITTLE CPUfreq driver" + select ARM_BIG_LITTLE_CPUFREQ + depends on ARM_SPC && BIG_LITTLE + help + This enables the CPUfreq driver for ARM Vexpress big.LITTLE platform. + If in doubt, say N. diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 863fd1865d4..6148eac24fc 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -57,6 +57,11 @@ obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o +obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o +obj-$(CONFIG_ARM_VEXPRESS_BL_CPUFREQ) += vexpress_big_little.o +#Keep DT_BL_CPUFREQ as the last entry in all big LITTLE drivers, so that it is +#probed last. +obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_dt_big_little.o ################################################################################## # PowerPC platform drivers diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c new file mode 100644 index 00000000000..b5601fcd79e --- /dev/null +++ b/drivers/cpufreq/arm_big_little.c @@ -0,0 +1,283 @@ +/* + * ARM big.LITTLE Platforms CPUFreq support + * + * Copyright (C) 2012 ARM Ltd. + * Author: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> + * + * Copyright (C) 2012 Linaro. + * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/clk.h> +#include <linux/cpufreq.h> +#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <asm/topology.h> +#include "arm_big_little.h" + +#define MAX_CLUSTERS 2 + +static struct cpufreq_arm_bL_ops *arm_bL_ops; +static struct clk *clk[MAX_CLUSTERS]; +static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS]; +static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)}; + +/* + * Functions to get the current status. + * + * Beware that the cluster for another CPU may change unexpectedly. + */ +static int cpu_to_cluster(int cpu) +{ + return topology_physical_package_id(cpu); +} + +static unsigned int bL_cpufreq_get(unsigned int cpu) +{ + u32 cur_cluster = cpu_to_cluster(cpu); + + return clk_get_rate(clk[cur_cluster]) / 1000; +} + +/* Validate policy frequency range */ +static int bL_cpufreq_verify_policy(struct cpufreq_policy *policy) +{ + u32 cur_cluster = cpu_to_cluster(policy->cpu); + + /* This call takes care of it all using freq_table */ + return cpufreq_frequency_table_verify(policy, freq_table[cur_cluster]); +} + +/* Set clock frequency */ +static int bL_cpufreq_set_target(struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) +{ + struct cpufreq_freqs freqs; + u32 cpu = policy->cpu, freq_tab_idx, cur_cluster; + int ret = 0; + + /* ASSUMPTION: The cpu can't be hotplugged in this function */ + cur_cluster = cpu_to_cluster(policy->cpu); + + freqs.old = bL_cpufreq_get(policy->cpu); + + /* Determine valid target frequency using freq_table */ + cpufreq_frequency_table_target(policy, freq_table[cur_cluster], + target_freq, relation, &freq_tab_idx); + freqs.new = freq_table[cur_cluster][freq_tab_idx].frequency; + + freqs.cpu = policy->cpu; + + pr_debug("%s: cpu: %d, cluster: %d, oldfreq: %d, target freq: %d, new freq: %d\n", + __func__, cpu, cur_cluster, freqs.old, target_freq, + freqs.new); + + if (freqs.old == freqs.new) + return 0; + + for_each_cpu(freqs.cpu, policy->cpus) + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000); + if (ret) { + pr_err("clk_set_rate failed: %d\n", ret); + return ret; + } + + policy->cur = freqs.new; + + for_each_cpu(freqs.cpu, policy->cpus) + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + + return ret; +} + +/* translate the integer array into cpufreq_frequency_table entries */ +struct cpufreq_frequency_table * +arm_bL_copy_table_from_array(unsigned int *table, int count) +{ + int i; + + struct cpufreq_frequency_table *freq_table; + + pr_debug("%s: table: %p, count: %d\n", __func__, table, count); + + freq_table = kmalloc(sizeof(*freq_table) * (count + 1), GFP_KERNEL); + if (!freq_table) + return NULL; + + for (i = 0; i < count; i++) { + pr_debug("%s: index: %d, freq: %d\n", __func__, i, table[i]); + freq_table[i].index = i; + freq_table[i].frequency = table[i]; /* in kHZ */ + } + + freq_table[i].index = count; + freq_table[i].frequency = CPUFREQ_TABLE_END; + + return freq_table; +} +EXPORT_SYMBOL_GPL(arm_bL_copy_table_from_array); + +void arm_bL_free_freq_table(u32 cluster) +{ + pr_debug("%s: free freq table\n", __func__); + + kfree(freq_table[cluster]); +} +EXPORT_SYMBOL_GPL(arm_bL_free_freq_table); + +static void put_cluster_clk_and_freq_table(u32 cluster) +{ + if (!atomic_dec_return(&cluster_usage[cluster])) { + clk_put(clk[cluster]); + clk[cluster] = NULL; + 
arm_bL_ops->put_freq_tbl(cluster); + freq_table[cluster] = NULL; + pr_debug("%s: cluster: %d\n", __func__, cluster); + } +} + +static int get_cluster_clk_and_freq_table(u32 cluster) +{ + char name[9] = "cluster"; + int count; + + if (atomic_inc_return(&cluster_usage[cluster]) != 1) + return 0; + + freq_table[cluster] = arm_bL_ops->get_freq_tbl(cluster, &count); + if (!freq_table[cluster]) + goto atomic_dec; + + name[7] = cluster + '0'; + clk[cluster] = clk_get(NULL, name); + if (!IS_ERR_OR_NULL(clk[cluster])) { + pr_debug("%s: clk: %p & freq table: %p, cluster: %d\n", + __func__, clk[cluster], freq_table[cluster], + cluster); + return 0; + } + + arm_bL_ops->put_freq_tbl(cluster); + +atomic_dec: + atomic_dec(&cluster_usage[cluster]); + pr_err("%s: Failed to get data for cluster: %d\n", __func__, cluster); + return -ENODATA; +} + +/* Per-CPU initialization */ +static int bL_cpufreq_init(struct cpufreq_policy *policy) +{ + u32 cur_cluster = cpu_to_cluster(policy->cpu); + int result; + + result = get_cluster_clk_and_freq_table(cur_cluster); + if (result) + return result; + + result = cpufreq_frequency_table_cpuinfo(policy, + freq_table[cur_cluster]); + if (result) { + pr_err("CPU %d, cluster: %d invalid freq table\n", policy->cpu, + cur_cluster); + put_cluster_clk_and_freq_table(cur_cluster); + return result; + } + + cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu); + + policy->cpuinfo.transition_latency = 1000000; /* 1 ms assumed */ + policy->cur = bL_cpufreq_get(policy->cpu); + + cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu)); + cpumask_copy(policy->related_cpus, policy->cpus); + + pr_info("CPU %d initialized\n", policy->cpu); + return 0; +} + +static int bL_cpufreq_exit(struct cpufreq_policy *policy) +{ + put_cluster_clk_and_freq_table(cpu_to_cluster(policy->cpu)); + pr_debug("%s: Exited, cpu: %d\n", __func__, policy->cpu); + + return 0; +} + +/* Export freq_table to sysfs */ +static struct freq_attr *bL_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver bL_cpufreq_driver = { + .name = "arm-big-little", + .flags = CPUFREQ_STICKY, + .verify = bL_cpufreq_verify_policy, + .target = bL_cpufreq_set_target, + .get = bL_cpufreq_get, + .init = bL_cpufreq_init, + .exit = bL_cpufreq_exit, + .attr = bL_cpufreq_attr, +}; + +int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops) +{ + int ret; + + if (arm_bL_ops) { + pr_debug("%s: Already registered: %s, exiting\n", __func__, + arm_bL_ops->name); + return -EBUSY; + } + + if (!ops || !strlen(ops->name) || !ops->get_freq_tbl) { + pr_err("%s: Invalid arm_bL_ops, exiting\n", __func__); + return -ENODEV; + } + + arm_bL_ops = ops; + + ret = cpufreq_register_driver(&bL_cpufreq_driver); + if (ret) { + pr_info("%s: Failed registering platform driver: %s, err: %d\n", + __func__, ops->name, ret); + arm_bL_ops = NULL; + } else { + pr_info("%s: Registered platform driver: %s\n", __func__, + ops->name); + } + + return ret; +} +EXPORT_SYMBOL_GPL(bL_cpufreq_register); + +void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops) +{ + if (arm_bL_ops != ops) { + pr_info("%s: Registered with: %s, can't unregister, exiting\n", + __func__, arm_bL_ops->name); + } + + cpufreq_unregister_driver(&bL_cpufreq_driver); + pr_info("%s: Un-registered platform driver: %s\n", __func__, + arm_bL_ops->name); + arm_bL_ops = NULL; +} +EXPORT_SYMBOL_GPL(bL_cpufreq_unregister); diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h new file mode 100644 index 
00000000000..6712a501198 --- /dev/null +++ b/drivers/cpufreq/arm_big_little.h @@ -0,0 +1,38 @@ +/* + * ARM big.LITTLE platform's CPUFreq header file + * + * Copyright (C) 2012 ARM Ltd. + * Author: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> + * + * Copyright (C) 2012 ARM Ltd. + * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef CPUFREQ_ARM_BIG_LITTLE_H +#define CPUFREQ_ARM_BIG_LITTLE_H + +#include <linux/cpufreq.h> +#include <linux/types.h> + +struct cpufreq_arm_bL_ops { + char name[CPUFREQ_NAME_LEN]; + struct cpufreq_frequency_table *(*get_freq_tbl)(u32 cluster, int *count); + void (*put_freq_tbl)(u32 cluster); +}; + +struct cpufreq_frequency_table * +arm_bL_copy_table_from_array(unsigned int *table, int count); +void arm_bL_free_freq_table(u32 cluster); + +int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops); +void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops); + +#endif /* CPUFREQ_ARM_BIG_LITTLE_H */ diff --git a/drivers/cpufreq/arm_dt_big_little.c b/drivers/cpufreq/arm_dt_big_little.c new file mode 100644 index 00000000000..fabfb9c5c37 --- /dev/null +++ b/drivers/cpufreq/arm_dt_big_little.c @@ -0,0 +1,101 @@ +/* + * Generic big.LITTLE CPUFreq Interface driver + * + * It provides necessary ops to arm_big_little cpufreq driver and gets + * Frequency information from Device Tree. Freq table in DT must be in KHz. + * + * Copyright (C) 2012 Linaro. + * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/cpufreq.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/types.h> +#include "arm_big_little.h" + +static struct cpufreq_frequency_table *generic_get_freq_tbl(u32 cluster, + int *count) +{ + struct device_node *np = NULL; + const struct property *pp; + unsigned int *table = NULL; + int cluster_id; + struct cpufreq_frequency_table *cpufreq_table; + + while ((np = of_find_node_by_name(np, "cluster"))) { + if (of_property_read_u32(np, "reg", &cluster_id)) + continue; + + if (cluster_id != cluster) + continue; + + pp = of_find_property(np, "freqs", NULL); + if (!pp) + continue; + + *count = pp->length / sizeof(u32); + if (!*count) + continue; + + table = kmalloc(sizeof(*table) * (*count), GFP_KERNEL); + if (!table) { + pr_err("%s: Failed to allocate memory for table\n", + __func__); + return NULL; + } + + of_property_read_u32_array(np, "freqs", table, *count); + break; + } + + if (!table) { + pr_err("%s: Unable to retrieve Freq table from Device Tree", + __func__); + return NULL; + } + + cpufreq_table = arm_bL_copy_table_from_array(table, *count); + kfree(table); + + return cpufreq_table; +} + +static void generic_put_freq_tbl(u32 cluster) +{ + arm_bL_free_freq_table(cluster); +} + +static struct cpufreq_arm_bL_ops generic_bL_ops = { + .name = "generic-bl", + .get_freq_tbl = generic_get_freq_tbl, + .put_freq_tbl = generic_put_freq_tbl, +}; + +static int generic_bL_init(void) +{ + return bL_cpufreq_register(&generic_bL_ops); +} +module_init(generic_bL_init); + +static void generic_bL_exit(void) +{ + return bL_cpufreq_unregister(&generic_bL_ops); +} +module_exit(generic_bL_exit); + +MODULE_DESCRIPTION("Generic ARM big LITTLE cpufreq driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/vexpress_big_little.c b/drivers/cpufreq/vexpress_big_little.c new file mode 100644 index 00000000000..66648c3fc94 --- /dev/null +++ b/drivers/cpufreq/vexpress_big_little.c @@ -0,0 +1,74 @@ +/* + * Vexpress big.LITTLE CPUFreq Interface driver + * + * It provides necessary ops to arm_big_little cpufreq driver and gets + * information from spc controller. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com> + * + * Copyright (C) 2012 Linaro. + * Viresh Kumar <viresh.kumar@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/cpufreq.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/vexpress.h> +#include "arm_big_little.h" + +static struct cpufreq_frequency_table *vexpress_get_freq_tbl(u32 cluster, + int *count) +{ + unsigned int *table = vexpress_spc_get_freq_table(cluster, count); + + if (!table || !*count) { + pr_err("SPC controller returned invalid freq table"); + return NULL; + } + + return arm_bL_copy_table_from_array(table, *count); +} + +static void vexpress_put_freq_tbl(u32 cluster) +{ + arm_bL_free_freq_table(cluster); +} + +static struct cpufreq_arm_bL_ops vexpress_bL_ops = { + .name = "vexpress-bL", + .get_freq_tbl = vexpress_get_freq_tbl, + .put_freq_tbl = vexpress_put_freq_tbl, +}; + +static int vexpress_bL_init(void) +{ + if (!vexpress_spc_check_loaded()) { + pr_info("%s: No SPC found\n", __func__); + return -ENOENT; + } + + return bL_cpufreq_register(&vexpress_bL_ops); +} +module_init(vexpress_bL_init); + +static void vexpress_bL_exit(void) +{ + return bL_cpufreq_unregister(&vexpress_bL_ops); +} +module_exit(vexpress_bL_exit); + +MODULE_DESCRIPTION("ARM Vexpress big LITTLE cpufreq driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 24c6e7d945e..6a7e6a9beff 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -4,6 +4,6 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o - +obj-$(CONFIG_BIG_LITTLE) += arm_big_little.o obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o obj-$(CONFIG_CPU_IDLE_KIRKWOOD) += cpuidle-kirkwood.o diff --git a/drivers/cpuidle/arm_big_little.c b/drivers/cpuidle/arm_big_little.c new file mode 100644 index 00000000000..a430800d4a7 --- /dev/null +++ b/drivers/cpuidle/arm_big_little.c @@ -0,0 +1,183 @@ +/* + * big.LITTLE CPU idle driver. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/arm-cci.h> +#include <linux/bitmap.h> +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> +#include <linux/clockchips.h> +#include <linux/debugfs.h> +#include <linux/hrtimer.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/tick.h> +#include <linux/vexpress.h> +#include <asm/mcpm.h> +#include <asm/cpuidle.h> +#include <asm/cputype.h> +#include <asm/idmap.h> +#include <asm/proc-fns.h> +#include <asm/suspend.h> +#include <linux/of.h> + +static int bl_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + ktime_t time_start, time_end; + s64 diff; + + time_start = ktime_get(); + + cpu_do_idle(); + + time_end = ktime_get(); + + local_irq_enable(); + + diff = ktime_to_us(ktime_sub(time_end, time_start)); + if (diff > INT_MAX) + diff = INT_MAX; + + dev->last_residency = (int) diff; + + return index; +} + +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx); + +static struct cpuidle_state bl_cpuidle_set[] __initdata = { + [0] = { + .enter = bl_cpuidle_simple_enter, + .exit_latency = 1, + .target_residency = 1, + .power_usage = UINT_MAX, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "WFI", + .desc = "ARM WFI", + }, + [1] = { + .enter = bl_enter_powerdown, + .exit_latency = 300, + .target_residency = 1000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "C1", + .desc = "ARM power down", + }, +}; + +struct cpuidle_driver bl_idle_driver = { + .name = "bl_idle", + .owner = THIS_MODULE, + .safe_state_index = 0 +}; + +static DEFINE_PER_CPU(struct cpuidle_device, bl_idle_dev); + +static int notrace bl_powerdown_finisher(unsigned long arg) +{ + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cluster = (mpidr >> 8) & 0xf; + unsigned int cpu = mpidr & 0xf; + + mcpm_set_entry_vector(cpu, cluster, cpu_resume); + mcpm_cpu_suspend(0); /* 0 should be replaced with better value here */ + return 1; +} + +/* + * bl_enter_powerdown - Programs CPU to enter the specified state + * @dev: cpuidle device + * @drv: The target state to be programmed + * @idx: state index + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. + */ +static int bl_enter_powerdown(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + struct timespec ts_preidle, ts_postidle, ts_idle; + int ret; + + /* Used to keep track of the total time in idle */ + getnstimeofday(&ts_preidle); + + BUG_ON(!irqs_disabled()); + + cpu_pm_enter(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + + ret = cpu_suspend((unsigned long) dev, bl_powerdown_finisher); + if (ret) + BUG(); + + mcpm_cpu_powered_up(); + + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + + cpu_pm_exit(); + + getnstimeofday(&ts_postidle); + local_irq_enable(); + ts_idle = timespec_sub(ts_postidle, ts_preidle); + + dev->last_residency = ts_idle.tv_nsec / NSEC_PER_USEC + + ts_idle.tv_sec * USEC_PER_SEC; + return idx; +} + +/* + * bl_idle_init + * + * Registers the bl specific cpuidle driver with the cpuidle + * framework with the valid set of states. 
+ */ +int __init bl_idle_init(void) +{ + struct cpuidle_device *dev; + int i, cpu_id; + struct cpuidle_driver *drv = &bl_idle_driver; + + if (!of_find_compatible_node(NULL, NULL, "arm,generic")) { + pr_info("%s: No compatible node found\n", __func__); + return -ENODEV; + } + + drv->state_count = (sizeof(bl_cpuidle_set) / + sizeof(struct cpuidle_state)); + + for (i = 0; i < drv->state_count; i++) { + memcpy(&drv->states[i], &bl_cpuidle_set[i], + sizeof(struct cpuidle_state)); + } + + cpuidle_register_driver(drv); + + for_each_cpu(cpu_id, cpu_online_mask) { + pr_err("CPUidle for CPU%d registered\n", cpu_id); + dev = &per_cpu(bl_idle_dev, cpu_id); + dev->cpu = cpu_id; + + dev->state_count = drv->state_count; + + if (cpuidle_register_device(dev)) { + printk(KERN_ERR "%s: Cpuidle register device failed\n", + __func__); + return -EIO; + } + } + + return 0; +} + +late_initcall(bl_idle_init); diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index e1aab38c5a8..ece83d6e049 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -37,20 +37,6 @@ extern void *scu_base_addr; static struct cpuidle_device __percpu *calxeda_idle_cpuidle_devices; -static inline unsigned int get_auxcr(void) -{ - unsigned int val; - asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc"); - return val; -} - -static inline void set_auxcr(unsigned int val) -{ - asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR" - : : "r" (val) : "cc"); - isb(); -} - static noinline void calxeda_idle_restore(void) { set_cr(get_cr() | CR_C); diff --git a/drivers/gator/Kconfig b/drivers/gator/Kconfig new file mode 100644 index 00000000000..7ea0fcc3d01 --- /dev/null +++ b/drivers/gator/Kconfig @@ -0,0 +1,33 @@ +config GATOR + tristate "Gator module for ARM's Streamline Performance Analyzer" + default m if (ARM || ARM64) + depends on PROFILING + depends on HIGH_RES_TIMERS + depends on LOCAL_TIMERS || !(ARM && SMP) + select TRACING + +config GATOR_WITH_MALI_SUPPORT + bool + +choice + prompt "Enable Mali GPU support in Gator" + depends on GATOR + optional + +config GATOR_MALI_400MP + bool "Mali-400MP" + select GATOR_WITH_MALI_SUPPORT + +config GATOR_MALI_T6XX + bool "Mali-T604 or Mali-T658" + select GATOR_WITH_MALI_SUPPORT + +endchoice + +config GATOR_MALI_PATH + string "Path to Mali driver" + depends on GATOR_WITH_MALI_SUPPORT + default "drivers/gpu/arm/mali400mp" + help + The gator code adds this to its include path so it can get the Mali + trace headers with: #include "linux/mali_linux_trace.h" diff --git a/drivers/gator/LICENSE b/drivers/gator/LICENSE new file mode 100644 index 00000000000..d159169d105 --- /dev/null +++ b/drivers/gator/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. 
(Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. 
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile new file mode 100644 index 00000000000..5dadbacb30f --- /dev/null +++ b/drivers/gator/Makefile @@ -0,0 +1,72 @@ +ifneq ($(KERNELRELEASE),) + +# Uncomment the following line to enable kernel stack unwinding within gator, or update gator_backtrace.c +# EXTRA_CFLAGS += -DGATOR_KERNEL_STACK_UNWINDING + +obj-$(CONFIG_GATOR) := gator.o + +gator-y := gator_main.o \ + gator_events_irq.o \ + gator_events_sched.o \ + gator_events_net.o \ + gator_events_block.o \ + gator_events_meminfo.o \ + gator_events_perf_pmu.o + +gator-y += gator_events_mmaped.o + +ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y) + +ifeq ($(CONFIG_GATOR_MALI_T6XX),y) +gator-y += gator_events_mali_t6xx.o \ + gator_events_mali_t6xx_hw.o +include $(M)/mali_t6xx.mk +else +gator-y += gator_events_mali_400.o +endif +gator-y += gator_events_mali_common.o + +ccflags-y += -I$(CONFIG_GATOR_MALI_PATH) +ccflags-$(CONFIG_GATOR_MALI_400MP) += -DMALI_SUPPORT=MALI_400 +ccflags-$(CONFIG_GATOR_MALI_T6XX) += -DMALI_SUPPORT=MALI_T6xx +endif + +# GATOR_TEST controls whether to include (=1) or exclude (=0) test code. +GATOR_TEST ?= 0 +EXTRA_CFLAGS += -DGATOR_TEST=$(GATOR_TEST) + +gator-$(CONFIG_ARM) += gator_events_armv6.o \ + gator_events_armv7.o \ + gator_events_l2c-310.o \ + gator_events_scorpion.o + +$(obj)/gator_main.o: $(obj)/gator_events.h + +clean-files := gator_events.h + +# Note, in the recipe below we use "cd $(srctree) && cd $(src)" rather than +# "cd $(srctree)/$(src)" because under DKMS $(src) is an absolute path, and we +# can't just use $(src) because for normal kernel builds this is relative to +# $(srctree) + + chk_events.h = : + quiet_chk_events.h = echo ' CHK $@' +silent_chk_events.h = : +$(obj)/gator_events.h: FORCE + @$($(quiet)chk_events.h) + $(Q)cd $(srctree) && cd $(src) ; $(CONFIG_SHELL) gator_events.sh $(abspath $@) + +else + +all: + @echo + @echo "usage:" + @echo " make -C <kernel_build_dir> M=\`pwd\` ARCH=arm CROSS_COMPILE=<...> modules" + @echo + $(error) + +clean: + rm -f *.o .*.cmd gator_events.h modules.order Module.symvers gator.ko gator.mod.c + rm -rf .tmp_versions + +endif diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h new file mode 100644 index 00000000000..205cbcdaeab --- /dev/null +++ b/drivers/gator/gator.h @@ -0,0 +1,142 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef GATOR_H_ +#define GATOR_H_ + +#include <linux/version.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/list.h> + +#define GATOR_PERF_SUPPORT LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) +#define GATOR_PERF_PMU_SUPPORT GATOR_PERF_SUPPORT && defined(CONFIG_PERF_EVENTS) && (!(defined(__arm__) || defined(__aarch64__)) || defined(CONFIG_HW_PERF_EVENTS)) +#define GATOR_NO_PERF_SUPPORT (!(GATOR_PERF_SUPPORT)) +#define GATOR_CPU_FREQ_SUPPORT (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ) +#define GATOR_IKS_SUPPORT defined(CONFIG_BL_SWITCHER) + +#define GATOR_LIVE 1 + +// cpu ids +#define ARM1136 0xb36 +#define ARM1156 0xb56 +#define ARM1176 0xb76 +#define ARM11MPCORE 0xb02 +#define CORTEX_A5 0xc05 +#define CORTEX_A7 0xc07 +#define CORTEX_A8 0xc08 +#define CORTEX_A9 0xc09 +#define CORTEX_A15 0xc0f +#define SCORPION 0x00f +#define SCORPIONMP 0x02d +#define KRAITSIM 0x049 +#define KRAIT 0x04d +#define KRAIT_S4_PRO 0x06f +#define CORTEX_A53 0xd03 +#define CORTEX_A57 0xd07 +#define AARCH64 0xd0f +#define OTHER 0xfff + +#define MAXSIZE_CORE_NAME 32 + +struct gator_cpu { + const int cpuid; + const char core_name[MAXSIZE_CORE_NAME]; + const char * const pmu_name; + const char * const pmnc_name; + const int pmnc_counters; +}; + +const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid); +const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name); + +/****************************************************************************** + * Filesystem + ******************************************************************************/ +int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root, + char const *name, + const struct file_operations *fops, int perm); + +struct dentry *gatorfs_mkdir(struct super_block *sb, struct dentry *root, + char const *name); + +int gatorfs_create_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val); + +int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val); + +void gator_op_create_files(struct super_block *sb, struct dentry *root); + +/****************************************************************************** + * Tracepoints + ******************************************************************************/ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) +# error Kernels prior to 2.6.32 not supported +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) +# define GATOR_DEFINE_PROBE(probe_name, proto) \ + static void probe_##probe_name(PARAMS(proto)) +# define GATOR_REGISTER_TRACE(probe_name) \ + register_trace_##probe_name(probe_##probe_name) +# define GATOR_UNREGISTER_TRACE(probe_name) \ + unregister_trace_##probe_name(probe_##probe_name) +#else +# define GATOR_DEFINE_PROBE(probe_name, proto) \ + static void probe_##probe_name(void *data, PARAMS(proto)) +# define GATOR_REGISTER_TRACE(probe_name) \ + register_trace_##probe_name(probe_##probe_name, NULL) +# define GATOR_UNREGISTER_TRACE(probe_name) \ + unregister_trace_##probe_name(probe_##probe_name, NULL) +#endif + +/****************************************************************************** + * Events + ******************************************************************************/ +struct gator_interface { + void (*shutdown)(void); // Complementary function to init + int (*create_files)(struct super_block *sb, struct dentry *root); + int (*start)(void); + void (*stop)(void); // Complementary function to start + int (*online)(int 
**buffer, bool migrate); + int (*offline)(int **buffer, bool migrate); + void (*online_dispatch)(int cpu, bool migrate); // called in process context but may not be running on core 'cpu' + void (*offline_dispatch)(int cpu, bool migrate); // called in process context but may not be running on core 'cpu' + int (*read)(int **buffer); + int (*read64)(long long **buffer); + struct list_head list; +}; + +// gator_events_init is used as a search term in gator_events.sh +#define gator_events_init(initfn) \ + static inline int __gator_events_init_test(void) \ + { return initfn(); } + +int gator_events_install(struct gator_interface *interface); +int gator_events_get_key(void); +u32 gator_cpuid(void); + +void gator_backtrace_handler(struct pt_regs *const regs); + +#if !GATOR_IKS_SUPPORT + +#define get_physical_cpu() smp_processor_id() +#define lcpu_to_pcpu(lcpu) lcpu +#define pcpu_to_lcpu(pcpu) pcpu + +#else + +#define get_physical_cpu() lcpu_to_pcpu(get_logical_cpu()) +int lcpu_to_pcpu(const int lcpu); +int pcpu_to_lcpu(const int pcpu); + +#endif + +#define get_logical_cpu() smp_processor_id() +#define on_primary_core() (get_logical_cpu() == 0) + +#endif // GATOR_H_ diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c new file mode 100644 index 00000000000..ad9f3091235 --- /dev/null +++ b/drivers/gator/gator_annotate.c @@ -0,0 +1,171 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <asm/current.h> +#include <linux/spinlock.h> + +static DEFINE_SPINLOCK(annotate_lock); +static bool collect_annotations = false; + +static int annotate_copy(struct file *file, char const __user *buf, size_t count) +{ + int cpu = 0; + int write = per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF]; + + if (file == NULL) { + // copy from kernel + memcpy(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count); + } else { + // copy from user space + if (copy_from_user(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count) != 0) + return -1; + } + per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF] = (write + count) & gator_buffer_mask[ANNOTATE_BUF]; + + return 0; +} + +static ssize_t annotate_write(struct file *file, char const __user *buf, size_t count_orig, loff_t *offset) +{ + int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff; + + if (*offset) { + return -EINVAL; + } + + // Annotations are not supported in interrupt context + if (in_interrupt()) { + printk(KERN_WARNING "gator: Annotations are not supported in interrupt context\n"); + return -EINVAL; + } + + retry: + // synchronize between cores and with collect_annotations + spin_lock(&annotate_lock); + + if (!collect_annotations) { + // Not collecting annotations, tell the caller everything was written + size = count_orig; + goto annotate_write_out; + } + + // Annotation only uses a single per-cpu buffer as the data must be in order to the engine + cpu = 0; + + if (current == NULL) { + pid = 0; + } else { + pid = current->pid; + } + + // determine total size of the payload + header_size = MAXSIZE_PACK32 * 3 + MAXSIZE_PACK64; + available = buffer_bytes_available(cpu, ANNOTATE_BUF) - header_size; + size = count < available ? 
count : available; + + if (size <= 0) { + // Buffer is full, wait until space is available + spin_unlock(&annotate_lock); + wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations); + goto retry; + } + + // synchronize shared variables annotateBuf and annotatePos + if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF]) { + u64 time = gator_get_time(); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu()); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid); + gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, time); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, size); + + // determine the sizes to capture, length1 + length2 will equal size + contiguous = contiguous_space_available(cpu, ANNOTATE_BUF); + if (size < contiguous) { + length1 = size; + length2 = 0; + } else { + length1 = contiguous; + length2 = size - contiguous; + } + + if (annotate_copy(file, buf, length1) != 0) { + size = -EINVAL; + goto annotate_write_out; + } + + if (length2 > 0 && annotate_copy(file, &buf[length1], length2) != 0) { + size = -EINVAL; + goto annotate_write_out; + } + + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, ANNOTATE_BUF, time); + } + +annotate_write_out: + spin_unlock(&annotate_lock); + + // return the number of bytes written + return size; +} + +#include "gator_annotate_kernel.c" + +static int annotate_release(struct inode *inode, struct file *file) +{ + int cpu = 0; + + // synchronize between cores + spin_lock(&annotate_lock); + + if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF] && buffer_check_space(cpu, ANNOTATE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) { + uint32_t pid = current->pid; + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu()); + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid); + gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, 0); // time + gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, 0); // size + } + + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, ANNOTATE_BUF, gator_get_time()); + + spin_unlock(&annotate_lock); + + return 0; +} + +static const struct file_operations annotate_fops = { + .write = annotate_write, + .release = annotate_release +}; + +static int gator_annotate_create_files(struct super_block *sb, struct dentry *root) +{ + return gatorfs_create_file_perm(sb, root, "annotate", &annotate_fops, 0666); +} + +static int gator_annotate_start(void) +{ + collect_annotations = true; + return 0; +} + +static void gator_annotate_stop(void) +{ + // the spinlock here will ensure that when this function exits, we are not in the middle of an annotation + spin_lock(&annotate_lock); + collect_annotations = false; + wake_up(&gator_annotate_wait); + spin_unlock(&annotate_lock); +} diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c new file mode 100644 index 00000000000..4715f64a186 --- /dev/null +++ b/drivers/gator/gator_annotate_kernel.c @@ -0,0 +1,157 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#define ESCAPE_CODE 0x1c +#define STRING_ANNOTATION 0x06 +#define NAME_CHANNEL_ANNOTATION 0x07 +#define NAME_GROUP_ANNOTATION 0x08 +#define VISUAL_ANNOTATION 0x04 +#define MARKER_ANNOTATION 0x05 + +static void kannotate_write(const char *ptr, unsigned int size) +{ + int retval; + int pos = 0; + loff_t offset = 0; + while (pos < size) { + retval = annotate_write(NULL, &ptr[pos], size - pos, &offset); + if (retval < 0) { + printk(KERN_WARNING "gator: kannotate_write failed with return value %d\n", retval); + return; + } + pos += retval; + } +} + +void gator_annotate_channel(int channel, const char *str) +{ + int str_size = strlen(str) & 0xffff; + long long header = ESCAPE_CODE | (STRING_ANNOTATION << 8) | (channel << 16) | ((long long)str_size << 48); + kannotate_write((char *)&header, sizeof(header)); + kannotate_write(str, str_size); +} + +EXPORT_SYMBOL(gator_annotate_channel); + +void gator_annotate(const char *str) +{ + gator_annotate_channel(0, str); +} + +EXPORT_SYMBOL(gator_annotate); + +void gator_annotate_channel_color(int channel, int color, const char *str) +{ + int str_size = (strlen(str) + 4) & 0xffff; + char header[12]; + header[0] = ESCAPE_CODE; + header[1] = STRING_ANNOTATION; + *(u32 *)(&header[2]) = channel; + *(u16 *)(&header[6]) = str_size; + *(u32 *)(&header[8]) = color; + kannotate_write((char *)&header, sizeof(header)); + kannotate_write(str, str_size - 4); +} + +EXPORT_SYMBOL(gator_annotate_channel_color); + +void gator_annotate_color(int color, const char *str) +{ + gator_annotate_channel_color(0, color, str); +} + +EXPORT_SYMBOL(gator_annotate_color); + +void gator_annotate_channel_end(int channel) +{ + long long header = ESCAPE_CODE | (STRING_ANNOTATION << 8) | (channel << 16); + kannotate_write((char *)&header, sizeof(header)); +} + +EXPORT_SYMBOL(gator_annotate_channel_end); + +void gator_annotate_end(void) +{ + gator_annotate_channel_end(0); +} + +EXPORT_SYMBOL(gator_annotate_end); + +void gator_annotate_name_channel(int channel, int group, const char* str) +{ + int str_size = strlen(str) & 0xffff; + char header[12]; + header[0] = ESCAPE_CODE; + header[1] = NAME_CHANNEL_ANNOTATION; + *(u32 *)(&header[2]) = channel; + *(u32 *)(&header[6]) = group; + *(u16 *)(&header[10]) = str_size; + kannotate_write((char *)&header, sizeof(header)); + kannotate_write(str, str_size); +} + +EXPORT_SYMBOL(gator_annotate_name_channel); + +void gator_annotate_name_group(int group, const char* str) +{ + int str_size = strlen(str) & 0xffff; + long long header = ESCAPE_CODE | (NAME_GROUP_ANNOTATION << 8) | (group << 16) | ((long long)str_size << 48); + kannotate_write((char *)&header, sizeof(header)); + kannotate_write(str, str_size); +} + +EXPORT_SYMBOL(gator_annotate_name_group); + +void gator_annotate_visual(const char *data, unsigned int length, const char *str) +{ + int str_size = strlen(str) & 0xffff; + int visual_annotation = ESCAPE_CODE | (VISUAL_ANNOTATION << 8) | (str_size << 16); + kannotate_write((char *)&visual_annotation, sizeof(visual_annotation)); + kannotate_write(str, str_size); + kannotate_write((char *)&length, sizeof(length)); + kannotate_write(data, length); +} + +EXPORT_SYMBOL(gator_annotate_visual); + +void gator_annotate_marker(void) +{ + int header = ESCAPE_CODE | (MARKER_ANNOTATION << 8); + kannotate_write((char *)&header, sizeof(header)); +} + +EXPORT_SYMBOL(gator_annotate_marker); + +void gator_annotate_marker_str(const char *str) +{ + int str_size = strlen(str) & 0xffff; + int header = ESCAPE_CODE | (MARKER_ANNOTATION << 8) | (str_size << 16); + 
kannotate_write((char *)&header, sizeof(header)); + kannotate_write(str, str_size); +} + +EXPORT_SYMBOL(gator_annotate_marker_str); + +void gator_annotate_marker_color(int color) +{ + long long header = (ESCAPE_CODE | (MARKER_ANNOTATION << 8) | 0x00040000 | ((long long)color << 32)); + kannotate_write((char *)&header, sizeof(header)); +} + +EXPORT_SYMBOL(gator_annotate_marker_color); + +void gator_annotate_marker_color_str(int color, const char *str) +{ + int str_size = (strlen(str) + 4) & 0xffff; + long long header = ESCAPE_CODE | (MARKER_ANNOTATION << 8) | (str_size << 16) | ((long long)color << 32); + kannotate_write((char *)&header, sizeof(header)); + kannotate_write(str, str_size - 4); +} + +EXPORT_SYMBOL(gator_annotate_marker_color_str); diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c new file mode 100644 index 00000000000..6bd8ea2308c --- /dev/null +++ b/drivers/gator/gator_backtrace.c @@ -0,0 +1,139 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/* + * EABI backtrace stores {fp,lr} on the stack. + */ +struct frame_tail_eabi { + union { + struct { + unsigned long fp; // points to prev_lr + unsigned long lr; + }; + // Used to read 32 bit fp/lr from a 64 bit kernel + struct { + u32 fp_32; + u32 lr_32; + }; + }; +}; + +static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int depth) +{ +#if defined(__arm__) || defined(__aarch64__) + struct frame_tail_eabi *tail; + struct frame_tail_eabi *next; + struct frame_tail_eabi buftail; +#if defined(__arm__) + const bool is_compat = false; + unsigned long fp = regs->ARM_fp; + unsigned long sp = regs->ARM_sp; + unsigned long lr = regs->ARM_lr; + const int frame_offset = 4; +#else + // Is userspace aarch32 (32 bit) + const bool is_compat = compat_user_mode(regs); + unsigned long fp = (is_compat ? regs->regs[11] : regs->regs[29]); + unsigned long sp = (is_compat ? regs->compat_sp : regs->sp); + unsigned long lr = (is_compat ? regs->compat_lr : regs->regs[30]); + const int frame_offset = (is_compat ? 4 : 0); +#endif + int is_user_mode = user_mode(regs); + + if (!is_user_mode) { + return; + } + + /* entry preamble may not have executed */ + gator_add_trace(cpu, lr); + + /* check tail is valid */ + if (fp == 0 || fp < sp) { + return; + } + + tail = (struct frame_tail_eabi *)(fp - frame_offset); + + while (depth-- && tail && !((unsigned long)tail & 3)) { + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(struct frame_tail_eabi))) + return; + if (__copy_from_user_inatomic(&buftail, tail, sizeof(struct frame_tail_eabi))) + return; + + lr = (is_compat ? buftail.lr_32 : buftail.lr); + gator_add_trace(cpu, lr); + + /* frame pointers should progress back up the stack, towards higher addresses */ + next = (struct frame_tail_eabi *)(lr - frame_offset); + if (tail >= next || lr == 0) { + fp = (is_compat ? 
buftail.fp_32 : buftail.fp); + next = (struct frame_tail_eabi *)(fp - frame_offset); + /* check tail is valid */ + if (tail >= next || fp == 0) { + return; + } + } + + tail = next; + } +#endif +} + +#if defined(__arm__) || defined(__aarch64__) +static int report_trace(struct stackframe *frame, void *d) +{ + unsigned int *depth = d, cookie = NO_COOKIE; + unsigned long addr = frame->pc; + + if (*depth) { +#if defined(MODULE) + unsigned int cpu = get_physical_cpu(); + struct module *mod = __module_address(addr); + if (mod) { + cookie = get_cookie(cpu, current, mod->name, false); + addr = addr - (unsigned long)mod->module_core; + } +#endif + marshal_backtrace(addr & ~1, cookie); + (*depth)--; + } + + return *depth == 0; +} +#endif + +// Uncomment the following line to enable kernel stack unwinding within gator, note it can also be defined from the Makefile +// #define GATOR_KERNEL_STACK_UNWINDING +static void kernel_backtrace(int cpu, struct pt_regs *const regs) +{ +#if defined(__arm__) || defined(__aarch64__) +#ifdef GATOR_KERNEL_STACK_UNWINDING + int depth = gator_backtrace_depth; +#else + int depth = 1; +#endif + struct stackframe frame; + if (depth == 0) + depth = 1; +#if defined(__arm__) + frame.fp = regs->ARM_fp; + frame.sp = regs->ARM_sp; + frame.lr = regs->ARM_lr; + frame.pc = regs->ARM_pc; +#else + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; +#endif + walk_stackframe(&frame, report_trace, &depth); +#else + marshal_backtrace(PC_REG & ~1, NO_COOKIE); +#endif +} diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c new file mode 100644 index 00000000000..c332187e9b5 --- /dev/null +++ b/drivers/gator/gator_cookies.c @@ -0,0 +1,397 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#define COOKIEMAP_ENTRIES 1024 /* must be power of 2 */ +#define TRANSLATE_SIZE 256 +#define MAX_COLLISIONS 2 + +static uint32_t *gator_crc32_table; +static unsigned int translate_buffer_mask; + +static DEFINE_PER_CPU(char *, translate_text); +static DEFINE_PER_CPU(uint32_t, cookie_next_key); +static DEFINE_PER_CPU(uint64_t *, cookie_keys); +static DEFINE_PER_CPU(uint32_t *, cookie_values); +static DEFINE_PER_CPU(int, translate_buffer_read); +static DEFINE_PER_CPU(int, translate_buffer_write); +static DEFINE_PER_CPU(void **, translate_buffer); + +static inline uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq); +static void wq_cookie_handler(struct work_struct *unused); +DECLARE_WORK(cookie_work, wq_cookie_handler); +static struct timer_list app_process_wake_up_timer; +static void app_process_wake_up_handler(unsigned long unused_data); + +static uint32_t cookiemap_code(uint64_t value64) +{ + uint32_t value = (uint32_t)((value64 >> 32) + value64); + uint32_t cookiecode = (value >> 24) & 0xff; + cookiecode = cookiecode * 31 + ((value >> 16) & 0xff); + cookiecode = cookiecode * 31 + ((value >> 8) & 0xff); + cookiecode = cookiecode * 31 + ((value >> 0) & 0xff); + cookiecode &= (COOKIEMAP_ENTRIES - 1); + return cookiecode * MAX_COLLISIONS; +} + +static uint32_t gator_chksum_crc32(const char *data) +{ + register unsigned long crc; + const unsigned char *block = data; + int i, length = strlen(data); + + crc = 0xFFFFFFFF; + for (i = 0; i < length; i++) { + crc = ((crc >> 8) & 0x00FFFFFF) ^ gator_crc32_table[(crc ^ *block++) & 0xFF]; + } + + return (crc ^ 0xFFFFFFFF); +} + +/* + * Exists + * Pre: [0][1][v][3]..[n-1] + * Post: [v][0][1][3]..[n-1] + */ +static uint32_t cookiemap_exists(uint64_t key) +{ + unsigned long x, flags, retval = 0; + int cpu = get_physical_cpu(); + uint32_t cookiecode = cookiemap_code(key); + uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]); + uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]); + + // Can be called from interrupt handler or from work queue + local_irq_save(flags); + for (x = 0; x < MAX_COLLISIONS; x++) { + if (keys[x] == key) { + uint32_t value = values[x]; + for (; x > 0; x--) { + keys[x] = keys[x - 1]; + values[x] = values[x - 1]; + } + keys[0] = key; + values[0] = value; + retval = value; + break; + } + } + local_irq_restore(flags); + + return retval; +} + +/* + * Add + * Pre: [0][1][2][3]..[n-1] + * Post: [v][0][1][2]..[n-2] + */ +static void cookiemap_add(uint64_t key, uint32_t value) +{ + int cpu = get_physical_cpu(); + int cookiecode = cookiemap_code(key); + uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]); + uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]); + int x; + + for (x = MAX_COLLISIONS - 1; x > 0; x--) { + keys[x] = keys[x - 1]; + values[x] = values[x - 1]; + } + keys[0] = key; + values[0] = value; +} + +static void translate_buffer_write_ptr(int cpu, void *x) +{ + per_cpu(translate_buffer, cpu)[per_cpu(translate_buffer_write, cpu)++] = x; + per_cpu(translate_buffer_write, cpu) &= translate_buffer_mask; +} + +static void *translate_buffer_read_ptr(int cpu) +{ + void *value = per_cpu(translate_buffer, cpu)[per_cpu(translate_buffer_read, cpu)++]; + per_cpu(translate_buffer_read, cpu) &= translate_buffer_mask; + return value; +} + +static void wq_cookie_handler(struct work_struct *unused) +{ + struct task_struct *task; + char *text; + int cpu = get_physical_cpu(); + unsigned int commit; + + mutex_lock(&start_mutex); + + if (gator_started != 0) { + commit = 
per_cpu(translate_buffer_write, cpu); + while (per_cpu(translate_buffer_read, cpu) != commit) { + task = (struct task_struct *)translate_buffer_read_ptr(cpu); + text = (char *)translate_buffer_read_ptr(cpu); + get_cookie(cpu, task, text, true); + } + } + + mutex_unlock(&start_mutex); +} + +static void app_process_wake_up_handler(unsigned long unused_data) +{ + // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater + schedule_work(&cookie_work); +} + +// Retrieve full name from proc/pid/cmdline for java processes on Android +static int translate_app_process(const char **text, int cpu, struct task_struct *task, bool from_wq) +{ + void *maddr; + unsigned int len; + unsigned long addr; + struct mm_struct *mm; + struct page *page = NULL; + struct vm_area_struct *page_vma; + int bytes, offset, retval = 0, ptr; + char *buf = per_cpu(translate_text, cpu); + + // Push work into a work queue if in atomic context as the kernel functions below might sleep + // Rely on the in_interrupt variable rather than in_irq() or in_interrupt() kernel functions, as the value of these functions seems + // inconsistent during a context switch between android/linux versions + if (!from_wq) { + // Check if already in buffer + ptr = per_cpu(translate_buffer_read, cpu); + while (ptr != per_cpu(translate_buffer_write, cpu)) { + if (per_cpu(translate_buffer, cpu)[ptr] == (void *)task) + goto out; + ptr = (ptr + 2) & translate_buffer_mask; + } + + translate_buffer_write_ptr(cpu, (void *)task); + translate_buffer_write_ptr(cpu, (void *)*text); + + mod_timer(&app_process_wake_up_timer, jiffies + 1); + goto out; + } + + mm = get_task_mm(task); + if (!mm) + goto out; + if (!mm->arg_end) + goto outmm; + addr = mm->arg_start; + len = mm->arg_end - mm->arg_start; + + if (len > TRANSLATE_SIZE) + len = TRANSLATE_SIZE; + + down_read(&mm->mmap_sem); + while (len) { + if (get_user_pages(task, mm, addr, 1, 0, 1, &page, &page_vma) <= 0) + goto outsem; + + maddr = kmap(page); + offset = addr & (PAGE_SIZE - 1); + bytes = len; + if (bytes > PAGE_SIZE - offset) + bytes = PAGE_SIZE - offset; + + copy_from_user_page(page_vma, page, addr, buf, maddr + offset, bytes); + + kunmap(page); // release page allocated by get_user_pages() + page_cache_release(page); + + len -= bytes; + buf += bytes; + addr += bytes; + + *text = per_cpu(translate_text, cpu); + retval = 1; + } + + // On app_process startup, /proc/pid/cmdline is initially "zygote" then "<pre-initialized>" but changes after an initial startup period + if (strcmp(*text, "zygote") == 0 || strcmp(*text, "<pre-initialized>") == 0) + retval = 0; + +outsem: + up_read(&mm->mmap_sem); +outmm: + mmput(mm); +out: + return retval; +} + +static inline uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq) +{ + unsigned long flags, cookie; + uint64_t key; + + key = gator_chksum_crc32(text); + key = (key << 32) | (uint32_t)task->tgid; + + cookie = cookiemap_exists(key); + if (cookie) { + return cookie; + } + + if (strcmp(text, "app_process") == 0) { + if (!translate_app_process(&text, cpu, task, from_wq)) + return INVALID_COOKIE; + } + + // Can be called from interrupt handler or from work queue or from scheduler trace + local_irq_save(flags); + + cookie = INVALID_COOKIE; + if (marshal_cookie_header(text)) { + cookie = per_cpu(cookie_next_key, cpu) += nr_cpu_ids; + cookiemap_add(key, cookie); + marshal_cookie(cookie, text); + } + + local_irq_restore(flags); + + return cookie; +} + +static int 
get_exec_cookie(int cpu, struct task_struct *task) +{ + struct mm_struct *mm = task->mm; + const char *text; + + // kernel threads have no address space + if (!mm) + return NO_COOKIE; + + if (task && task->mm && task->mm->exe_file) { + text = task->mm->exe_file->f_path.dentry->d_name.name; + return get_cookie(cpu, task, text, false); + } + + return INVALID_COOKIE; +} + +static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsigned long addr, off_t *offset) +{ + unsigned long cookie = NO_COOKIE; + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + const char *text; + + if (!mm) + return cookie; + + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + if (addr < vma->vm_start || addr >= vma->vm_end) + continue; + + if (vma->vm_file) { + text = vma->vm_file->f_path.dentry->d_name.name; + cookie = get_cookie(cpu, task, text, false); + *offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start; + } else { + /* must be an anonymous map */ + *offset = addr; + } + + break; + } + + if (!vma) + cookie = INVALID_COOKIE; + + return cookie; +} + +static int cookies_initialize(void) +{ + uint32_t crc, poly; + int i, j, cpu, size, err = 0; + + int translate_buffer_size = 512; // must be a power of 2 + translate_buffer_mask = translate_buffer_size / sizeof(per_cpu(translate_buffer, 0)[0]) - 1; + + for_each_present_cpu(cpu) { + per_cpu(cookie_next_key, cpu) = nr_cpu_ids + cpu; + + size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint64_t); + per_cpu(cookie_keys, cpu) = (uint64_t *)kmalloc(size, GFP_KERNEL); + if (!per_cpu(cookie_keys, cpu)) { + err = -ENOMEM; + goto cookie_setup_error; + } + memset(per_cpu(cookie_keys, cpu), 0, size); + + size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint32_t); + per_cpu(cookie_values, cpu) = (uint32_t *)kmalloc(size, GFP_KERNEL); + if (!per_cpu(cookie_values, cpu)) { + err = -ENOMEM; + goto cookie_setup_error; + } + memset(per_cpu(cookie_values, cpu), 0, size); + + per_cpu(translate_buffer, cpu) = (void **)kmalloc(translate_buffer_size, GFP_KERNEL); + if (!per_cpu(translate_buffer, cpu)) { + err = -ENOMEM; + goto cookie_setup_error; + } + + per_cpu(translate_buffer_write, cpu) = 0; + per_cpu(translate_buffer_read, cpu) = 0; + + per_cpu(translate_text, cpu) = (char *)kmalloc(TRANSLATE_SIZE, GFP_KERNEL); + if (!per_cpu(translate_text, cpu)) { + err = -ENOMEM; + goto cookie_setup_error; + } + } + + // build CRC32 table + poly = 0x04c11db7; + gator_crc32_table = (uint32_t *)kmalloc(256 * sizeof(uint32_t), GFP_KERNEL); + for (i = 0; i < 256; i++) { + crc = i; + for (j = 8; j > 0; j--) { + if (crc & 1) { + crc = (crc >> 1) ^ poly; + } else { + crc >>= 1; + } + } + gator_crc32_table[i] = crc; + } + + setup_timer(&app_process_wake_up_timer, app_process_wake_up_handler, 0); + +cookie_setup_error: + return err; +} + +static void cookies_release(void) +{ + int cpu; + + for_each_present_cpu(cpu) { + kfree(per_cpu(cookie_keys, cpu)); + per_cpu(cookie_keys, cpu) = NULL; + + kfree(per_cpu(cookie_values, cpu)); + per_cpu(cookie_values, cpu) = NULL; + + kfree(per_cpu(translate_buffer, cpu)); + per_cpu(translate_buffer, cpu) = NULL; + per_cpu(translate_buffer_read, cpu) = 0; + per_cpu(translate_buffer_write, cpu) = 0; + + kfree(per_cpu(translate_text, cpu)); + per_cpu(translate_text, cpu) = NULL; + } + + del_timer_sync(&app_process_wake_up_timer); + kfree(gator_crc32_table); + gator_crc32_table = NULL; +} diff --git a/drivers/gator/gator_events.sh b/drivers/gator/gator_events.sh new file mode 100755 index 00000000000..5467dd6d17d --- 
/dev/null +++ b/drivers/gator/gator_events.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +EVENTS=`grep gator_events_init *.c | sed 's/.\+gator_events_init(\(.\+\)).\+/\1/'` + +( + echo /\* This file is auto generated \*/ + echo + for EVENT in $EVENTS; do + echo __weak int $EVENT\(void\)\; + done + echo + echo static int \(*gator_events_list[]\)\(void\) = { + for EVENT in $EVENTS; do + echo \ $EVENT, + done + echo }\; +) > $1.tmp + +cmp -s $1 $1.tmp && rm $1.tmp || mv $1.tmp $1 diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c new file mode 100644 index 00000000000..4f1bca6e2db --- /dev/null +++ b/drivers/gator/gator_events_armv6.c @@ -0,0 +1,244 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "gator.h" + +// gator_events_perf_pmu.c is used if perf is supported +#if GATOR_NO_PERF_SUPPORT + +static const char *pmnc_name; + +/* + * Per-CPU PMCR + */ +#define PMCR_E (1 << 0) /* Enable */ +#define PMCR_P (1 << 1) /* Count reset */ +#define PMCR_C (1 << 2) /* Cycle counter reset */ +#define PMCR_OFL_PMN0 (1 << 8) /* Count reg 0 overflow */ +#define PMCR_OFL_PMN1 (1 << 9) /* Count reg 1 overflow */ +#define PMCR_OFL_CCNT (1 << 10) /* Cycle counter overflow */ + +#define PMN0 0 +#define PMN1 1 +#define CCNT 2 +#define CNTMAX (CCNT+1) + +static int pmnc_counters = 0; +static unsigned long pmnc_enabled[CNTMAX]; +static unsigned long pmnc_event[CNTMAX]; +static unsigned long pmnc_key[CNTMAX]; + +static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt); + +static inline void armv6_pmnc_write(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0x0ffff77f; + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r" (val)); +} + +static inline u32 armv6_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r" (val)); + return val; +} + +static void armv6_pmnc_reset_counter(unsigned int cnt) +{ + u32 val = 0; + switch (cnt) { + case CCNT: + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r" (val)); + break; + case PMN0: + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r" (val)); + break; + case PMN1: + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r" (val)); + break; + } +} + +int gator_events_armv6_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int i; + + pmnc_counters = 3; + + for (i = PMN0; i <= CCNT; i++) { + char buf[40]; + if (i == CCNT) { + snprintf(buf, sizeof buf, "ARM_%s_ccnt", pmnc_name); + } else { + snprintf(buf, sizeof buf, "ARM_%s_cnt%d", pmnc_name, i); + } + dir = gatorfs_mkdir(sb, root, buf); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]); + gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]); + if (i != CCNT) { + gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]); + } + } + + return 0; +} + +static int gator_events_armv6_online(int **buffer, bool migrate) +{ + unsigned int cnt, len = 0, cpu = smp_processor_id(); + u32 pmnc; + + if (armv6_pmnc_read() & PMCR_E) { + armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E); + } + + /* initialize PMNC, reset overflow, D bit, C bit and P bit. 
*/ + armv6_pmnc_write(PMCR_OFL_PMN0 | PMCR_OFL_PMN1 | PMCR_OFL_CCNT | + PMCR_C | PMCR_P); + + /* configure control register */ + for (pmnc = 0, cnt = PMN0; cnt <= CCNT; cnt++) { + unsigned long event; + + if (!pmnc_enabled[cnt]) + continue; + + event = pmnc_event[cnt] & 255; + + // Set event (if destined for PMNx counters) + if (cnt == PMN0) { + pmnc |= event << 20; + } else if (cnt == PMN1) { + pmnc |= event << 12; + } + + // Reset counter + armv6_pmnc_reset_counter(cnt); + } + armv6_pmnc_write(pmnc | PMCR_E); + + // return zero values, no need to read as the counters were just reset + for (cnt = PMN0; cnt <= CCNT; cnt++) { + if (pmnc_enabled[cnt]) { + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = 0; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, cpu); + + return len; +} + +static int gator_events_armv6_offline(int **buffer, bool migrate) +{ + unsigned int cnt; + + armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E); + for (cnt = PMN0; cnt <= CCNT; cnt++) { + armv6_pmnc_reset_counter(cnt); + } + + return 0; +} + +static void gator_events_armv6_stop(void) +{ + unsigned int cnt; + + for (cnt = PMN0; cnt <= CCNT; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + } +} + +static int gator_events_armv6_read(int **buffer) +{ + int cnt, len = 0; + int cpu = smp_processor_id(); + + // a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled + if (!(armv6_pmnc_read() & PMCR_E)) { + return 0; + } + + for (cnt = PMN0; cnt <= CCNT; cnt++) { + if (pmnc_enabled[cnt]) { + u32 value = 0; + switch (cnt) { + case CCNT: + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r" (value)); + break; + case PMN0: + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r" (value)); + break; + case PMN1: + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r" (value)); + break; + } + armv6_pmnc_reset_counter(cnt); + + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = value; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, cpu); + + return len; +} + +static struct gator_interface gator_events_armv6_interface = { + .create_files = gator_events_armv6_create_files, + .stop = gator_events_armv6_stop, + .online = gator_events_armv6_online, + .offline = gator_events_armv6_offline, + .read = gator_events_armv6_read, +}; + +int gator_events_armv6_init(void) +{ + unsigned int cnt; + + switch (gator_cpuid()) { + case ARM1136: + case ARM1156: + case ARM1176: + pmnc_name = "ARM11"; + break; + case ARM11MPCORE: + pmnc_name = "ARM11MPCore"; + break; + default: + return -1; + } + + for (cnt = PMN0; cnt <= CCNT; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + pmnc_key[cnt] = gator_events_get_key(); + } + + return gator_events_install(&gator_events_armv6_interface); +} + +gator_events_init(gator_events_armv6_init); + +#else +int gator_events_armv6_init(void) +{ + return -1; +} +#endif diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c new file mode 100644 index 00000000000..58f29566eea --- /dev/null +++ b/drivers/gator/gator_events_armv7.c @@ -0,0 +1,319 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Disabling interrupts + * Many of the functions below disable interrupts via local_irq_save(). 
This disabling of interrupts is done to prevent any race conditions + * between multiple entities (e.g. hrtimer interrupts and event based interrupts) calling the same functions. As accessing the pmu involves + * several steps (disable, select, read, enable), these steps must be performed atomically. Normal synchronization routines cannot be used + * as these functions are being called from interrupt context. + */ + +#include "gator.h" + +// gator_events_perf_pmu.c is used if perf is supported +#if GATOR_NO_PERF_SUPPORT + +// Per-CPU PMNC: config reg +#define PMNC_E (1 << 0) /* Enable all counters */ +#define PMNC_P (1 << 1) /* Reset all counters */ +#define PMNC_C (1 << 2) /* Cycle counter reset */ +#define PMNC_MASK 0x3f /* Mask for writable bits */ + +// ccnt reg +#define CCNT_REG (1 << 31) + +#define CCNT 0 +#define CNT0 1 +#define CNTMAX (6+1) + +static const char *pmnc_name; +static int pmnc_counters; + +static unsigned long pmnc_enabled[CNTMAX]; +static unsigned long pmnc_event[CNTMAX]; +static unsigned long pmnc_key[CNTMAX]; + +static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt); + +inline void armv7_pmnc_write(u32 val) +{ + val &= PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val)); +} + +inline u32 armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + return val; +} + +inline u32 armv7_ccnt_read(u32 reset_value) +{ + unsigned long flags; + u32 newval = -reset_value; + u32 den = CCNT_REG; + u32 val; + + local_irq_save(flags); + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den)); // disable + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); // read + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (newval)); // new value + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den)); // enable + local_irq_restore(flags); + + return val; +} + +inline u32 armv7_cntn_read(unsigned int cnt, u32 reset_value) +{ + unsigned long flags; + u32 newval = -reset_value; + u32 sel = (cnt - CNT0); + u32 den = 1 << sel; + u32 oldval; + + local_irq_save(flags); + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den)); // disable + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel)); // select + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (oldval)); // read + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (newval)); // new value + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den)); // enable + local_irq_restore(flags); + + return oldval; +} + +static inline void armv7_pmnc_disable_interrupt(unsigned int cnt) +{ + u32 val = cnt ? (1 << (cnt - CNT0)) : (1 << 31); + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); +} + +inline u32 armv7_pmnc_reset_interrupt(void) +{ + // Get and reset overflow status flags + u32 flags; + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (flags)); + flags &= 0x8000003f; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (flags)); + return flags; +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int cnt) +{ + u32 val = cnt ? (1 << (cnt - CNT0)) : CCNT_REG; + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + return cnt; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int cnt) +{ + u32 val = cnt ? 
(1 << (cnt - CNT0)) : CCNT_REG; + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + return cnt; +} + +static inline int armv7_pmnc_select_counter(unsigned int cnt) +{ + u32 val = (cnt - CNT0); + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + return cnt; +} + +static inline void armv7_pmnc_write_evtsel(unsigned int cnt, u32 val) +{ + if (armv7_pmnc_select_counter(cnt) == cnt) { + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static int gator_events_armv7_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int i; + + for (i = 0; i < pmnc_counters; i++) { + char buf[40]; + if (i == 0) { + snprintf(buf, sizeof buf, "ARM_%s_ccnt", pmnc_name); + } else { + snprintf(buf, sizeof buf, "ARM_%s_cnt%d", pmnc_name, i - 1); + } + dir = gatorfs_mkdir(sb, root, buf); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]); + gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]); + if (i > 0) { + gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]); + } + } + + return 0; +} + +static int gator_events_armv7_online(int **buffer, bool migrate) +{ + unsigned int cnt, len = 0, cpu = smp_processor_id(); + + if (armv7_pmnc_read() & PMNC_E) { + armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E); + } + + // Initialize & Reset PMNC: C bit and P bit + armv7_pmnc_write(PMNC_P | PMNC_C); + + // Reset overflow flags + armv7_pmnc_reset_interrupt(); + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + unsigned long event; + + if (!pmnc_enabled[cnt]) + continue; + + // Disable counter + armv7_pmnc_disable_counter(cnt); + + event = pmnc_event[cnt] & 255; + + // Set event (if destined for PMNx counters), we don't need to set the event if it's a cycle count + if (cnt != CCNT) + armv7_pmnc_write_evtsel(cnt, event); + + armv7_pmnc_disable_interrupt(cnt); + + // Reset counter + cnt ? 
armv7_cntn_read(cnt, 0) : armv7_ccnt_read(0); + + // Enable counter + armv7_pmnc_enable_counter(cnt); + } + + // enable + armv7_pmnc_write(armv7_pmnc_read() | PMNC_E); + + // return zero values, no need to read as the counters were just reset + for (cnt = 0; cnt < pmnc_counters; cnt++) { + if (pmnc_enabled[cnt]) { + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = 0; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, cpu); + + return len; +} + +static int gator_events_armv7_offline(int **buffer, bool migrate) +{ + // disable all counters, including PMCCNTR; overflow IRQs will not be signaled + armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E); + + return 0; +} + +static void gator_events_armv7_stop(void) +{ + unsigned int cnt; + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + } +} + +static int gator_events_armv7_read(int **buffer) +{ + int cnt, len = 0; + int cpu = smp_processor_id(); + + // a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled + if (!(armv7_pmnc_read() & PMNC_E)) { + return 0; + } + + for (cnt = 0; cnt < pmnc_counters; cnt++) { + if (pmnc_enabled[cnt]) { + int value; + if (cnt == CCNT) { + value = armv7_ccnt_read(0); + } else { + value = armv7_cntn_read(cnt, 0); + } + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = value; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, cpu); + + return len; +} + +static struct gator_interface gator_events_armv7_interface = { + .create_files = gator_events_armv7_create_files, + .stop = gator_events_armv7_stop, + .online = gator_events_armv7_online, + .offline = gator_events_armv7_offline, + .read = gator_events_armv7_read, +}; + +int gator_events_armv7_init(void) +{ + unsigned int cnt; + + switch (gator_cpuid()) { + case CORTEX_A5: + pmnc_name = "Cortex-A5"; + pmnc_counters = 2; + break; + case CORTEX_A7: + pmnc_name = "Cortex-A7"; + pmnc_counters = 4; + break; + case CORTEX_A8: + pmnc_name = "Cortex-A8"; + pmnc_counters = 4; + break; + case CORTEX_A9: + pmnc_name = "Cortex-A9"; + pmnc_counters = 6; + break; + case CORTEX_A15: + pmnc_name = "Cortex-A15"; + pmnc_counters = 6; + break; + default: + return -1; + } + + pmnc_counters++; // CNT[n] + CCNT + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + pmnc_key[cnt] = gator_events_get_key(); + } + + return gator_events_install(&gator_events_armv7_interface); +} + +gator_events_init(gator_events_armv7_init); + +#else +int gator_events_armv7_init(void) +{ + return -1; +} +#endif diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c new file mode 100644 index 00000000000..56c6a673652 --- /dev/null +++ b/drivers/gator/gator_events_block.c @@ -0,0 +1,155 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include "gator.h" +#include <trace/events/block.h> + +#define BLOCK_RQ_WR 0 +#define BLOCK_RQ_RD 1 + +#define BLOCK_TOTAL (BLOCK_RQ_RD+1) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) +#define EVENTWRITE REQ_RW +#else +#define EVENTWRITE REQ_WRITE +#endif + +static ulong block_rq_wr_enabled; +static ulong block_rq_rd_enabled; +static ulong block_rq_wr_key; +static ulong block_rq_rd_key; +static atomic_t blockCnt[BLOCK_TOTAL]; +static int blockGet[BLOCK_TOTAL * 4]; + +GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq)) +{ + int write, size; + + if (!rq) + return; + + write = rq->cmd_flags & EVENTWRITE; + size = rq->resid_len; + + if (!size) + return; + + if (write) { + if (block_rq_wr_enabled) { + atomic_add(size, &blockCnt[BLOCK_RQ_WR]); + } + } else { + if (block_rq_rd_enabled) { + atomic_add(size, &blockCnt[BLOCK_RQ_RD]); + } + } +} + +static int gator_events_block_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + + /* block_complete_wr */ + dir = gatorfs_mkdir(sb, root, "Linux_block_rq_wr"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &block_rq_wr_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_wr_key); + + /* block_complete_rd */ + dir = gatorfs_mkdir(sb, root, "Linux_block_rq_rd"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &block_rq_rd_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_rd_key); + + return 0; +} + +static int gator_events_block_start(void) +{ + // register tracepoints + if (block_rq_wr_enabled || block_rq_rd_enabled) + if (GATOR_REGISTER_TRACE(block_rq_complete)) + goto fail_block_rq_exit; + pr_debug("gator: registered block event tracepoints\n"); + + return 0; + + // unregister tracepoints on error +fail_block_rq_exit: + pr_err("gator: block event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n"); + + return -1; +} + +static void gator_events_block_stop(void) +{ + if (block_rq_wr_enabled || block_rq_rd_enabled) + GATOR_UNREGISTER_TRACE(block_rq_complete); + pr_debug("gator: unregistered block event tracepoints\n"); + + block_rq_wr_enabled = 0; + block_rq_rd_enabled = 0; +} + +static int gator_events_block_read(int **buffer) +{ + int len, value, data = 0; + + if (!on_primary_core()) { + return 0; + } + + len = 0; + if (block_rq_wr_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_WR])) > 0) { + atomic_sub(value, &blockCnt[BLOCK_RQ_WR]); + blockGet[len++] = block_rq_wr_key; + blockGet[len++] = 0; // indicates to Streamline that value bytes were written now, not since the last message + blockGet[len++] = block_rq_wr_key; + blockGet[len++] = value; + data += value; + } + if (block_rq_rd_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_RD])) > 0) { + atomic_sub(value, &blockCnt[BLOCK_RQ_RD]); + blockGet[len++] = block_rq_rd_key; + blockGet[len++] = 0; // indicates to Streamline that value bytes were read now, not since the last message + blockGet[len++] = block_rq_rd_key; + blockGet[len++] = value; + data += value; + } + + if (buffer) + *buffer = blockGet; + + return len; +} + +static struct gator_interface gator_events_block_interface = { + .create_files = gator_events_block_create_files, + .start = gator_events_block_start, + .stop = gator_events_block_stop, + .read = gator_events_block_read, +}; + +int gator_events_block_init(void) +{ + block_rq_wr_enabled = 0; + block_rq_rd_enabled = 0; + + block_rq_wr_key = gator_events_get_key(); + 
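+	/* Keys are opaque identifiers handed out by gator_events_get_key();
+	 * they are exposed read-only through gatorfs and repeated in the
+	 * key/value pairs emitted by gator_events_block_read(), so the host
+	 * tool (Streamline) can attribute each value to the right counter. */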
block_rq_rd_key = gator_events_get_key(); + + return gator_events_install(&gator_events_block_interface); +} + +gator_events_init(gator_events_block_init); diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c new file mode 100644 index 00000000000..b4df7faefff --- /dev/null +++ b/drivers/gator/gator_events_irq.c @@ -0,0 +1,167 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include "gator.h" +#include <trace/events/irq.h> + +#define HARDIRQ 0 +#define SOFTIRQ 1 +#define TOTALIRQ (SOFTIRQ+1) + +static ulong hardirq_enabled; +static ulong softirq_enabled; +static ulong hardirq_key; +static ulong softirq_key; +static DEFINE_PER_CPU(atomic_t[TOTALIRQ], irqCnt); +static DEFINE_PER_CPU(int[TOTALIRQ * 2], irqGet); + +GATOR_DEFINE_PROBE(irq_handler_exit, + TP_PROTO(int irq, struct irqaction *action, int ret)) +{ + atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[HARDIRQ]); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) +GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec)) +#else +GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(unsigned int vec_nr)) +#endif +{ + atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[SOFTIRQ]); +} + +static int gator_events_irq_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + + /* irq */ + dir = gatorfs_mkdir(sb, root, "Linux_irq_irq"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &hardirq_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &hardirq_key); + + /* soft irq */ + dir = gatorfs_mkdir(sb, root, "Linux_irq_softirq"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &softirq_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &softirq_key); + + return 0; +} + +static int gator_events_irq_online(int **buffer, bool migrate) +{ + int len = 0, cpu = get_physical_cpu(); + + // synchronization with the irq_exit functions is not necessary as the values are being reset + if (hardirq_enabled) { + atomic_set(&per_cpu(irqCnt, cpu)[HARDIRQ], 0); + per_cpu(irqGet, cpu)[len++] = hardirq_key; + per_cpu(irqGet, cpu)[len++] = 0; + } + + if (softirq_enabled) { + atomic_set(&per_cpu(irqCnt, cpu)[SOFTIRQ], 0); + per_cpu(irqGet, cpu)[len++] = softirq_key; + per_cpu(irqGet, cpu)[len++] = 0; + } + + if (buffer) + *buffer = per_cpu(irqGet, cpu); + + return len; +} + +static int gator_events_irq_start(void) +{ + // register tracepoints + if (hardirq_enabled) + if (GATOR_REGISTER_TRACE(irq_handler_exit)) + goto fail_hardirq_exit; + if (softirq_enabled) + if (GATOR_REGISTER_TRACE(softirq_exit)) + goto fail_softirq_exit; + pr_debug("gator: registered irq tracepoints\n"); + + return 0; + + // unregister tracepoints on error +fail_softirq_exit: + if (hardirq_enabled) + GATOR_UNREGISTER_TRACE(irq_handler_exit); +fail_hardirq_exit: + pr_err("gator: irq tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n"); + + return -1; +} + +static void gator_events_irq_stop(void) +{ + if (hardirq_enabled) + GATOR_UNREGISTER_TRACE(irq_handler_exit); + if (softirq_enabled) + GATOR_UNREGISTER_TRACE(softirq_exit); + pr_debug("gator: unregistered irq tracepoints\n"); + + hardirq_enabled = 0; + softirq_enabled = 0; +} + +static int gator_events_irq_read(int **buffer) +{ + int len, 
value; + int cpu = get_physical_cpu(); + + len = 0; + if (hardirq_enabled) { + value = atomic_read(&per_cpu(irqCnt, cpu)[HARDIRQ]); + atomic_sub(value, &per_cpu(irqCnt, cpu)[HARDIRQ]); + + per_cpu(irqGet, cpu)[len++] = hardirq_key; + per_cpu(irqGet, cpu)[len++] = value; + } + + if (softirq_enabled) { + value = atomic_read(&per_cpu(irqCnt, cpu)[SOFTIRQ]); + atomic_sub(value, &per_cpu(irqCnt, cpu)[SOFTIRQ]); + + per_cpu(irqGet, cpu)[len++] = softirq_key; + per_cpu(irqGet, cpu)[len++] = value; + } + + if (buffer) + *buffer = per_cpu(irqGet, cpu); + + return len; +} + +static struct gator_interface gator_events_irq_interface = { + .create_files = gator_events_irq_create_files, + .online = gator_events_irq_online, + .start = gator_events_irq_start, + .stop = gator_events_irq_stop, + .read = gator_events_irq_read, +}; + +int gator_events_irq_init(void) +{ + hardirq_key = gator_events_get_key(); + softirq_key = gator_events_get_key(); + + hardirq_enabled = 0; + softirq_enabled = 0; + + return gator_events_install(&gator_events_irq_interface); +} + +gator_events_init(gator_events_irq_init); diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c new file mode 100644 index 00000000000..e646215f37f --- /dev/null +++ b/drivers/gator/gator_events_l2c-310.c @@ -0,0 +1,201 @@ +/** + * l2c310 (L2 Cache Controller) event counters for gator + * + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <asm/hardware/cache-l2x0.h> + +#include "gator.h" + +#define L2C310_COUNTERS_NUM 2 + +static struct { + unsigned long enabled; + unsigned long event; + unsigned long key; +} l2c310_counters[L2C310_COUNTERS_NUM]; + +static int l2c310_buffer[L2C310_COUNTERS_NUM * 2]; + +static void __iomem *l2c310_base; + +static void gator_events_l2c310_reset_counters(void) +{ + u32 val = readl(l2c310_base + L2X0_EVENT_CNT_CTRL); + + val |= ((1 << L2C310_COUNTERS_NUM) - 1) << 1; + + writel(val, l2c310_base + L2X0_EVENT_CNT_CTRL); +} + +static int gator_events_l2c310_create_files(struct super_block *sb, + struct dentry *root) +{ + int i; + + for (i = 0; i < L2C310_COUNTERS_NUM; i++) { + char buf[16]; + struct dentry *dir; + + snprintf(buf, sizeof(buf), "L2C-310_cnt%d", i); + dir = gatorfs_mkdir(sb, root, buf); + if (WARN_ON(!dir)) + return -1; + gatorfs_create_ulong(sb, dir, "enabled", + &l2c310_counters[i].enabled); + gatorfs_create_ulong(sb, dir, "event", + &l2c310_counters[i].event); + gatorfs_create_ro_ulong(sb, dir, "key", + &l2c310_counters[i].key); + } + + return 0; +} + +static int gator_events_l2c310_start(void) +{ + static const unsigned long l2x0_event_cntx_cfg[L2C310_COUNTERS_NUM] = { + L2X0_EVENT_CNT0_CFG, + L2X0_EVENT_CNT1_CFG, + }; + int i; + + /* Counter event sources */ + for (i = 0; i < L2C310_COUNTERS_NUM; i++) + writel((l2c310_counters[i].event & 0xf) << 2, + l2c310_base + l2x0_event_cntx_cfg[i]); + + gator_events_l2c310_reset_counters(); + + /* Event counter enable */ + writel(1, l2c310_base + L2X0_EVENT_CNT_CTRL); + + return 0; +} + +static void gator_events_l2c310_stop(void) +{ + /* Event counter disable */ + writel(0, l2c310_base + L2X0_EVENT_CNT_CTRL); +} + +static int gator_events_l2c310_read(int **buffer) +{ + static const unsigned 
long l2x0_event_cntx_val[L2C310_COUNTERS_NUM] = { + L2X0_EVENT_CNT0_VAL, + L2X0_EVENT_CNT1_VAL, + }; + int i; + int len = 0; + + if (!on_primary_core()) + return 0; + + for (i = 0; i < L2C310_COUNTERS_NUM; i++) { + if (l2c310_counters[i].enabled) { + l2c310_buffer[len++] = l2c310_counters[i].key; + l2c310_buffer[len++] = readl(l2c310_base + + l2x0_event_cntx_val[i]); + } + } + + /* l2c310 counters are saturating, not wrapping in case of overflow */ + gator_events_l2c310_reset_counters(); + + if (buffer) + *buffer = l2c310_buffer; + + return len; +} + +static struct gator_interface gator_events_l2c310_interface = { + .create_files = gator_events_l2c310_create_files, + .start = gator_events_l2c310_start, + .stop = gator_events_l2c310_stop, + .read = gator_events_l2c310_read, +}; + +#define L2C310_ADDR_PROBE (~0) + +MODULE_PARM_DESC(l2c310_addr, "L2C310 physical base address (0 to disable)"); +static unsigned long l2c310_addr = L2C310_ADDR_PROBE; +module_param(l2c310_addr, ulong, 0444); + +static void __iomem *gator_events_l2c310_probe(void) +{ + phys_addr_t variants[] = { +#if defined(CONFIG_ARCH_EXYNOS4) || defined(CONFIG_ARCH_S5PV310) + 0x10502000, +#endif +#if defined(CONFIG_ARCH_OMAP4) + 0x48242000, +#endif +#if defined(CONFIG_ARCH_TEGRA) + 0x50043000, +#endif +#if defined(CONFIG_ARCH_U8500) + 0xa0412000, +#endif +#if defined(CONFIG_ARCH_VEXPRESS) + 0x1e00a000, // A9x4 core tile (HBI-0191) + 0x2c0f0000, // New memory map tiles +#endif + }; + int i; + +#if defined(CONFIG_OF) + if (of_have_populated_dt()) + return of_iomap(of_find_compatible_node(NULL, + NULL, "arm,pl310-cache"), 0); +#endif + + for (i = 0; i < ARRAY_SIZE(variants); i++) { + void __iomem *base = ioremap(variants[i], SZ_4K); + + if (base) { + u32 cache_id = readl(base + L2X0_CACHE_ID); + + if ((cache_id & 0xff0003c0) == 0x410000c0) + return base; + + iounmap(base); + } + } + + return NULL; +} + +int gator_events_l2c310_init(void) +{ + int i; + + if (gator_cpuid() != CORTEX_A5 && gator_cpuid() != CORTEX_A9) + return -1; + + if (l2c310_addr == L2C310_ADDR_PROBE) + l2c310_base = gator_events_l2c310_probe(); + else if (l2c310_addr) + l2c310_base = ioremap(l2c310_addr, SZ_4K); + + if (!l2c310_base) + return -1; + + for (i = 0; i < L2C310_COUNTERS_NUM; i++) { + l2c310_counters[i].enabled = 0; + l2c310_counters[i].key = gator_events_get_key(); + } + + return gator_events_install(&gator_events_l2c310_interface); +} + +gator_events_init(gator_events_l2c310_init); diff --git a/drivers/gator/gator_events_mali_400.c b/drivers/gator/gator_events_mali_400.c new file mode 100644 index 00000000000..c5fd989886c --- /dev/null +++ b/drivers/gator/gator_events_mali_400.c @@ -0,0 +1,738 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "gator.h" + +#include <linux/module.h> +#include <linux/time.h> +#include <linux/math64.h> + +#include "linux/mali_linux_trace.h" + +#include "gator_events_mali_common.h" +#include "gator_events_mali_400.h" + +#if !defined(GATOR_MALI_INTERFACE_STYLE) +/* + * At the moment, we only have users with the old style interface, so + * make our life easier by making it the default... 
+ */ +#define GATOR_MALI_INTERFACE_STYLE (2) +#endif + +/* + * There are (currently) three different variants of the comms between gator and Mali: + * 1 (deprecated): No software counter support + * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears + * 3 (default): Single tracepoint for all s/w counters in a bundle. + * Interface style 3 is the default if no other is specified. 1 and 2 will be eliminated when + * existing Mali DDKs are upgraded. + */ + +#if !defined(GATOR_MALI_INTERFACE_STYLE) +#define GATOR_MALI_INTERFACE_STYLE (3) +#endif + +/* + * List of possible actions allowing DDK to be controlled by Streamline. + * The following numbers are used by DDK to control the frame buffer dumping. + */ +#define FBDUMP_CONTROL_ENABLE (1) +#define FBDUMP_CONTROL_RATE (2) +#define SW_EVENTS_ENABLE (3) +#define FBDUMP_CONTROL_RESIZE_FACTOR (4) + +/* + * Check that the MALI_SUPPORT define is set to one of the allowable device codes. + */ +#if (MALI_SUPPORT != MALI_400) +#error MALI_SUPPORT set to an invalid device code: expecting MALI_400 +#endif + +/* + * The number of fragment processors. Update to suit your hardware implementation. + */ +#define NUM_FP_UNITS (4) + +enum counters { + /* Timeline activity */ + ACTIVITY_VP = 0, + ACTIVITY_FP0, + ACTIVITY_FP1, + ACTIVITY_FP2, + ACTIVITY_FP3, + + /* L2 cache counters */ + COUNTER_L2_C0, + COUNTER_L2_C1, + + /* Vertex processor counters */ + COUNTER_VP_C0, + COUNTER_VP_C1, + + /* Fragment processor counters */ + COUNTER_FP0_C0, + COUNTER_FP0_C1, + COUNTER_FP1_C0, + COUNTER_FP1_C1, + COUNTER_FP2_C0, + COUNTER_FP2_C1, + COUNTER_FP3_C0, + COUNTER_FP3_C1, + + /* EGL Software Counters */ + COUNTER_EGL_BLIT_TIME, + + /* GLES Software Counters */ + COUNTER_GLES_DRAW_ELEMENTS_CALLS, + COUNTER_GLES_DRAW_ELEMENTS_NUM_INDICES, + COUNTER_GLES_DRAW_ELEMENTS_NUM_TRANSFORMED, + COUNTER_GLES_DRAW_ARRAYS_CALLS, + COUNTER_GLES_DRAW_ARRAYS_NUM_TRANSFORMED, + COUNTER_GLES_DRAW_POINTS, + COUNTER_GLES_DRAW_LINES, + COUNTER_GLES_DRAW_LINE_LOOP, + COUNTER_GLES_DRAW_LINE_STRIP, + COUNTER_GLES_DRAW_TRIANGLES, + COUNTER_GLES_DRAW_TRIANGLE_STRIP, + COUNTER_GLES_DRAW_TRIANGLE_FAN, + COUNTER_GLES_NON_VBO_DATA_COPY_TIME, + COUNTER_GLES_UNIFORM_BYTES_COPIED_TO_MALI, + COUNTER_GLES_UPLOAD_TEXTURE_TIME, + COUNTER_GLES_UPLOAD_VBO_TIME, + COUNTER_GLES_NUM_FLUSHES, + COUNTER_GLES_NUM_VSHADERS_GENERATED, + COUNTER_GLES_NUM_FSHADERS_GENERATED, + COUNTER_GLES_VSHADER_GEN_TIME, + COUNTER_GLES_FSHADER_GEN_TIME, + COUNTER_GLES_INPUT_TRIANGLES, + COUNTER_GLES_VXCACHE_HIT, + COUNTER_GLES_VXCACHE_MISS, + COUNTER_GLES_VXCACHE_COLLISION, + COUNTER_GLES_CULLED_TRIANGLES, + COUNTER_GLES_CULLED_LINES, + COUNTER_GLES_BACKFACE_TRIANGLES, + COUNTER_GLES_GBCLIP_TRIANGLES, + COUNTER_GLES_GBCLIP_LINES, + COUNTER_GLES_TRIANGLES_DRAWN, + COUNTER_GLES_DRAWCALL_TIME, + COUNTER_GLES_TRIANGLES_COUNT, + COUNTER_GLES_INDEPENDENT_TRIANGLES_COUNT, + COUNTER_GLES_STRIP_TRIANGLES_COUNT, + COUNTER_GLES_FAN_TRIANGLES_COUNT, + COUNTER_GLES_LINES_COUNT, + COUNTER_GLES_INDEPENDENT_LINES_COUNT, + COUNTER_GLES_STRIP_LINES_COUNT, + COUNTER_GLES_LOOP_LINES_COUNT, + + COUNTER_FILMSTRIP, + COUNTER_FREQUENCY, + COUNTER_VOLTAGE, + + NUMBER_OF_EVENTS +}; + +#define FIRST_ACTIVITY_EVENT ACTIVITY_VP +#define LAST_ACTIVITY_EVENT ACTIVITY_FP3 + +#define FIRST_HW_COUNTER COUNTER_L2_C0 +#define LAST_HW_COUNTER COUNTER_FP3_C1 + +#define FIRST_SW_COUNTER COUNTER_EGL_BLIT_TIME +#define LAST_SW_COUNTER COUNTER_GLES_LOOP_LINES_COUNT + +#define FIRST_SPECIAL_COUNTER COUNTER_FILMSTRIP +#define 
LAST_SPECIAL_COUNTER COUNTER_VOLTAGE + +/* gatorfs variables for counter enable state, + * the event the counter should count and the + * 'key' (a unique id set by gatord and returned + * by gator.ko) + */ +static unsigned long counter_enabled[NUMBER_OF_EVENTS]; +static unsigned long counter_event[NUMBER_OF_EVENTS]; +static unsigned long counter_key[NUMBER_OF_EVENTS]; + +/* The data we have recorded */ +static u32 counter_data[NUMBER_OF_EVENTS]; +/* The address to sample (or 0 if samples are sent to us) */ +static u32 *counter_address[NUMBER_OF_EVENTS]; + +/* An array used to return the data we recorded + * as key,value pairs hence the *2 + */ +static unsigned long counter_dump[NUMBER_OF_EVENTS * 2]; +static unsigned long counter_prev[NUMBER_OF_EVENTS]; + +/* Note whether tracepoints have been registered */ +static int trace_registered; + +/** + * Calculate the difference and handle the overflow. + */ +static u32 get_difference(u32 start, u32 end) +{ + if (start - end >= 0) { + return start - end; + } + + // Mali counters are unsigned 32 bit values that wrap. + return (4294967295u - end) + start; +} + +/** + * Returns non-zero if the given counter ID is an activity counter. + */ +static inline int is_activity_counter(unsigned int event_id) +{ + return (event_id >= FIRST_ACTIVITY_EVENT && + event_id <= LAST_ACTIVITY_EVENT); +} + +/** + * Returns non-zero if the given counter ID is a hardware counter. + */ +static inline int is_hw_counter(unsigned int event_id) +{ + return (event_id >= FIRST_HW_COUNTER && event_id <= LAST_HW_COUNTER); +} + +#if GATOR_MALI_INTERFACE_STYLE == 2 +/** + * Returns non-zero if the given counter ID is a software counter. + */ +static inline int is_sw_counter(unsigned int event_id) +{ + return (event_id >= FIRST_SW_COUNTER && event_id <= LAST_SW_COUNTER); +} +#endif + +#if GATOR_MALI_INTERFACE_STYLE == 2 +/* + * The Mali DDK uses s64 types to contain software counter values, but gator + * can only use a maximum of 32 bits. This function scales a software counter + * to an appropriate range. 
+ */ +static u32 scale_sw_counter_value(unsigned int event_id, signed long long value) +{ + u32 scaled_value; + + switch (event_id) { + case COUNTER_GLES_UPLOAD_TEXTURE_TIME: + case COUNTER_GLES_UPLOAD_VBO_TIME: + scaled_value = (u32)div_s64(value, 1000000); + break; + default: + scaled_value = (u32)value; + break; + } + + return scaled_value; +} +#endif + +/* Probe for continuously sampled counter */ +#if 0 //WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING +GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr)) +{ + /* Turning on too many pr_debug statements in frequently called functions + * can cause stability and/or performance problems + */ + //pr_debug("gator: mali_sample_address %d %d\n", event_id, addr); + if (event_id >= ACTIVITY_VP && event_id <= COUNTER_FP3_C1) { + counter_address[event_id] = addr; + } +} +#endif + +/* Probe for hardware counter events */ +GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value)) +{ + /* Turning on too many pr_debug statements in frequently called functions + * can cause stability and/or performance problems + */ + //pr_debug("gator: mali_hw_counter %d %d\n", event_id, value); + if (is_hw_counter(event_id)) { + counter_data[event_id] = value; + } +} + +#if GATOR_MALI_INTERFACE_STYLE == 2 +GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value)) +{ + if (is_sw_counter(event_id)) { + counter_data[event_id] = scale_sw_counter_value(event_id, value); + } +} +#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */ + +#if GATOR_MALI_INTERFACE_STYLE == 3 +GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters)) +{ + u32 i; + + /* Copy over the values for those counters which are enabled. */ + for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) { + if (counter_enabled[i]) { + counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]); + } + } +} +#endif /* GATOR_MALI_INTERFACE_STYLE == 3 */ + +static int create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int event; + int n_fp = NUM_FP_UNITS; + + const char *mali_name = gator_mali_get_mali_name(); + + /* + * Create the filesystem entries for vertex processor, fragment processor + * and L2 cache timeline and hardware counters. Software counters get + * special handling after this block. + */ + for (event = FIRST_ACTIVITY_EVENT; event <= LAST_HW_COUNTER; event++) { + char buf[40]; + + /* + * We can skip this event if it's for a non-existent fragment + * processor. + */ + if (((event - ACTIVITY_FP0 >= n_fp) && (event < COUNTER_L2_C0)) + || (((event - COUNTER_FP0_C0) / 2 >= n_fp))) { + continue; + } + + /* Otherwise, set up the filesystem entry for this event. 
*/ + switch (event) { + case ACTIVITY_VP: + snprintf(buf, sizeof buf, "ARM_%s_VP_active", mali_name); + break; + case ACTIVITY_FP0: + case ACTIVITY_FP1: + case ACTIVITY_FP2: + case ACTIVITY_FP3: + snprintf(buf, sizeof buf, "ARM_%s_FP%d_active", + mali_name, event - ACTIVITY_FP0); + break; + case COUNTER_L2_C0: + case COUNTER_L2_C1: + snprintf(buf, sizeof buf, "ARM_%s_L2_cnt%d", + mali_name, event - COUNTER_L2_C0); + break; + case COUNTER_VP_C0: + case COUNTER_VP_C1: + snprintf(buf, sizeof buf, "ARM_%s_VP_cnt%d", + mali_name, event - COUNTER_VP_C0); + break; + case COUNTER_FP0_C0: + case COUNTER_FP0_C1: + case COUNTER_FP1_C0: + case COUNTER_FP1_C1: + case COUNTER_FP2_C0: + case COUNTER_FP2_C1: + case COUNTER_FP3_C0: + case COUNTER_FP3_C1: + snprintf(buf, sizeof buf, "ARM_%s_FP%d_cnt%d", + mali_name, (event - COUNTER_FP0_C0) / 2, + (event - COUNTER_FP0_C0) % 2); + break; + default: + printk("gator: trying to create file for non-existent counter (%d)\n", event); + continue; + } + + dir = gatorfs_mkdir(sb, root, buf); + + if (!dir) { + return -1; + } + + gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]); + + /* Only create an event node for counters that can change what they count */ + if (event >= COUNTER_L2_C0) { + gatorfs_create_ulong(sb, dir, "event", &counter_event[event]); + } + + gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]); + } + + /* Now set up the software counter entries */ + for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) { + char buf[40]; + + snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event); + + dir = gatorfs_mkdir(sb, root, buf); + + if (!dir) { + return -1; + } + + gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]); + gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]); + } + + /* Now set up the special counter entries */ + for (event = FIRST_SPECIAL_COUNTER; event <= LAST_SPECIAL_COUNTER; event++) { + char buf[40]; + + switch (event) { + case COUNTER_FILMSTRIP: + snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name); + break; + + case COUNTER_FREQUENCY: + snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name); + break; + + case COUNTER_VOLTAGE: + snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name); + break; + + default: + break; + } + + dir = gatorfs_mkdir(sb, root, buf); + + if (!dir) { + return -1; + } + + gatorfs_create_ulong(sb, dir, "event", &counter_event[event]); + gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]); + gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]); + } + + return 0; +} + +/* + * Local store for the get_counters entry point into the DDK. + * This is stored here since it is used very regularly. + */ +static mali_profiling_get_counters_type *mali_get_counters = NULL; + +/* + * Examine list of software counters and determine if any one is enabled. + * Returns 1 if any counter is enabled, 0 if none is. 
+ */ +static int is_any_sw_counter_enabled(void) +{ + unsigned int i; + + for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) { + if (counter_enabled[i]) { + return 1; /* At least one counter is enabled */ + } + } + + return 0; /* No s/w counters enabled */ +} + +static void mali_counter_initialize(void) +{ + /* If a Mali driver is present and exporting the appropriate symbol + * then we can request the HW counters (of which there are only 2) + * be configured to count the desired events + */ + mali_profiling_set_event_type *mali_set_hw_event; + mali_osk_fb_control_set_type *mali_set_fb_event; + mali_profiling_control_type *mali_control; + + mali_set_hw_event = symbol_get(_mali_profiling_set_event); + + if (mali_set_hw_event) { + int i; + + pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event); + + for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) { + if (counter_enabled[i]) { + mali_set_hw_event(i, counter_event[i]); + } else { + mali_set_hw_event(i, 0xFFFFFFFF); + } + } + + symbol_put(_mali_profiling_set_event); + } else { + printk("gator: mali online _mali_profiling_set_event symbol not found\n"); + } + + mali_set_fb_event = symbol_get(_mali_osk_fb_control_set); + + if (mali_set_fb_event) { + pr_debug("gator: mali online _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event); + + mali_set_fb_event(0, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0)); + + symbol_put(_mali_osk_fb_control_set); + } else { + printk("gator: mali online _mali_osk_fb_control_set symbol not found\n"); + } + + /* Generic control interface for Mali DDK. */ + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + /* The event attribute in the XML file keeps the actual frame rate. */ + unsigned int rate = counter_event[COUNTER_FILMSTRIP] & 0xff; + unsigned int resize_factor = (counter_event[COUNTER_FILMSTRIP] >> 8) & 0xff; + + pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control); + + mali_control(SW_EVENTS_ENABLE, (is_any_sw_counter_enabled() ? 1 : 0)); + mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0)); + mali_control(FBDUMP_CONTROL_RATE, rate); + mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor); + + pr_debug("gator: sent mali_control enabled=%d, rate=%d\n", (counter_enabled[COUNTER_FILMSTRIP] ? 
1 : 0), rate); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali online _mali_profiling_control symbol not found\n"); + } + + mali_get_counters = symbol_get(_mali_profiling_get_counters); + if (mali_get_counters) { + pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters); + counter_prev[COUNTER_L2_C0] = 0; + counter_prev[COUNTER_L2_C1] = 0; + } else { + pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined"); + } +} + +static void mali_counter_deinitialize(void) +{ + mali_profiling_set_event_type *mali_set_hw_event; + mali_osk_fb_control_set_type *mali_set_fb_event; + mali_profiling_control_type *mali_control; + + mali_set_hw_event = symbol_get(_mali_profiling_set_event); + + if (mali_set_hw_event) { + int i; + + pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event); + for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) { + mali_set_hw_event(i, 0xFFFFFFFF); + } + + symbol_put(_mali_profiling_set_event); + } else { + printk("gator: mali offline _mali_profiling_set_event symbol not found\n"); + } + + mali_set_fb_event = symbol_get(_mali_osk_fb_control_set); + + if (mali_set_fb_event) { + pr_debug("gator: mali offline _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event); + + mali_set_fb_event(0, 0); + + symbol_put(_mali_osk_fb_control_set); + } else { + printk("gator: mali offline _mali_osk_fb_control_set symbol not found\n"); + } + + /* Generic control interface for Mali DDK. */ + mali_control = symbol_get(_mali_profiling_control); + + if (mali_control) { + pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_set_fb_event); + + /* Reset the DDK state - disable counter collection */ + mali_control(SW_EVENTS_ENABLE, 0); + + mali_control(FBDUMP_CONTROL_ENABLE, 0); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali offline _mali_profiling_control symbol not found\n"); + } + + if (mali_get_counters) { + symbol_put(_mali_profiling_get_counters); + } + +} + +static int start(void) +{ + // register tracepoints + if (GATOR_REGISTER_TRACE(mali_hw_counter)) { + printk("gator: mali_hw_counter tracepoint failed to activate\n"); + return -1; + } + +#if GATOR_MALI_INTERFACE_STYLE == 1 + /* None. */ +#elif GATOR_MALI_INTERFACE_STYLE == 2 + /* For patched Mali driver. */ + if (GATOR_REGISTER_TRACE(mali_sw_counter)) { + printk("gator: mali_sw_counter tracepoint failed to activate\n"); + return -1; + } +#elif GATOR_MALI_INTERFACE_STYLE == 3 +/* For Mali drivers with built-in support. */ + if (GATOR_REGISTER_TRACE(mali_sw_counters)) { + printk("gator: mali_sw_counters tracepoint failed to activate\n"); + return -1; + } +#else +#error Unknown GATOR_MALI_INTERFACE_STYLE option. +#endif + + trace_registered = 1; + + mali_counter_initialize(); + return 0; +} + +static void stop(void) +{ + unsigned int cnt; + + pr_debug("gator: mali stop\n"); + + if (trace_registered) { + GATOR_UNREGISTER_TRACE(mali_hw_counter); + +#if GATOR_MALI_INTERFACE_STYLE == 1 + /* None. */ +#elif GATOR_MALI_INTERFACE_STYLE == 2 + /* For patched Mali driver. */ + GATOR_UNREGISTER_TRACE(mali_sw_counter); +#elif GATOR_MALI_INTERFACE_STYLE == 3 + /* For Mali drivers with built-in support. */ + GATOR_UNREGISTER_TRACE(mali_sw_counters); +#else +#error Unknown GATOR_MALI_INTERFACE_STYLE option. 
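+	/* As in start(), an unrecognised GATOR_MALI_INTERFACE_STYLE is made a
+	 * build-time error rather than silently skipping the software counter
+	 * tracepoints. */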
+#endif + + pr_debug("gator: mali timeline tracepoint deactivated\n"); + + trace_registered = 0; + } + + for (cnt = FIRST_ACTIVITY_EVENT; cnt < NUMBER_OF_EVENTS; cnt++) { + counter_enabled[cnt] = 0; + counter_event[cnt] = 0; + counter_address[cnt] = NULL; + } + + mali_counter_deinitialize(); +} + +static int read(int **buffer) +{ + int cnt, len = 0; + + if (!on_primary_core()) + return 0; + + // Read the L2 C0 and C1 here. + if (counter_enabled[COUNTER_L2_C0] || counter_enabled[COUNTER_L2_C1]) { + u32 src0 = 0; + u32 val0 = 0; + u32 src1 = 0; + u32 val1 = 0; + + // Poke the driver to get the counter values + if (mali_get_counters) { + mali_get_counters(&src0, &val0, &src1, &val1); + } + + if (counter_enabled[COUNTER_L2_C0]) { + // Calculate and save src0's counter val0 + counter_dump[len++] = counter_key[COUNTER_L2_C0]; + counter_dump[len++] = get_difference(val0, counter_prev[COUNTER_L2_C0]); + } + + if (counter_enabled[COUNTER_L2_C1]) { + // Calculate and save src1's counter val1 + counter_dump[len++] = counter_key[COUNTER_L2_C1]; + counter_dump[len++] = get_difference(val1, counter_prev[COUNTER_L2_C1]); + } + + // Save the previous values for the counters. + counter_prev[COUNTER_L2_C0] = val0; + counter_prev[COUNTER_L2_C1] = val1; + } + + // Process other (non-timeline) counters. + for (cnt = COUNTER_VP_C0; cnt <= LAST_SW_COUNTER; cnt++) { + if (counter_enabled[cnt]) { + counter_dump[len++] = counter_key[cnt]; + counter_dump[len++] = counter_data[cnt]; + + counter_data[cnt] = 0; + } + } + + /* + * Add in the voltage and frequency counters if enabled. Note that, since these are + * actually passed as events, the counter value should not be cleared. + */ + cnt = COUNTER_FREQUENCY; + if (counter_enabled[cnt]) { + counter_dump[len++] = counter_key[cnt]; + counter_dump[len++] = counter_data[cnt]; + } + + cnt = COUNTER_VOLTAGE; + if (counter_enabled[cnt]) { + counter_dump[len++] = counter_key[cnt]; + counter_dump[len++] = counter_data[cnt]; + } + + if (buffer) { + *buffer = (int *)counter_dump; + } + + return len; +} + +static struct gator_interface gator_events_mali_interface = { + .create_files = create_files, + .start = start, + .stop = stop, + .read = read, +}; + +extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv) +{ + counter_data[COUNTER_FREQUENCY] = frequency_mhz; + counter_data[COUNTER_VOLTAGE] = voltage_mv; +} + +int gator_events_mali_init(void) +{ + unsigned int cnt; + + pr_debug("gator: mali init\n"); + + for (cnt = FIRST_ACTIVITY_EVENT; cnt < NUMBER_OF_EVENTS; cnt++) { + counter_enabled[cnt] = 0; + counter_event[cnt] = 0; + counter_key[cnt] = gator_events_get_key(); + counter_address[cnt] = NULL; + counter_data[cnt] = 0; + } + + trace_registered = 0; + + return gator_events_install(&gator_events_mali_interface); +} + +gator_events_init(gator_events_mali_init); diff --git a/drivers/gator/gator_events_mali_400.h b/drivers/gator/gator_events_mali_400.h new file mode 100644 index 00000000000..43aec49547c --- /dev/null +++ b/drivers/gator/gator_events_mali_400.h @@ -0,0 +1,18 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/* + * Header contains common definitions for the Mali-400 processors. 
+ */ +#if !defined(GATOR_EVENTS_MALI_400_H) +#define GATOR_EVENTS_MALI_400_H + +extern void gator_events_mali_log_dvfs_event(unsigned int d0, unsigned int d1); + +#endif /* GATOR_EVENTS_MALI_400_H */ diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c new file mode 100644 index 00000000000..22a517d65a4 --- /dev/null +++ b/drivers/gator/gator_events_mali_common.c @@ -0,0 +1,74 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include "gator_events_mali_common.h" + +static u32 gator_mali_get_id(void) +{ + return MALI_SUPPORT; +} + +extern const char *gator_mali_get_mali_name(void) +{ + u32 id = gator_mali_get_id(); + + switch (id) { + case MALI_T6xx: + return "Mali-T6xx"; + case MALI_400: + return "Mali-400"; + default: + pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id); + return "Mali-Unknown"; + } +} + +extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter) +{ + int err; + char buf[255]; + struct dentry *dir; + + /* If the counter name is empty ignore it */ + if (strlen(event_name) != 0) { + /* Set up the filesystem entry for this event. */ + snprintf(buf, sizeof(buf), "ARM_%s_%s", mali_name, event_name); + + dir = gatorfs_mkdir(sb, root, buf); + + if (dir == NULL) { + pr_debug("gator: Mali-T6xx: error creating file system for: %s (%s)", event_name, buf); + return -1; + } + + err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled); + if (err != 0) { + pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ulong for: %s (%s)", event_name, buf); + return -1; + } + err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key); + if (err != 0) { + pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf); + return -1; + } + } + + return 0; +} + +extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters) +{ + unsigned int cnt; + + for (cnt = 0; cnt < n_counters; cnt++) { + mali_counter *counter = &counters[cnt]; + + counter->key = gator_events_get_key(); + counter->enabled = 0; + } +} diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h new file mode 100644 index 00000000000..27eaacc4649 --- /dev/null +++ b/drivers/gator/gator_events_mali_common.h @@ -0,0 +1,88 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#if !defined(GATOR_EVENTS_MALI_COMMON_H) +#define GATOR_EVENTS_MALI_COMMON_H + +#include "gator.h" + +#include <linux/module.h> +#include <linux/time.h> +#include <linux/math64.h> +#include <linux/slab.h> +#include <asm/io.h> + +/* Device codes for each known GPU */ +#define MALI_400 (0x0b07) +#define MALI_T6xx (0x0056) + +/* Ensure that MALI_SUPPORT has been defined to something. */ +#ifndef MALI_SUPPORT +#error MALI_SUPPORT not defined! +#endif + +/* Values for the supported activity event types */ +#define ACTIVITY_START (1) +#define ACTIVITY_STOP (2) + +/* + * Runtime state information for a counter. 
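+ * The 'enabled' and 'key' fields are the values exposed through gatorfs by
+ * gator_mali_create_file_system(), so gatord can switch each counter on or off and read back its key.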
+ */ +typedef struct { + unsigned long key; /* 'key' (a unique id set by gatord and returned by gator.ko) */ + unsigned long enabled; /* counter enable state */ +} mali_counter; + +/* + * Mali-400 + */ +typedef void mali_profiling_set_event_type(unsigned int, unsigned int); +typedef void mali_osk_fb_control_set_type(unsigned int, unsigned int); +typedef void mali_profiling_control_type(unsigned int, unsigned int); +typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, unsigned int *, unsigned int *); + +/* + * Driver entry points for functions called directly by gator. + */ +extern void _mali_profiling_set_event(unsigned int, unsigned int); +extern void _mali_osk_fb_control_set(unsigned int, unsigned int); +extern void _mali_profiling_control(unsigned int, unsigned int); +extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *); + +/** + * Returns a name which identifies the GPU type (eg Mali-400, Mali-T6xx). + * + * @return The name as a constant string. + */ +extern const char *gator_mali_get_mali_name(void); + +/** + * Creates a filesystem entry under /dev/gator relating to the specified event name and key, and + * associate the key/enable values with this entry point. + * + * @param mali_name A name related to the type of GPU, obtained from a call to gator_mali_get_mali_name() + * @param event_name The name of the event. + * @param sb Linux super block + * @param root Directory under which the entry will be created. + * @param counter_key Ptr to location which will be associated with the counter key. + * @param counter_enabled Ptr to location which will be associated with the counter enable state. + * + * @return 0 if entry point was created, non-zero if not. + */ +extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter); + +/** + * Initializes the counter array. + * + * @param keys The array of counters + * @param n_counters The number of entries in each of the arrays. + */ +extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters); + +#endif /* GATOR_EVENTS_MALI_COMMON_H */ diff --git a/drivers/gator/gator_events_mali_t6xx.c b/drivers/gator/gator_events_mali_t6xx.c new file mode 100644 index 00000000000..2576a99a126 --- /dev/null +++ b/drivers/gator/gator_events_mali_t6xx.c @@ -0,0 +1,512 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include "gator.h" + +#include <linux/module.h> +#include <linux/time.h> +#include <linux/math64.h> +#include <linux/slab.h> +#include <asm/io.h> + +#include "linux/mali_linux_trace.h" + +#include "gator_events_mali_common.h" + +/* + * Check that the MALI_SUPPORT define is set to one of the allowable device codes. + */ +#if (MALI_SUPPORT != MALI_T6xx) +#error MALI_SUPPORT set to an invalid device code: expecting MALI_T6xx +#endif + +/* Counters for Mali-T6xx: + * + * - Timeline events + * They are tracepoints, but instead of reporting a number they report a START/STOP event. + * They are reported in Streamline as number of microseconds while that particular counter was active. + * + * - SW counters + * They are tracepoints reporting a particular number. 
+ * They are accumulated in sw_counter_data array until they are passed to Streamline, then they are zeroed. + * + * - Accumulators + * They are the same as software counters but their value is not zeroed. + */ + +/* Timeline (start/stop) activity */ +static const char *timeline_event_names[] = { + "PM_SHADER_0", + "PM_SHADER_1", + "PM_SHADER_2", + "PM_SHADER_3", + "PM_SHADER_4", + "PM_SHADER_5", + "PM_SHADER_6", + "PM_SHADER_7", + "PM_TILER_0", + "PM_L2_0", + "PM_L2_1", + "MMU_AS_0", + "MMU_AS_1", + "MMU_AS_2", + "MMU_AS_3" +}; + +enum { + PM_SHADER_0 = 0, + PM_SHADER_1, + PM_SHADER_2, + PM_SHADER_3, + PM_SHADER_4, + PM_SHADER_5, + PM_SHADER_6, + PM_SHADER_7, + PM_TILER_0, + PM_L2_0, + PM_L2_1, + MMU_AS_0, + MMU_AS_1, + MMU_AS_2, + MMU_AS_3 +}; +/* The number of shader blocks in the enum above */ +#define NUM_PM_SHADER (8) + +/* Software Counters */ +static const char *software_counter_names[] = { + "MMU_PAGE_FAULT_0", + "MMU_PAGE_FAULT_1", + "MMU_PAGE_FAULT_2", + "MMU_PAGE_FAULT_3" +}; + +enum { + MMU_PAGE_FAULT_0 = 0, + MMU_PAGE_FAULT_1, + MMU_PAGE_FAULT_2, + MMU_PAGE_FAULT_3 +}; + +/* Software Counters */ +static const char *accumulators_names[] = { + "TOTAL_ALLOC_PAGES" +}; + +enum { + TOTAL_ALLOC_PAGES = 0 +}; + +#define FIRST_TIMELINE_EVENT (0) +#define NUMBER_OF_TIMELINE_EVENTS (sizeof(timeline_event_names) / sizeof(timeline_event_names[0])) +#define FIRST_SOFTWARE_COUNTER (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS) +#define NUMBER_OF_SOFTWARE_COUNTERS (sizeof(software_counter_names) / sizeof(software_counter_names[0])) +#define FIRST_ACCUMULATOR (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS) +#define NUMBER_OF_ACCUMULATORS (sizeof(accumulators_names) / sizeof(accumulators_names[0])) +#define NUMBER_OF_EVENTS (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS) + +/* + * gatorfs variables for counter enable state + */ +static mali_counter counters[NUMBER_OF_EVENTS]; + +/* An array used to return the data we recorded + * as key,value pairs hence the *2 + */ +static unsigned long counter_dump[NUMBER_OF_EVENTS * 2]; + +/* + * Array holding counter start times (in ns) for each counter. A zero here + * indicates that the activity monitored by this counter is not running. + */ +static struct timespec timeline_event_starttime[NUMBER_OF_TIMELINE_EVENTS]; + +/* The data we have recorded */ +static unsigned int timeline_data[NUMBER_OF_TIMELINE_EVENTS]; +static unsigned int sw_counter_data[NUMBER_OF_SOFTWARE_COUNTERS]; +static unsigned int accumulators_data[NUMBER_OF_ACCUMULATORS]; + +/* Hold the previous timestamp, used to calculate the sample interval. */ +static struct timespec prev_timestamp; + +/** + * Returns the timespan (in microseconds) between the two specified timestamps. + * + * @param start Ptr to the start timestamp + * @param end Ptr to the end timestamp + * + * @return Number of microseconds between the two timestamps (can be negative if start follows end). + */ +static inline long get_duration_us(const struct timespec *start, const struct timespec *end) +{ + long event_duration_us = (end->tv_nsec - start->tv_nsec) / 1000; + event_duration_us += (end->tv_sec - start->tv_sec) * 1000000; + + return event_duration_us; +} + +static void record_timeline_event(unsigned int timeline_index, unsigned int type) +{ + struct timespec event_timestamp; + struct timespec *event_start = &timeline_event_starttime[timeline_index]; + + switch (type) { + case ACTIVITY_START: + /* Get the event time... 
*/ + getnstimeofday(&event_timestamp); + + /* Remember the start time if the activity is not already started */ + if (event_start->tv_sec == 0) { + *event_start = event_timestamp; /* Structure copy */ + } + break; + + case ACTIVITY_STOP: + /* if the counter was started... */ + if (event_start->tv_sec != 0) { + /* Get the event time... */ + getnstimeofday(&event_timestamp); + + /* Accumulate the duration in us */ + timeline_data[timeline_index] += get_duration_us(event_start, &event_timestamp); + + /* Reset the start time to indicate the activity is stopped. */ + event_start->tv_sec = 0; + } + break; + + default: + /* Other activity events are ignored. */ + break; + } +} + +/* + * Documentation about the following tracepoints is in mali_linux_trace.h + */ + +GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long long value)) +{ +#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define BIT_AT(value, pos) ((value >> pos) & 1) + + static unsigned long long previous_shader_bitmask = 0; + static unsigned long long previous_tiler_bitmask = 0; + static unsigned long long previous_l2_bitmask = 0; + + switch (event_id) { + case SHADER_PRESENT_LO: + { + unsigned long long changed_bitmask = previous_shader_bitmask ^ value; + int pos; + + for (pos = 0; pos < NUM_PM_SHADER; ++pos) { + if (BIT_AT(changed_bitmask, pos)) { + record_timeline_event(PM_SHADER_0 + pos, BIT_AT(value, pos) ? ACTIVITY_START : ACTIVITY_STOP); + } + } + + previous_shader_bitmask = value; + break; + } + + case TILER_PRESENT_LO: + { + unsigned long long changed = previous_tiler_bitmask ^ value; + + if (BIT_AT(changed, 0)) { + record_timeline_event(PM_TILER_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP); + } + + previous_tiler_bitmask = value; + break; + } + + case L2_PRESENT_LO: + { + unsigned long long changed = previous_l2_bitmask ^ value; + + if (BIT_AT(changed, 0)) { + record_timeline_event(PM_L2_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP); + } + if (BIT_AT(changed, 4)) { + record_timeline_event(PM_L2_1, BIT_AT(value, 4) ? 
ACTIVITY_START : ACTIVITY_STOP); + } + + previous_l2_bitmask = value; + break; + } + + default: + /* No other blocks are supported at present */ + break; + } + +#undef SHADER_PRESENT_LO +#undef TILER_PRESENT_LO +#undef L2_PRESENT_LO +#undef BIT_AT +} + +GATOR_DEFINE_PROBE(mali_page_fault_insert_pages, TP_PROTO(int event_id, unsigned long value)) +{ + /* We add to the previous since we may receive many tracepoints in one sample period */ + sw_counter_data[MMU_PAGE_FAULT_0 + event_id] += value; +} + +GATOR_DEFINE_PROBE(mali_mmu_as_in_use, TP_PROTO(int event_id)) +{ + record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_START); +} + +GATOR_DEFINE_PROBE(mali_mmu_as_released, TP_PROTO(int event_id)) +{ + record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_STOP); +} + +GATOR_DEFINE_PROBE(mali_total_alloc_pages_change, TP_PROTO(long long int event_id)) +{ + accumulators_data[TOTAL_ALLOC_PAGES] = event_id; +} + +static int create_files(struct super_block *sb, struct dentry *root) +{ + int event; + /* + * Create the filesystem for all events + */ + int counter_index = 0; + const char *mali_name = gator_mali_get_mali_name(); + + for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) { + if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event]) != 0) { + return -1; + } + counter_index++; + } + counter_index = 0; + for (event = FIRST_SOFTWARE_COUNTER; event < FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS; event++) { + if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event]) != 0) { + return -1; + } + counter_index++; + } + counter_index = 0; + for (event = FIRST_ACCUMULATOR; event < FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS; event++) { + if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event]) != 0) { + return -1; + } + counter_index++; + } + + return 0; +} + +static int register_tracepoints(void) +{ + if (GATOR_REGISTER_TRACE(mali_pm_status)) { + pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_page_fault_insert_pages)) { + pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_mmu_as_in_use)) { + pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_mmu_as_released)) { + pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint failed to activate\n"); + return 0; + } + + if (GATOR_REGISTER_TRACE(mali_total_alloc_pages_change)) { + pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint failed to activate\n"); + return 0; + } + + pr_debug("gator: Mali-T6xx: start\n"); + pr_debug("gator: Mali-T6xx: mali_pm_status probe is at %p\n", &probe_mali_pm_status); + pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages probe is at %p\n", &probe_mali_page_fault_insert_pages); + pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use probe is at %p\n", &probe_mali_mmu_as_in_use); + pr_debug("gator: Mali-T6xx: mali_mmu_as_released probe is at %p\n", &probe_mali_mmu_as_released); + pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change probe is at %p\n", &probe_mali_total_alloc_pages_change); + + return 1; +} + +static int start(void) +{ + unsigned int cnt; + + /* Clean all data for the next capture */ + for (cnt = 0; cnt < NUMBER_OF_TIMELINE_EVENTS; 
cnt++) { + timeline_event_starttime[cnt].tv_sec = timeline_event_starttime[cnt].tv_nsec = 0; + timeline_data[cnt] = 0; + } + + for (cnt = 0; cnt < NUMBER_OF_SOFTWARE_COUNTERS; cnt++) { + sw_counter_data[cnt] = 0; + } + + for (cnt = 0; cnt < NUMBER_OF_ACCUMULATORS; cnt++) { + accumulators_data[cnt] = 0; + } + + /* Register tracepoints */ + if (register_tracepoints() == 0) { + return -1; + } + + /* + * Set the first timestamp for calculating the sample interval. The first interval could be quite long, + * since it will be the time between 'start' and the first 'read'. + * This means that timeline values will be divided by a big number for the first sample. + */ + getnstimeofday(&prev_timestamp); + + return 0; +} + +static void stop(void) +{ + pr_debug("gator: Mali-T6xx: stop\n"); + + /* + * It is safe to unregister traces even if they were not successfully + * registered, so no need to check. + */ + GATOR_UNREGISTER_TRACE(mali_pm_status); + pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_page_fault_insert_pages); + pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_mmu_as_in_use); + pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_mmu_as_released); + pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint deactivated\n"); + + GATOR_UNREGISTER_TRACE(mali_total_alloc_pages_change); + pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint deactivated\n"); +} + +static int read(int **buffer) +{ + int cnt; + int len = 0; + long sample_interval_us = 0; + struct timespec read_timestamp; + + if (!on_primary_core()) { + return 0; + } + + /* Get the start of this sample period. */ + getnstimeofday(&read_timestamp); + + /* + * Calculate the sample interval if the previous sample time is valid. + * We use tv_sec since it will not be 0. + */ + if (prev_timestamp.tv_sec != 0) { + sample_interval_us = get_duration_us(&prev_timestamp, &read_timestamp); + } + + /* Structure copy. Update the previous timestamp. */ + prev_timestamp = read_timestamp; + + /* + * Report the timeline counters (ACTIVITY_START/STOP) + */ + for (cnt = FIRST_TIMELINE_EVENT; cnt < (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS); cnt++) { + mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int index = cnt - FIRST_TIMELINE_EVENT; + unsigned int value; + + /* If the activity is still running, reset its start time to the start of this sample period + * to correct the count. Add the time up to the end of the sample onto the count. */ + if (timeline_event_starttime[index].tv_sec != 0) { + const long event_duration = get_duration_us(&timeline_event_starttime[index], &read_timestamp); + timeline_data[index] += event_duration; + timeline_event_starttime[index] = read_timestamp; /* Activity is still running. */ + } + + if (sample_interval_us != 0) { + /* Convert the counter to a percent-of-sample value */ + value = (timeline_data[index] * 100) / sample_interval_us; + } else { + pr_debug("gator: Mali-T6xx: setting value to zero\n"); + value = 0; + } + + /* Clear the counter value ready for the next sample. 
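+ * (The reported value is a percentage of the sample interval: for example, an activity that was
+ * active for 2,500 us of a 10,000 us sample is reported as 25.)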
*/ + timeline_data[index] = 0; + + counter_dump[len++] = counter->key; + counter_dump[len++] = value; + } + } + + /* Report the software counters */ + for (cnt = FIRST_SOFTWARE_COUNTER; cnt < (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS); cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int index = cnt - FIRST_SOFTWARE_COUNTER; + counter_dump[len++] = counter->key; + counter_dump[len++] = sw_counter_data[index]; + /* Set the value to zero for the next time */ + sw_counter_data[index] = 0; + } + } + + /* Report the accumulators */ + for (cnt = FIRST_ACCUMULATOR; cnt < (FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS); cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int index = cnt - FIRST_ACCUMULATOR; + counter_dump[len++] = counter->key; + counter_dump[len++] = accumulators_data[index]; + /* Do not zero the accumulator */ + } + } + + /* Update the buffer */ + if (buffer) { + *buffer = (int *)counter_dump; + } + + return len; +} + +static struct gator_interface gator_events_mali_t6xx_interface = { + .create_files = create_files, + .start = start, + .stop = stop, + .read = read +}; + +extern int gator_events_mali_t6xx_init(void) +{ + pr_debug("gator: Mali-T6xx: sw_counters init\n"); + + gator_mali_initialise_counters(counters, NUMBER_OF_EVENTS); + + return gator_events_install(&gator_events_mali_t6xx_interface); +} + +gator_events_init(gator_events_mali_t6xx_init); diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c new file mode 100644 index 00000000000..fb2e15cecbb --- /dev/null +++ b/drivers/gator/gator_events_mali_t6xx_hw.c @@ -0,0 +1,722 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include "gator.h" + +#include <linux/module.h> +#include <linux/time.h> +#include <linux/math64.h> +#include <linux/slab.h> +#include <asm/io.h> + +/* Mali T6xx DDK includes */ +#include "linux/mali_linux_trace.h" +#include "kbase/src/common/mali_kbase.h" +#include "kbase/src/linux/mali_kbase_mem_linux.h" + +#include "gator_events_mali_common.h" + +/* + * Mali-T6xx + */ +typedef struct kbase_device *kbase_find_device_type(int); +typedef kbase_context *kbase_create_context_type(kbase_device *); +typedef void kbase_destroy_context_type(kbase_context *); +typedef void *kbase_va_alloc_type(kbase_context *, u32); +typedef void kbase_va_free_type(kbase_context *, void *); +typedef mali_error kbase_instr_hwcnt_enable_type(kbase_context *, kbase_uk_hwcnt_setup *); +typedef mali_error kbase_instr_hwcnt_disable_type(kbase_context *); +typedef mali_error kbase_instr_hwcnt_clear_type(kbase_context *); +typedef mali_error kbase_instr_hwcnt_dump_irq_type(kbase_context *); +typedef mali_bool kbase_instr_hwcnt_dump_complete_type(kbase_context *, mali_bool *); + +static kbase_find_device_type *kbase_find_device_symbol; +static kbase_create_context_type *kbase_create_context_symbol; +static kbase_va_alloc_type *kbase_va_alloc_symbol; +static kbase_instr_hwcnt_enable_type *kbase_instr_hwcnt_enable_symbol; +static kbase_instr_hwcnt_clear_type *kbase_instr_hwcnt_clear_symbol; +static kbase_instr_hwcnt_dump_irq_type *kbase_instr_hwcnt_dump_irq_symbol; +static kbase_instr_hwcnt_dump_complete_type *kbase_instr_hwcnt_dump_complete_symbol; +static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol; +static kbase_va_free_type *kbase_va_free_symbol; +static kbase_destroy_context_type *kbase_destroy_context_symbol; + +/** The interval between reads, in ns. + * + * Earlier we introduced + * a 'hold off for 1ms after last read' to resolve MIDBASE-2178 and MALINE-724. + * However, the 1ms hold off is too long if no context switches occur as there is a race + * between this value and the tick of the read clock in gator which is also 1ms. If we 'miss' the + * current read, the counter values are effectively 'spread' over 2ms and the values seen are half + * what they should be (since Streamline averages over sample time). In the presence of context switches + * this spread can vary and markedly affect the counters. Currently there is no 'proper' solution to + * this, but empirically we have found that reducing the minimum read interval to 950us causes the + * counts to be much more stable. + */ +static const int READ_INTERVAL_NSEC = 950000; + +#if GATOR_TEST +#include "gator_events_mali_t6xx_hw_test.c" +#endif + +/* Blocks for HW counters */ +enum { + JM_BLOCK = 0, + TILER_BLOCK, + SHADER_BLOCK, + MMU_BLOCK +}; + +/* Counters for Mali-T6xx: + * + * - HW counters, 4 blocks + * For HW counters we need strings to create /dev/gator/events files. + * Enums are not needed because the position of the HW name in the array is the same + * of the corresponding value in the received block of memory. + * HW counters are requested by calculating a bitmask, passed then to the driver. + * Every millisecond a HW counters dump is requested, and if the previous has been completed they are read. 
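+ *
+ * As an illustration of the mapping used by GET_HW_BLOCK and GET_COUNTER_OFFSET below: counter
+ * index 70 lives in block (70 >> 6) & 0x3 = 1 (the tiler block) at offset 70 & 0x3f = 6, so enable
+ * bit 6 / 4 = 1 is set in the tiler bitmask that start() passes to the driver.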
+ */ + +/* Hardware Counters */ +static const char *const hardware_counter_names[] = { + /* Job Manager */ + "", + "", + "", + "", + "MESSAGES_SENT", + "MESSAGES_RECEIVED", + "GPU_ACTIVE", /* 6 */ + "IRQ_ACTIVE", + "JS0_JOBS", + "JS0_TASKS", + "JS0_ACTIVE", + "", + "JS0_WAIT_READ", + "JS0_WAIT_ISSUE", + "JS0_WAIT_DEPEND", + "JS0_WAIT_FINISH", + "JS1_JOBS", + "JS1_TASKS", + "JS1_ACTIVE", + "", + "JS1_WAIT_READ", + "JS1_WAIT_ISSUE", + "JS1_WAIT_DEPEND", + "JS1_WAIT_FINISH", + "JS2_JOBS", + "JS2_TASKS", + "JS2_ACTIVE", + "", + "JS2_WAIT_READ", + "JS2_WAIT_ISSUE", + "JS2_WAIT_DEPEND", + "JS2_WAIT_FINISH", + "JS3_JOBS", + "JS3_TASKS", + "JS3_ACTIVE", + "", + "JS3_WAIT_READ", + "JS3_WAIT_ISSUE", + "JS3_WAIT_DEPEND", + "JS3_WAIT_FINISH", + "JS4_JOBS", + "JS4_TASKS", + "JS4_ACTIVE", + "", + "JS4_WAIT_READ", + "JS4_WAIT_ISSUE", + "JS4_WAIT_DEPEND", + "JS4_WAIT_FINISH", + "JS5_JOBS", + "JS5_TASKS", + "JS5_ACTIVE", + "", + "JS5_WAIT_READ", + "JS5_WAIT_ISSUE", + "JS5_WAIT_DEPEND", + "JS5_WAIT_FINISH", + "JS6_JOBS", + "JS6_TASKS", + "JS6_ACTIVE", + "", + "JS6_WAIT_READ", + "JS6_WAIT_ISSUE", + "JS6_WAIT_DEPEND", + "JS6_WAIT_FINISH", + + /*Tiler */ + "", + "", + "", + "JOBS_PROCESSED", + "TRIANGLES", + "QUADS", + "POLYGONS", + "POINTS", + "LINES", + "VCACHE_HIT", + "VCACHE_MISS", + "FRONT_FACING", + "BACK_FACING", + "PRIM_VISIBLE", + "PRIM_CULLED", + "PRIM_CLIPPED", + "LEVEL0", + "LEVEL1", + "LEVEL2", + "LEVEL3", + "LEVEL4", + "LEVEL5", + "LEVEL6", + "LEVEL7", + "COMMAND_1", + "COMMAND_2", + "COMMAND_3", + "COMMAND_4", + "COMMAND_4_7", + "COMMAND_8_15", + "COMMAND_16_63", + "COMMAND_64", + "COMPRESS_IN", + "COMPRESS_OUT", + "COMPRESS_FLUSH", + "TIMESTAMPS", + "PCACHE_HIT", + "PCACHE_MISS", + "PCACHE_LINE", + "PCACHE_STALL", + "WRBUF_HIT", + "WRBUF_MISS", + "WRBUF_LINE", + "WRBUF_PARTIAL", + "WRBUF_STALL", + "ACTIVE", + "LOADING_DESC", + "INDEX_WAIT", + "INDEX_RANGE_WAIT", + "VERTEX_WAIT", + "PCACHE_WAIT", + "WRBUF_WAIT", + "BUS_READ", + "BUS_WRITE", + "", + "", + "", + "", + "", + "UTLB_STALL", + "UTLB_REPLAY_MISS", + "UTLB_REPLAY_FULL", + "UTLB_NEW_MISS", + "UTLB_HIT", + + /* Shader Core */ + "", + "", + "", + "SHADER_CORE_ACTIVE", + "FRAG_ACTIVE", + "FRAG_PRIMATIVES", + "FRAG_PRIMATIVES_DROPPED", + "FRAG_CYCLE_DESC", + "FRAG_CYCLES_PLR", + "FRAG_CYCLES_VERT", + "FRAG_CYCLES_TRISETUP", + "FRAG_CYCLES_RAST", + "FRAG_THREADS", + "FRAG_DUMMY_THREADS", + "FRAG_QUADS_RAST", + "FRAG_QUADS_EZS_TEST", + "FRAG_QUADS_EZS_KILLED", + "FRAG_QUADS_LZS_TEST", + "FRAG_QUADS_LZS_KILLED", + "FRAG_CYCLE_NO_TILE", + "FRAG_NUM_TILES", + "FRAG_TRANS_ELIM", + "COMPUTE_ACTIVE", + "COMPUTE_TASKS", + "COMPUTE_THREADS", + "COMPUTE_CYCLES_DESC", + "TRIPIPE_ACTIVE", + "ARITH_WORDS", + "ARITH_CYCLES_REG", + "ARITH_CYCLES_L0", + "ARITH_FRAG_DEPEND", + "LS_WORDS", + "LS_ISSUES", + "LS_RESTARTS", + "LS_REISSUES_MISS", + "LS_REISSUES_VD", + "LS_REISSUE_ATTRIB_MISS", + "LS_NO_WB", + "TEX_WORDS", + "TEX_BUBBLES", + "TEX_WORDS_L0", + "TEX_WORDS_DESC", + "TEX_THREADS", + "TEX_RECIRC_FMISS", + "TEX_RECIRC_DESC", + "TEX_RECIRC_MULTI", + "TEX_RECIRC_PMISS", + "TEX_RECIRC_CONF", + "LSC_READ_HITS", + "LSC_READ_MISSES", + "LSC_WRITE_HITS", + "LSC_WRITE_MISSES", + "LSC_ATOMIC_HITS", + "LSC_ATOMIC_MISSES", + "LSC_LINE_FETCHES", + "LSC_DIRTY_LINE", + "LSC_SNOOPS", + "AXI_TLB_STALL", + "AXI_TLB_MIESS", + "AXI_TLB_TRANSACTION", + "LS_TLB_MISS", + "LS_TLB_HIT", + "AXI_BEATS_READ", + "AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "MMU_TABLE_WALK", + "MMU_REPLAY_MISS", + "MMU_REPLAY_FULL", + "MMU_NEW_MISS", + "MMU_HIT", + "", + "", + 
"", + "", + "", + "", + "", + "UTLB_STALL", + "UTLB_REPLAY_MISS", + "UTLB_REPLAY_FULL", + "UTLB_NEW_MISS", + "UTLB_HIT", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "L2_WRITE_BEATS", + "L2_READ_BEATS", + "L2_ANY_LOOKUP", + "L2_READ_LOOKUP", + "L2_SREAD_LOOKUP", + "L2_READ_REPLAY", + "L2_READ_SNOOP", + "L2_READ_HIT", + "L2_CLEAN_MISS", + "L2_WRITE_LOOKUP", + "L2_SWRITE_LOOKUP", + "L2_WRITE_REPLAY", + "L2_WRITE_SNOOP", + "L2_WRITE_HIT", + "L2_EXT_READ_FULL", + "L2_EXT_READ_HALF", + "L2_EXT_WRITE_FULL", + "L2_EXT_WRITE_HALF", + "L2_EXT_READ", + "L2_EXT_READ_LINE", + "L2_EXT_WRITE", + "L2_EXT_WRITE_LINE", + "L2_EXT_WRITE_SMALL", + "L2_EXT_BARRIER", + "L2_EXT_AR_STALL", + "L2_EXT_R_BUF_FULL", + "L2_EXT_RD_BUF_FULL", + "L2_EXT_R_RAW", + "L2_EXT_W_STALL", + "L2_EXT_W_BUF_FULL", + "L2_EXT_R_W_HAZARD", + "L2_TAG_HAZARD", + "L2_SNOOP_FULL", + "L2_REPLAY_FULL" +}; + +#define NUMBER_OF_HARDWARE_COUNTERS (sizeof(hardware_counter_names) / sizeof(hardware_counter_names[0])) + +#define GET_HW_BLOCK(c) (((c) >> 6) & 0x3) +#define GET_COUNTER_OFFSET(c) ((c) & 0x3f) + +/* Memory to dump hardware counters into */ +static void *kernel_dump_buffer; + +/* kbase context and device */ +static kbase_context *kbcontext = NULL; +static struct kbase_device *kbdevice = NULL; + +/* + * The following function has no external prototype in older DDK revisions. When the DDK + * is updated then this should be removed. + */ +struct kbase_device *kbase_find_device(int minor); + +static volatile bool kbase_device_busy = false; +static unsigned int num_hardware_counters_enabled; + +/* + * gatorfs variables for counter enable state + */ +static mali_counter counters[NUMBER_OF_HARDWARE_COUNTERS]; + +/* An array used to return the data we recorded + * as key,value pairs hence the *2 + */ +static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2]; + +#define SYMBOL_GET(FUNCTION, ERROR_COUNT) \ + if(FUNCTION ## _symbol) \ + { \ + printk("gator: mali " #FUNCTION " symbol was already registered\n"); \ + (ERROR_COUNT)++; \ + } \ + else \ + { \ + FUNCTION ## _symbol = symbol_get(FUNCTION); \ + if(! FUNCTION ## _symbol) \ + { \ + printk("gator: mali online " #FUNCTION " symbol not found\n"); \ + (ERROR_COUNT)++; \ + } \ + } + +#define SYMBOL_CLEANUP(FUNCTION) \ + if(FUNCTION ## _symbol) \ + { \ + symbol_put(FUNCTION); \ + FUNCTION ## _symbol = NULL; \ + } + +/** + * Execute symbol_get for all the Mali symbols and check for success. + * @return the number of symbols not loaded. + */ +static int init_symbols(void) +{ + int error_count = 0; + SYMBOL_GET(kbase_find_device, error_count); + SYMBOL_GET(kbase_create_context, error_count); + SYMBOL_GET(kbase_va_alloc, error_count); + SYMBOL_GET(kbase_instr_hwcnt_enable, error_count); + SYMBOL_GET(kbase_instr_hwcnt_clear, error_count); + SYMBOL_GET(kbase_instr_hwcnt_dump_irq, error_count); + SYMBOL_GET(kbase_instr_hwcnt_dump_complete, error_count); + SYMBOL_GET(kbase_instr_hwcnt_disable, error_count); + SYMBOL_GET(kbase_va_free, error_count); + SYMBOL_GET(kbase_destroy_context, error_count); + + return error_count; +} + +/** + * Execute symbol_put for all the registered Mali symbols. 
+ */ +static void clean_symbols(void) +{ + SYMBOL_CLEANUP(kbase_find_device); + SYMBOL_CLEANUP(kbase_create_context); + SYMBOL_CLEANUP(kbase_va_alloc); + SYMBOL_CLEANUP(kbase_instr_hwcnt_enable); + SYMBOL_CLEANUP(kbase_instr_hwcnt_clear); + SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_irq); + SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_complete); + SYMBOL_CLEANUP(kbase_instr_hwcnt_disable); + SYMBOL_CLEANUP(kbase_va_free); + SYMBOL_CLEANUP(kbase_destroy_context); +} + +/** + * Determines whether a read should take place + * @param current_time The current time, obtained from getnstimeofday() + * @param prev_time_s The number of seconds at the previous read attempt. + * @param next_read_time_ns The time (in ns) when the next read should be allowed. + * + * Note that this function has been separated out here to allow it to be tested. + */ +static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns) +{ + /* If the current ns count rolls over a second, roll the next read time too. */ + if (current_time->tv_sec != *prev_time_s) { + *next_read_time_ns = *next_read_time_ns - NSEC_PER_SEC; + } + + /* Abort the read if the next read time has not arrived. */ + if (current_time->tv_nsec < *next_read_time_ns) { + return 0; + } + + /* Set the next read some fixed time after this one, and update the read timestamp. */ + *next_read_time_ns = current_time->tv_nsec + READ_INTERVAL_NSEC; + + *prev_time_s = current_time->tv_sec; + return 1; +} + +static int start(void) +{ + kbase_uk_hwcnt_setup setup; + mali_error err; + int cnt; + u16 bitmask[] = { 0, 0, 0, 0 }; + + /* Setup HW counters */ + num_hardware_counters_enabled = 0; + + if (NUMBER_OF_HARDWARE_COUNTERS != 256) { + pr_debug("Unexpected number of hardware counters defined: expecting 256, got %d\n", NUMBER_OF_HARDWARE_COUNTERS); + } + + /* Calculate enable bitmasks based on counters_enabled array */ + for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + int block = GET_HW_BLOCK(cnt); + int enable_bit = GET_COUNTER_OFFSET(cnt) / 4; + bitmask[block] |= (1 << enable_bit); + pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt); + num_hardware_counters_enabled++; + } + } + + /* Create a kbase context for HW counters */ + if (num_hardware_counters_enabled > 0) { + if (init_symbols() > 0) { + clean_symbols(); + /* No Mali driver code entrypoints found - not a fault. 
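+ * The kbase driver is most likely not loaded or does not export these symbols, so hardware
+ * counter collection is simply skipped rather than treated as an error.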
*/ + return 0; + } + + kbdevice = kbase_find_device_symbol(-1); + + /* If we already got a context, fail */ + if (kbcontext) { + pr_debug("gator: Mali-T6xx: error context already present\n"); + goto out; + } + + /* kbcontext will only be valid after all the Mali symbols are loaded successfully */ + kbcontext = kbase_create_context_symbol(kbdevice); + if (!kbcontext) { + pr_debug("gator: Mali-T6xx: error creating kbase context\n"); + goto out; + } + + /* + * The amount of memory needed to store the dump (bytes) + * DUMP_SIZE = number of core groups + * * number of blocks (always 8 for midgard) + * * number of counters per block (always 64 for midgard) + * * number of bytes per counter (always 4 in midgard) + * For a Mali-T6xx with a single core group = 1 * 8 * 64 * 4 + */ + kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 2048); + if (!kernel_dump_buffer) { + pr_debug("gator: Mali-T6xx: error trying to allocate va\n"); + goto destroy_context; + } + + setup.dump_buffer = (uintptr_t)kernel_dump_buffer; + setup.jm_bm = bitmask[JM_BLOCK]; + setup.tiler_bm = bitmask[TILER_BLOCK]; + setup.shader_bm = bitmask[SHADER_BLOCK]; + setup.mmu_l2_bm = bitmask[MMU_BLOCK]; + /* These counters do not exist on Mali-T60x */ + setup.l3_cache_bm = 0; + + /* Use kbase API to enable hardware counters and provide dump buffer */ + err = kbase_instr_hwcnt_enable_symbol(kbcontext, &setup); + if (err != MALI_ERROR_NONE) { + pr_debug("gator: Mali-T6xx: can't setup hardware counters\n"); + goto free_buffer; + } + pr_debug("gator: Mali-T6xx: hardware counters enabled\n"); + kbase_instr_hwcnt_clear_symbol(kbcontext); + pr_debug("gator: Mali-T6xx: hardware counters cleared \n"); + + kbase_device_busy = false; + } + + return 0; + +free_buffer: + kbase_va_free_symbol(kbcontext, kernel_dump_buffer); + +destroy_context: + kbase_destroy_context_symbol(kbcontext); + +out: + clean_symbols(); + return -1; +} + +static void stop(void) +{ + unsigned int cnt; + kbase_context *temp_kbcontext; + + pr_debug("gator: Mali-T6xx: stop\n"); + + /* Set all counters as disabled */ + for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) { + counters[cnt].enabled = 0; + } + + /* Destroy the context for HW counters */ + if (num_hardware_counters_enabled > 0 && kbcontext != NULL) { + /* + * Set the global variable to NULL before destroying it, because + * other function will check this before using it. + */ + temp_kbcontext = kbcontext; + kbcontext = NULL; + + kbase_instr_hwcnt_disable_symbol(temp_kbcontext); + kbase_va_free_symbol(temp_kbcontext, kernel_dump_buffer); + kbase_destroy_context_symbol(temp_kbcontext); + + pr_debug("gator: Mali-T6xx: hardware counters stopped\n"); + + clean_symbols(); + } +} + +static int read(int **buffer) +{ + int cnt; + int len = 0; + u32 value = 0; + mali_bool success; + + struct timespec current_time; + static u32 prev_time_s = 0; + static s32 next_read_time_ns = 0; + + if (!on_primary_core()) { + return 0; + } + + getnstimeofday(¤t_time); + + /* + * Discard reads unless a respectable time has passed. This reduces the load on the GPU without sacrificing + * accuracy on the Streamline display. + */ + if (!is_read_scheduled(¤t_time, &prev_time_s, &next_read_time_ns)) { + return 0; + } + + /* + * Report the HW counters + * Only process hardware counters if at least one of the hardware counters is enabled. 
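+ * Each block occupies 64 32-bit counters in kernel_dump_buffer, starting at the byte offsets
+ * listed in vithar_blocks below.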
+ */ + if (num_hardware_counters_enabled > 0) { + const unsigned int vithar_blocks[] = { + 0x700, /* VITHAR_JOB_MANAGER, Block 0 */ + 0x400, /* VITHAR_TILER, Block 1 */ + 0x000, /* VITHAR_SHADER_CORE, Block 2 */ + 0x500 /* VITHAR_MEMORY_SYSTEM, Block 3 */ + }; + + if (!kbcontext) { + return -1; + } + + /* Mali symbols can be called safely since a kbcontext is valid */ + if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) { + kbase_device_busy = false; + + if (success == MALI_TRUE) { + for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) { + const mali_counter *counter = &counters[cnt]; + if (counter->enabled) { + const int block = GET_HW_BLOCK(cnt); + const int counter_offset = GET_COUNTER_OFFSET(cnt); + const u32 *counter_block = (u32 *) ((uintptr_t)kernel_dump_buffer + vithar_blocks[block]); + const u32 *counter_address = counter_block + counter_offset; + + value = *counter_address; + + if (block == SHADER_BLOCK) { + /* (counter_address + 0x000) has already been accounted-for above. */ + value += *(counter_address + 0x100); + value += *(counter_address + 0x200); + value += *(counter_address + 0x300); + } + + counter_dump[len++] = counter->key; + counter_dump[len++] = value; + } + } + } + } + + if (!kbase_device_busy) { + kbase_device_busy = true; + kbase_instr_hwcnt_dump_irq_symbol(kbcontext); + } + } + + /* Update the buffer */ + if (buffer) { + *buffer = (int *)counter_dump; + } + + return len; +} + +static int create_files(struct super_block *sb, struct dentry *root) +{ + unsigned int event; + /* + * Create the filesystem for all events + */ + int counter_index = 0; + const char *mali_name = gator_mali_get_mali_name(); + + for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++) { + if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event]) != 0) + return -1; + counter_index++; + } + + return 0; +} + +static struct gator_interface gator_events_mali_t6xx_interface = { + .create_files = create_files, + .start = start, + .stop = stop, + .read = read +}; + +int gator_events_mali_t6xx_hw_init(void) +{ + pr_debug("gator: Mali-T6xx: sw_counters init\n"); + +#if GATOR_TEST + test_all_is_read_scheduled(); +#endif + + gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS); + + return gator_events_install(&gator_events_mali_t6xx_interface); +} + +gator_events_init(gator_events_mali_t6xx_hw_init); diff --git a/drivers/gator/gator_events_mali_t6xx_hw_test.c b/drivers/gator/gator_events_mali_t6xx_hw_test.c new file mode 100644 index 00000000000..efb32ddf548 --- /dev/null +++ b/drivers/gator/gator_events_mali_t6xx_hw_test.c @@ -0,0 +1,55 @@ +/** + * Copyright (C) ARM Limited 2012-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/** + * Test functions for mali_t600_hw code. 
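+ *
+ * These tests are #included from gator_events_mali_t6xx_hw.c and only run from its init function
+ * when GATOR_TEST is defined.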
+ */ + +static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns); + +static int test_is_read_scheduled(u32 s, u32 ns, u32 prev_s, s32 next_ns, int expected_result, s32 expected_next_ns) +{ + struct timespec current_time; + u32 prev_time_s = prev_s; + s32 next_read_time_ns = next_ns; + + current_time.tv_sec = s; + current_time.tv_nsec = ns; + + if (is_read_scheduled(¤t_time, &prev_time_s, &next_read_time_ns) != expected_result) { + printk("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result); + return 0; + } + + if (next_read_time_ns != expected_next_ns) { + printk("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns); + return 0; + } + + return 1; +} + +static void test_all_is_read_scheduled(void) +{ + const int HIGHEST_NS = 999999999; + int n_tests_passed = 0; + + printk("gator: running tests on %s\n", __FILE__); + + n_tests_passed += test_is_read_scheduled(0, 0, 0, 0, 1, READ_INTERVAL_NSEC); /* Null time */ + n_tests_passed += test_is_read_scheduled(100, 1000, 0, 0, 1, READ_INTERVAL_NSEC + 1000); /* Initial values */ + + n_tests_passed += test_is_read_scheduled(100, HIGHEST_NS, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500); + n_tests_passed += test_is_read_scheduled(101, 0001, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500 - NSEC_PER_SEC); + n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500 - NSEC_PER_SEC, 1, 600 + READ_INTERVAL_NSEC); + + n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500, 1, 600 + READ_INTERVAL_NSEC); + + printk("gator: %d tests passed\n", n_tests_passed); +} diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c new file mode 100644 index 00000000000..c1e360d1289 --- /dev/null +++ b/drivers/gator/gator_events_meminfo.c @@ -0,0 +1,240 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include "gator.h" +#include <linux/workqueue.h> +#include <trace/events/kmem.h> +#include <linux/hardirq.h> + +#define MEMINFO_MEMFREE 0 +#define MEMINFO_MEMUSED 1 +#define MEMINFO_BUFFERRAM 2 +#define MEMINFO_TOTAL 3 + +static ulong meminfo_global_enabled; +static ulong meminfo_enabled[MEMINFO_TOTAL]; +static ulong meminfo_key[MEMINFO_TOTAL]; +static unsigned long long meminfo_buffer[MEMINFO_TOTAL * 2]; +static int meminfo_length = 0; +static unsigned int mem_event = 0; +static bool new_data_avail; + +static void wq_sched_handler(struct work_struct *wsptr); +DECLARE_WORK(work, wq_sched_handler); +static struct timer_list meminfo_wake_up_timer; +static void meminfo_wake_up_handler(unsigned long unused_data); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) +GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int order)) +#else +GATOR_DEFINE_PROBE(mm_page_free, TP_PROTO(struct page *page, unsigned int order)) +#endif +{ + mem_event++; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) +GATOR_DEFINE_PROBE(mm_pagevec_free, TP_PROTO(struct page *page, int cold)) +#else +GATOR_DEFINE_PROBE(mm_page_free_batched, TP_PROTO(struct page *page, int cold)) +#endif +{ + mem_event++; +} + +GATOR_DEFINE_PROBE(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype)) +{ + mem_event++; +} + +static int gator_events_meminfo_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int i; + + for (i = 0; i < MEMINFO_TOTAL; i++) { + switch (i) { + case MEMINFO_MEMFREE: + dir = gatorfs_mkdir(sb, root, "Linux_meminfo_memfree"); + break; + case MEMINFO_MEMUSED: + dir = gatorfs_mkdir(sb, root, "Linux_meminfo_memused"); + break; + case MEMINFO_BUFFERRAM: + dir = gatorfs_mkdir(sb, root, "Linux_meminfo_bufferram"); + break; + default: + return -1; + } + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &meminfo_enabled[i]); + gatorfs_create_ro_ulong(sb, dir, "key", &meminfo_key[i]); + } + + return 0; +} + +static int gator_events_meminfo_start(void) +{ + int i; + + new_data_avail = true; + for (i = 0; i < MEMINFO_TOTAL; i++) { + if (meminfo_enabled[i]) { + meminfo_global_enabled = 1; + } + } + + if (meminfo_global_enabled == 0) + return 0; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) + if (GATOR_REGISTER_TRACE(mm_page_free_direct)) +#else + if (GATOR_REGISTER_TRACE(mm_page_free)) +#endif + goto mm_page_free_exit; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) + if (GATOR_REGISTER_TRACE(mm_pagevec_free)) +#else + if (GATOR_REGISTER_TRACE(mm_page_free_batched)) +#endif + goto mm_page_free_batched_exit; + if (GATOR_REGISTER_TRACE(mm_page_alloc)) + goto mm_page_alloc_exit; + + setup_timer(&meminfo_wake_up_timer, meminfo_wake_up_handler, 0); + return 0; + +mm_page_alloc_exit: +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) + GATOR_UNREGISTER_TRACE(mm_pagevec_free); +#else + GATOR_UNREGISTER_TRACE(mm_page_free_batched); +#endif +mm_page_free_batched_exit: +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) + GATOR_UNREGISTER_TRACE(mm_page_free_direct); +#else + GATOR_UNREGISTER_TRACE(mm_page_free); +#endif +mm_page_free_exit: + return -1; +} + +static void gator_events_meminfo_stop(void) +{ + int i; + + if (meminfo_global_enabled) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) + GATOR_UNREGISTER_TRACE(mm_page_free_direct); + GATOR_UNREGISTER_TRACE(mm_pagevec_free); +#else + GATOR_UNREGISTER_TRACE(mm_page_free); + GATOR_UNREGISTER_TRACE(mm_page_free_batched); +#endif + 
GATOR_UNREGISTER_TRACE(mm_page_alloc); + + del_timer_sync(&meminfo_wake_up_timer); + } + + meminfo_global_enabled = 0; + for (i = 0; i < MEMINFO_TOTAL; i++) { + meminfo_enabled[i] = 0; + } +} + +// Must be run in process context as the kernel function si_meminfo() can sleep +static void wq_sched_handler(struct work_struct *wsptr) +{ + struct sysinfo info; + int i, len; + unsigned long long value; + + meminfo_length = len = 0; + + si_meminfo(&info); + for (i = 0; i < MEMINFO_TOTAL; i++) { + if (meminfo_enabled[i]) { + switch (i) { + case MEMINFO_MEMFREE: + value = info.freeram * PAGE_SIZE; + break; + case MEMINFO_MEMUSED: + value = (info.totalram - info.freeram) * PAGE_SIZE; + break; + case MEMINFO_BUFFERRAM: + value = info.bufferram * PAGE_SIZE; + break; + default: + value = 0; + break; + } + meminfo_buffer[len++] = (unsigned long long)meminfo_key[i]; + meminfo_buffer[len++] = value; + } + } + + meminfo_length = len; + new_data_avail = true; +} + +static void meminfo_wake_up_handler(unsigned long unused_data) +{ + // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater + schedule_work(&work); +} + +static int gator_events_meminfo_read(long long **buffer) +{ + static unsigned int last_mem_event = 0; + + if (!on_primary_core() || !meminfo_global_enabled) + return 0; + + if (last_mem_event != mem_event) { + last_mem_event = mem_event; + mod_timer(&meminfo_wake_up_timer, jiffies + 1); + } + + if (!new_data_avail) + return 0; + + new_data_avail = false; + + if (buffer) + *buffer = meminfo_buffer; + + return meminfo_length; +} + +static struct gator_interface gator_events_meminfo_interface = { + .create_files = gator_events_meminfo_create_files, + .start = gator_events_meminfo_start, + .stop = gator_events_meminfo_stop, + .read64 = gator_events_meminfo_read, +}; + +int gator_events_meminfo_init(void) +{ + int i; + + meminfo_global_enabled = 0; + for (i = 0; i < MEMINFO_TOTAL; i++) { + meminfo_enabled[i] = 0; + meminfo_key[i] = gator_events_get_key(); + } + + return gator_events_install(&gator_events_meminfo_interface); +} + +gator_events_init(gator_events_meminfo_init); diff --git a/drivers/gator/gator_events_mmaped.c b/drivers/gator/gator_events_mmaped.c new file mode 100644 index 00000000000..0027564ea4d --- /dev/null +++ b/drivers/gator/gator_events_mmaped.c @@ -0,0 +1,229 @@ +/* + * Example events provider + * + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Similar entries to those below must be present in the events.xml file. 
+ * To add them to the events.xml, create an events-mmap.xml with the + * following contents and rebuild gatord: + * + * <counter_set name="mmaped_cnt" count="3"/> + * <category name="mmaped" counter_set="mmaped_cnt" per_cpu="no"> + * <event event="0x0" title="Simulated" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/> + * <event event="0x1" title="Simulated" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/> + * <event event="0x2" title="Simulated" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/> + * </category> + */ + +#include <linux/init.h> +#include <linux/io.h> +#include <linux/ratelimit.h> + +#include "gator.h" + +#define MMAPED_COUNTERS_NUM 3 + +static struct { + unsigned long enabled; + unsigned long event; + unsigned long key; +} mmaped_counters[MMAPED_COUNTERS_NUM]; + +static int mmaped_buffer[MMAPED_COUNTERS_NUM * 2]; + +#ifdef TODO +static void __iomem *mmaped_base; +#endif + +#ifndef TODO +static s64 prev_time; +#endif + +/* Adds mmaped_cntX directories and enabled, event, and key files to /dev/gator/events */ +static int gator_events_mmaped_create_files(struct super_block *sb, + struct dentry *root) +{ + int i; + + for (i = 0; i < MMAPED_COUNTERS_NUM; i++) { + char buf[16]; + struct dentry *dir; + + snprintf(buf, sizeof(buf), "mmaped_cnt%d", i); + dir = gatorfs_mkdir(sb, root, buf); + if (WARN_ON(!dir)) + return -1; + gatorfs_create_ulong(sb, dir, "enabled", + &mmaped_counters[i].enabled); + gatorfs_create_ulong(sb, dir, "event", + &mmaped_counters[i].event); + gatorfs_create_ro_ulong(sb, dir, "key", + &mmaped_counters[i].key); + } + + return 0; +} + +static int gator_events_mmaped_start(void) +{ +#ifdef TODO + for (i = 0; i < MMAPED_COUNTERS_NUM; i++) + writel(mmaped_counters[i].event, + mmaped_base + COUNTERS_CONFIG_OFFSET[i]); + + writel(ENABLED, COUNTERS_CONTROL_OFFSET); +#endif + +#ifndef TODO + struct timespec ts; + getnstimeofday(&ts); + prev_time = timespec_to_ns(&ts); +#endif + + return 0; +} + +static void gator_events_mmaped_stop(void) +{ +#ifdef TODO + writel(DISABLED, COUNTERS_CONTROL_OFFSET); +#endif +} + +#ifndef TODO +/* This function "simulates" counters, generating values of fancy + * functions like sine or triangle... */ +static int mmaped_simulate(int counter, int delta_in_us) +{ + int result = 0; + + switch (counter) { + case 0: /* sort-of-sine */ + { + static int t = 0; + int x; + + t += delta_in_us; + if (t > 2048000) + t = 0; + + if (t % 1024000 < 512000) + x = 512000 - (t % 512000); + else + x = t % 512000; + + result = 32 * x / 512000; + result = result * result; + + if (t < 1024000) + result = 1922 - result; + } + break; + case 1: /* triangle */ + { + static int v, d = 1; + + v = v + d * delta_in_us; + if (v < 0) { + v = 0; + d = 1; + } else if (v > 1000000) { + v = 1000000; + d = -1; + } + + result = v; + } + break; + case 2: /* PWM signal */ + { + static int dc, x, t = 0; + + t += delta_in_us; + if (t > 1000000) + t = 0; + if (x / 1000000 != (x + delta_in_us) / 1000000) + dc = (dc + 100000) % 1000000; + x += delta_in_us; + + result = t < dc ? 
0 : 10; + } + break; + } + + return result; +} +#endif + +static int gator_events_mmaped_read(int **buffer) +{ + int i; + int len = 0; +#ifndef TODO + int delta_in_us; + struct timespec ts; + s64 time; +#endif + + /* System wide counters - read from one core only */ + if (!on_primary_core()) + return 0; + +#ifndef TODO + getnstimeofday(&ts); + time = timespec_to_ns(&ts); + delta_in_us = (int)(time - prev_time) / 1000; + prev_time = time; +#endif + + for (i = 0; i < MMAPED_COUNTERS_NUM; i++) { + if (mmaped_counters[i].enabled) { + mmaped_buffer[len++] = mmaped_counters[i].key; +#ifdef TODO + mmaped_buffer[len++] = + readl(mmaped_base + COUNTERS_VALUE_OFFSET[i]); +#else + mmaped_buffer[len++] = + mmaped_simulate(mmaped_counters[i].event, + delta_in_us); +#endif + } + } + + if (buffer) + *buffer = mmaped_buffer; + + return len; +} + +static struct gator_interface gator_events_mmaped_interface = { + .create_files = gator_events_mmaped_create_files, + .start = gator_events_mmaped_start, + .stop = gator_events_mmaped_stop, + .read = gator_events_mmaped_read, +}; + +/* Must not be static! */ +int __init gator_events_mmaped_init(void) +{ + int i; + +#ifdef TODO + mmaped_base = ioremap(COUNTERS_PHYS_ADDR, SZ_4K); + if (!mmaped_base) + return -ENOMEM; +#endif + + for (i = 0; i < MMAPED_COUNTERS_NUM; i++) { + mmaped_counters[i].enabled = 0; + mmaped_counters[i].key = gator_events_get_key(); + } + + return gator_events_install(&gator_events_mmaped_interface); +} + +gator_events_init(gator_events_mmaped_init); diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c new file mode 100644 index 00000000000..80cdee41ae3 --- /dev/null +++ b/drivers/gator/gator_events_net.c @@ -0,0 +1,171 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include "gator.h" +#include <linux/netdevice.h> +#include <linux/hardirq.h> + +#define NETRX 0 +#define NETTX 1 +#define TOTALNET 2 + +static ulong netrx_enabled; +static ulong nettx_enabled; +static ulong netrx_key; +static ulong nettx_key; +static int rx_total, tx_total; +static ulong netPrev[TOTALNET]; +static int netGet[TOTALNET * 4]; + +static struct timer_list net_wake_up_timer; + +// Must be run in process context as the kernel function dev_get_stats() can sleep +static void get_network_stats(struct work_struct *wsptr) +{ + int rx = 0, tx = 0; + struct net_device *dev; + + for_each_netdev(&init_net, dev) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) + const struct net_device_stats *stats = dev_get_stats(dev); +#else + struct rtnl_link_stats64 temp; + const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); +#endif + rx += stats->rx_bytes; + tx += stats->tx_bytes; + } + rx_total = rx; + tx_total = tx; +} + +DECLARE_WORK(wq_get_stats, get_network_stats); + +static void net_wake_up_handler(unsigned long unused_data) +{ + // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater + schedule_work(&wq_get_stats); +} + +static void calculate_delta(int *rx, int *tx) +{ + int rx_calc, tx_calc; + + rx_calc = (int)(rx_total - netPrev[NETRX]); + if (rx_calc < 0) + rx_calc = 0; + netPrev[NETRX] += rx_calc; + + tx_calc = (int)(tx_total - netPrev[NETTX]); + if (tx_calc < 0) + tx_calc = 0; + netPrev[NETTX] += tx_calc; + + *rx = rx_calc; + *tx = tx_calc; +} + +static int gator_events_net_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + + dir = gatorfs_mkdir(sb, root, "Linux_net_rx"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &netrx_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &netrx_key); + + dir = gatorfs_mkdir(sb, root, "Linux_net_tx"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &nettx_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &nettx_key); + + return 0; +} + +static int gator_events_net_start(void) +{ + get_network_stats(0); + netPrev[NETRX] = rx_total; + netPrev[NETTX] = tx_total; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) + setup_timer(&net_wake_up_timer, net_wake_up_handler, 0); +#else + setup_deferrable_timer_on_stack(&net_wake_up_timer, net_wake_up_handler, 0); +#endif + return 0; +} + +static void gator_events_net_stop(void) +{ + del_timer_sync(&net_wake_up_timer); + netrx_enabled = 0; + nettx_enabled = 0; +} + +static int gator_events_net_read(int **buffer) +{ + int len, rx_delta, tx_delta; + static int last_rx_delta = 0, last_tx_delta = 0; + + if (!on_primary_core()) + return 0; + + if (!netrx_enabled && !nettx_enabled) + return 0; + + mod_timer(&net_wake_up_timer, jiffies + 1); + + calculate_delta(&rx_delta, &tx_delta); + + len = 0; + if (netrx_enabled && last_rx_delta != rx_delta) { + last_rx_delta = rx_delta; + netGet[len++] = netrx_key; + netGet[len++] = 0; // indicates to Streamline that rx_delta bytes were transmitted now, not since the last message + netGet[len++] = netrx_key; + netGet[len++] = rx_delta; + } + + if (nettx_enabled && last_tx_delta != tx_delta) { + last_tx_delta = tx_delta; + netGet[len++] = nettx_key; + netGet[len++] = 0; // indicates to Streamline that tx_delta bytes were transmitted now, not since the last message + netGet[len++] = nettx_key; + netGet[len++] = tx_delta; + } + + if (buffer) + *buffer = netGet; + + return len; +} + +static 
struct gator_interface gator_events_net_interface = { + .create_files = gator_events_net_create_files, + .start = gator_events_net_start, + .stop = gator_events_net_stop, + .read = gator_events_net_read, +}; + +int gator_events_net_init(void) +{ + netrx_key = gator_events_get_key(); + nettx_key = gator_events_get_key(); + + netrx_enabled = 0; + nettx_enabled = 0; + + return gator_events_install(&gator_events_net_interface); +} + +gator_events_init(gator_events_net_init); diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c new file mode 100644 index 00000000000..34a6bc78756 --- /dev/null +++ b/drivers/gator/gator_events_perf_pmu.c @@ -0,0 +1,500 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/slab.h> +#include <linux/perf_event.h> +#include "gator.h" + +// gator_events_armvX.c is used for Linux 2.6.x +#if GATOR_PERF_PMU_SUPPORT + +extern bool event_based_sampling; + +#define CNTMAX 16 +#define CCI_400 4 +// + 1 for the cci-400 cycles counter +#define UCCNT (CCI_400 + 1) + +struct gator_attr { + char name[40]; + unsigned long enabled; + unsigned long type; + unsigned long event; + unsigned long count; + unsigned long key; +}; + +static struct gator_attr attrs[CNTMAX]; +static int attr_count; +static struct gator_attr uc_attrs[UCCNT]; +static int uc_attr_count; + +struct gator_event { + int curr; + int prev; + int prev_delta; + bool zero; + struct perf_event *pevent; + struct perf_event_attr *pevent_attr; +}; + +static DEFINE_PER_CPU(struct gator_event[CNTMAX], events); +static struct gator_event uc_events[UCCNT]; +static DEFINE_PER_CPU(int[(CNTMAX + UCCNT)*2], perf_cnt); + +static void gator_events_perf_pmu_stop(void); + +static int __create_files(struct super_block *sb, struct dentry *root, struct gator_attr *const attr) +{ + struct dentry *dir; + + if (attr->name[0] == '\0') { + return 0; + } + dir = gatorfs_mkdir(sb, root, attr->name); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &attr->enabled); + gatorfs_create_ulong(sb, dir, "count", &attr->count); + gatorfs_create_ro_ulong(sb, dir, "key", &attr->key); + gatorfs_create_ulong(sb, dir, "event", &attr->event); + + return 0; +} + +static int gator_events_perf_pmu_create_files(struct super_block *sb, struct dentry *root) +{ + int cnt; + + for (cnt = 0; cnt < attr_count; cnt++) { + if (__create_files(sb, root, &attrs[cnt]) != 0) { + return -1; + } + } + + for (cnt = 0; cnt < uc_attr_count; cnt++) { + if (__create_files(sb, root, &uc_attrs[cnt]) != 0) { + return -1; + } + } + + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) +static void ebs_overflow_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs) +#else +static void ebs_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) +#endif +{ + gator_backtrace_handler(regs); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) +static void dummy_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs) +#else +static void dummy_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) +#endif +{ +// Required as perf_event_create_kernel_counter() requires an overflow handler, even though all we do is poll +} + +static 
int gator_events_perf_pmu_read(int **buffer); + +static int gator_events_perf_pmu_online(int **buffer, bool migrate) +{ + return gator_events_perf_pmu_read(buffer); +} + +static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const attr, struct gator_event *const event) +{ + perf_overflow_handler_t handler; + + event->zero = true; + + if (event->pevent != NULL || event->pevent_attr == 0 || migrate) { + return; + } + + if (attr->count > 0) { + handler = ebs_overflow_handler; + } else { + handler = dummy_handler; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler); +#else + event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler, 0); +#endif + if (IS_ERR(event->pevent)) { + pr_debug("gator: unable to online a counter on cpu %d\n", cpu); + event->pevent = NULL; + return; + } + + if (event->pevent->state != PERF_EVENT_STATE_ACTIVE) { + pr_debug("gator: inactive counter on cpu %d\n", cpu); + perf_event_release_kernel(event->pevent); + event->pevent = NULL; + return; + } +} + +static void gator_events_perf_pmu_online_dispatch(int cpu, bool migrate) +{ + int cnt; + + cpu = pcpu_to_lcpu(cpu); + + for (cnt = 0; cnt < attr_count; cnt++) { + __online_dispatch(cpu, migrate, &attrs[cnt], &per_cpu(events, cpu)[cnt]); + } + + if (cpu == 0) { + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __online_dispatch(cpu, migrate, &uc_attrs[cnt], &uc_events[cnt]); + } + } +} + +static void __offline_dispatch(int cpu, struct gator_event *const event) +{ + struct perf_event *pe = NULL; + + if (event->pevent) { + pe = event->pevent; + event->pevent = NULL; + } + + if (pe) { + perf_event_release_kernel(pe); + } +} + +static void gator_events_perf_pmu_offline_dispatch(int cpu, bool migrate) +{ + int cnt; + + if (migrate) { + return; + } + cpu = pcpu_to_lcpu(cpu); + + for (cnt = 0; cnt < attr_count; cnt++) { + __offline_dispatch(cpu, &per_cpu(events, cpu)[cnt]); + } + + if (cpu == 0) { + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __offline_dispatch(cpu, &uc_events[cnt]); + } + } +} + +static int __check_ebs(struct gator_attr *const attr) +{ + if (attr->count > 0) { + if (!event_based_sampling) { + event_based_sampling = true; + } else { + printk(KERN_WARNING "gator: Only one ebs counter is allowed\n"); + return -1; + } + } + + return 0; +} + +static int __start(struct gator_attr *const attr, struct gator_event *const event) +{ + u32 size = sizeof(struct perf_event_attr); + + event->pevent = NULL; + if (!attr->enabled) { // Skip disabled counters + return 0; + } + + event->prev = 0; + event->curr = 0; + event->prev_delta = 0; + event->pevent_attr = kmalloc(size, GFP_KERNEL); + if (!event->pevent_attr) { + gator_events_perf_pmu_stop(); + return -1; + } + + memset(event->pevent_attr, 0, size); + event->pevent_attr->type = attr->type; + event->pevent_attr->size = size; + event->pevent_attr->config = attr->event; + event->pevent_attr->sample_period = attr->count; + event->pevent_attr->pinned = 1; + + return 0; +} + +static int gator_events_perf_pmu_start(void) +{ + int cnt, cpu; + + event_based_sampling = false; + for (cnt = 0; cnt < attr_count; cnt++) { + if (__check_ebs(&attrs[cnt]) != 0) { + return -1; + } + } + + for (cnt = 0; cnt < uc_attr_count; cnt++) { + if (__check_ebs(&uc_attrs[cnt]) != 0) { + return -1; + } + } + + for_each_present_cpu(cpu) { + for (cnt = 0; cnt < attr_count; cnt++) { + if (__start(&attrs[cnt], &per_cpu(events, cpu)[cnt]) != 0) { + return -1; + } + } + } + + 
for (cnt = 0; cnt < uc_attr_count; cnt++) { + if (__start(&uc_attrs[cnt], &uc_events[cnt]) != 0) { + return -1; + } + } + + return 0; +} + +static void __event_stop(struct gator_event *const event) +{ + if (event->pevent_attr) { + kfree(event->pevent_attr); + event->pevent_attr = NULL; + } +} + +static void __attr_stop(struct gator_attr *const attr) +{ + attr->enabled = 0; + attr->event = 0; + attr->count = 0; +} + +static void gator_events_perf_pmu_stop(void) +{ + unsigned int cnt, cpu; + + for_each_present_cpu(cpu) { + for (cnt = 0; cnt < attr_count; cnt++) { + __event_stop(&per_cpu(events, cpu)[cnt]); + } + } + + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __event_stop(&uc_events[cnt]); + } + + for (cnt = 0; cnt < attr_count; cnt++) { + __attr_stop(&attrs[cnt]); + } + + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __attr_stop(&uc_attrs[cnt]); + } +} + +static void __read(int *const len, int cpu, struct gator_attr *const attr, struct gator_event *const event) +{ + int delta; + + struct perf_event *const ev = event->pevent; + if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) { + /* After creating the perf counter in __online_dispatch, there + * is a race condition between gator_events_perf_pmu_online and + * gator_events_perf_pmu_read. So have + * gator_events_perf_pmu_online call gator_events_perf_pmu_read + * and in __read check to see if it's the first call after + * __online_dispatch and if so, run the online code. + */ + if (event->zero) { + ev->pmu->read(ev); + event->prev = event->curr = local64_read(&ev->count); + event->prev_delta = 0; + per_cpu(perf_cnt, cpu)[(*len)++] = attr->key; + per_cpu(perf_cnt, cpu)[(*len)++] = 0; + event->zero = false; + } else { + ev->pmu->read(ev); + event->curr = local64_read(&ev->count); + delta = event->curr - event->prev; + if (delta != 0 || delta != event->prev_delta) { + event->prev_delta = delta; + event->prev = event->curr; + per_cpu(perf_cnt, cpu)[(*len)++] = attr->key; + if (delta < 0) { + delta *= -1; + } + per_cpu(perf_cnt, cpu)[(*len)++] = delta; + } + } + } +} + +static int gator_events_perf_pmu_read(int **buffer) +{ + int cnt, len = 0; + const int cpu = get_logical_cpu(); + + for (cnt = 0; cnt < attr_count; cnt++) { + __read(&len, cpu, &attrs[cnt], &per_cpu(events, cpu)[cnt]); + } + + if (cpu == 0) { + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __read(&len, cpu, &uc_attrs[cnt], &uc_events[cnt]); + } + } + + if (buffer) { + *buffer = per_cpu(perf_cnt, cpu); + } + + return len; +} + +static struct gator_interface gator_events_perf_pmu_interface = { + .create_files = gator_events_perf_pmu_create_files, + .start = gator_events_perf_pmu_start, + .stop = gator_events_perf_pmu_stop, + .online = gator_events_perf_pmu_online, + .online_dispatch = gator_events_perf_pmu_online_dispatch, + .offline_dispatch = gator_events_perf_pmu_offline_dispatch, + .read = gator_events_perf_pmu_read, +}; + +static void __attr_init(struct gator_attr *const attr) +{ + attr->name[0] = '\0'; + attr->enabled = 0; + attr->type = 0; + attr->event = 0; + attr->count = 0; + attr->key = gator_events_get_key(); +} + +static void gator_events_perf_pmu_cci_init(const int type) +{ + int cnt; + + strncpy(uc_attrs[uc_attr_count].name, "cci-400_ccnt", sizeof(uc_attrs[uc_attr_count].name)); + uc_attrs[uc_attr_count].type = type; + ++uc_attr_count; + + for (cnt = 0; cnt < CCI_400; ++cnt, ++uc_attr_count) { + struct gator_attr *const attr = &uc_attrs[uc_attr_count]; + snprintf(attr->name, sizeof(attr->name), "cci-400_cnt%d", cnt); + attr->type = type; + } +} + +static 
void gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_cpu, const int type) +{ + int cnt; + + snprintf(attrs[attr_count].name, sizeof(attrs[attr_count].name), "%s_ccnt", gator_cpu->pmnc_name); + attrs[attr_count].type = type; + ++attr_count; + + for (cnt = 0; cnt < gator_cpu->pmnc_counters; ++cnt, ++attr_count) { + struct gator_attr *const attr = &attrs[attr_count]; + snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", gator_cpu->pmnc_name, cnt); + attr->type = type; + } +} + +int gator_events_perf_pmu_init(void) +{ + struct perf_event_attr pea; + struct perf_event *pe; + const struct gator_cpu *gator_cpu; + int type; + int cpu; + int cnt; + bool found_cpu = false; + + for (cnt = 0; cnt < CNTMAX; cnt++) { + __attr_init(&attrs[cnt]); + } + for (cnt = 0; cnt < UCCNT; cnt++) { + __attr_init(&uc_attrs[cnt]); + } + + memset(&pea, 0, sizeof(pea)); + pea.size = sizeof(pea); + pea.config = 0xFF; + attr_count = 0; + uc_attr_count = 0; + for (type = PERF_TYPE_MAX; type < 0x20; ++type) { + pea.type = type; + + // A particular PMU may work on some but not all cores, so try on each core + pe = NULL; + for_each_present_cpu(cpu) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler); +#else + pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler, 0); +#endif + if (!IS_ERR(pe)) { + break; + } + } + // Assume that valid PMUs are contigious + if (IS_ERR(pe)) { + break; + } + + if (pe->pmu != NULL && type == pe->pmu->type) { + if (strcmp("CCI", pe->pmu->name) == 0) { + gator_events_perf_pmu_cci_init(type); + } else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) { + found_cpu = true; + gator_events_perf_pmu_cpu_init(gator_cpu, type); + } + } + + perf_event_release_kernel(pe); + } + + if (!found_cpu) { + const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid()); + if (gator_cpu == NULL) { + return -1; + } + gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW); + } + + if (attr_count > CNTMAX) { + printk(KERN_ERR "gator: Too many perf counters\n"); + return -1; + } + + if (uc_attr_count > UCCNT) { + printk(KERN_ERR "gator: Too many perf uncore counters\n"); + return -1; + } + + return gator_events_install(&gator_events_perf_pmu_interface); +} + +gator_events_init(gator_events_perf_pmu_init); +#endif diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c new file mode 100644 index 00000000000..461a0511143 --- /dev/null +++ b/drivers/gator/gator_events_sched.c @@ -0,0 +1,115 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include "gator.h" +#include <trace/events/sched.h> + +#define SCHED_SWITCH 0 +#define SCHED_TOTAL (SCHED_SWITCH+1) + +static ulong sched_switch_enabled; +static ulong sched_switch_key; +static DEFINE_PER_CPU(int[SCHED_TOTAL], schedCnt); +static DEFINE_PER_CPU(int[SCHED_TOTAL * 2], schedGet); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35) +GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next)) +#else +GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_struct *next)) +#endif +{ + unsigned long flags; + + // disable interrupts to synchronize with gator_events_sched_read() + // spinlocks not needed since percpu buffers are used + local_irq_save(flags); + per_cpu(schedCnt, get_physical_cpu())[SCHED_SWITCH]++; + local_irq_restore(flags); +} + +static int gator_events_sched_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + + /* switch */ + dir = gatorfs_mkdir(sb, root, "Linux_sched_switch"); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &sched_switch_enabled); + gatorfs_create_ro_ulong(sb, dir, "key", &sched_switch_key); + + return 0; +} + +static int gator_events_sched_start(void) +{ + // register tracepoints + if (sched_switch_enabled) + if (GATOR_REGISTER_TRACE(sched_switch)) + goto sched_switch_exit; + pr_debug("gator: registered scheduler event tracepoints\n"); + + return 0; + + // unregister tracepoints on error +sched_switch_exit: + pr_err("gator: scheduler event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n"); + + return -1; +} + +static void gator_events_sched_stop(void) +{ + if (sched_switch_enabled) + GATOR_UNREGISTER_TRACE(sched_switch); + pr_debug("gator: unregistered scheduler event tracepoints\n"); + + sched_switch_enabled = 0; +} + +static int gator_events_sched_read(int **buffer) +{ + unsigned long flags; + int len, value; + int cpu = get_physical_cpu(); + + len = 0; + if (sched_switch_enabled) { + local_irq_save(flags); + value = per_cpu(schedCnt, cpu)[SCHED_SWITCH]; + per_cpu(schedCnt, cpu)[SCHED_SWITCH] = 0; + local_irq_restore(flags); + per_cpu(schedGet, cpu)[len++] = sched_switch_key; + per_cpu(schedGet, cpu)[len++] = value; + } + + if (buffer) + *buffer = per_cpu(schedGet, cpu); + + return len; +} + +static struct gator_interface gator_events_sched_interface = { + .create_files = gator_events_sched_create_files, + .start = gator_events_sched_start, + .stop = gator_events_sched_stop, + .read = gator_events_sched_read, +}; + +int gator_events_sched_init(void) +{ + sched_switch_enabled = 0; + + sched_switch_key = gator_events_get_key(); + + return gator_events_install(&gator_events_sched_interface); +} + +gator_events_init(gator_events_sched_init); diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c new file mode 100644 index 00000000000..aaf306a4b4c --- /dev/null +++ b/drivers/gator/gator_events_scorpion.c @@ -0,0 +1,676 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include "gator.h" + +// gator_events_perf_pmu.c is used if perf is supported +#if GATOR_NO_PERF_SUPPORT + +static const char *pmnc_name; +static int pmnc_counters; + +// Per-CPU PMNC: config reg +#define PMNC_E (1 << 0) /* Enable all counters */ +#define PMNC_P (1 << 1) /* Reset all counters */ +#define PMNC_C (1 << 2) /* Cycle counter reset */ +#define PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define PMNC_X (1 << 4) /* Export to ETM */ +#define PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug */ +#define PMNC_MASK 0x3f /* Mask for writable bits */ + +// ccnt reg +#define CCNT_REG (1 << 31) + +#define CCNT 0 +#define CNT0 1 +#define CNTMAX (4+1) + +static unsigned long pmnc_enabled[CNTMAX]; +static unsigned long pmnc_event[CNTMAX]; +static unsigned long pmnc_key[CNTMAX]; + +static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt); + +enum scorpion_perf_types { + SCORPION_ICACHE_EXPL_INV = 0x4c, + SCORPION_ICACHE_MISS = 0x4d, + SCORPION_ICACHE_ACCESS = 0x4e, + SCORPION_ICACHE_CACHEREQ_L2 = 0x4f, + SCORPION_ICACHE_NOCACHE_L2 = 0x50, + SCORPION_HIQUP_NOPED = 0x51, + SCORPION_DATA_ABORT = 0x52, + SCORPION_IRQ = 0x53, + SCORPION_FIQ = 0x54, + SCORPION_ALL_EXCPT = 0x55, + SCORPION_UNDEF = 0x56, + SCORPION_SVC = 0x57, + SCORPION_SMC = 0x58, + SCORPION_PREFETCH_ABORT = 0x59, + SCORPION_INDEX_CHECK = 0x5a, + SCORPION_NULL_CHECK = 0x5b, + SCORPION_EXPL_ICIALLU = 0x5c, + SCORPION_IMPL_ICIALLU = 0x5d, + SCORPION_NONICIALLU_BTAC_INV = 0x5e, + SCORPION_ICIMVAU_IMPL_ICIALLU = 0x5f, + SCORPION_SPIPE_ONLY_CYCLES = 0x60, + SCORPION_XPIPE_ONLY_CYCLES = 0x61, + SCORPION_DUAL_CYCLES = 0x62, + SCORPION_DISPATCH_ANY_CYCLES = 0x63, + SCORPION_FIFO_FULLBLK_CMT = 0x64, + SCORPION_FAIL_COND_INST = 0x65, + SCORPION_PASS_COND_INST = 0x66, + SCORPION_ALLOW_VU_CLK = 0x67, + SCORPION_VU_IDLE = 0x68, + SCORPION_ALLOW_L2_CLK = 0x69, + SCORPION_L2_IDLE = 0x6a, + SCORPION_DTLB_IMPL_INV_SCTLR_DACR = 0x6b, + SCORPION_DTLB_EXPL_INV = 0x6c, + SCORPION_DTLB_MISS = 0x6d, + SCORPION_DTLB_ACCESS = 0x6e, + SCORPION_ITLB_MISS = 0x6f, + SCORPION_ITLB_IMPL_INV = 0x70, + SCORPION_ITLB_EXPL_INV = 0x71, + SCORPION_UTLB_D_MISS = 0x72, + SCORPION_UTLB_D_ACCESS = 0x73, + SCORPION_UTLB_I_MISS = 0x74, + SCORPION_UTLB_I_ACCESS = 0x75, + SCORPION_UTLB_INV_ASID = 0x76, + SCORPION_UTLB_INV_MVA = 0x77, + SCORPION_UTLB_INV_ALL = 0x78, + SCORPION_S2_HOLD_RDQ_UNAVAIL = 0x79, + SCORPION_S2_HOLD = 0x7a, + SCORPION_S2_HOLD_DEV_OP = 0x7b, + SCORPION_S2_HOLD_ORDER = 0x7c, + SCORPION_S2_HOLD_BARRIER = 0x7d, + SCORPION_VIU_DUAL_CYCLE = 0x7e, + SCORPION_VIU_SINGLE_CYCLE = 0x7f, + SCORPION_VX_PIPE_WAR_STALL_CYCLES = 0x80, + SCORPION_VX_PIPE_WAW_STALL_CYCLES = 0x81, + SCORPION_VX_PIPE_RAW_STALL_CYCLES = 0x82, + SCORPION_VX_PIPE_LOAD_USE_STALL = 0x83, + SCORPION_VS_PIPE_WAR_STALL_CYCLES = 0x84, + SCORPION_VS_PIPE_WAW_STALL_CYCLES = 0x85, + SCORPION_VS_PIPE_RAW_STALL_CYCLES = 0x86, + SCORPION_EXCEPTIONS_INV_OPERATION = 0x87, + SCORPION_EXCEPTIONS_DIV_BY_ZERO = 0x88, + SCORPION_COND_INST_FAIL_VX_PIPE = 0x89, + SCORPION_COND_INST_FAIL_VS_PIPE = 0x8a, + SCORPION_EXCEPTIONS_OVERFLOW = 0x8b, + SCORPION_EXCEPTIONS_UNDERFLOW = 0x8c, + SCORPION_EXCEPTIONS_DENORM = 0x8d, +#ifdef CONFIG_ARCH_MSM_SCORPIONMP + SCORPIONMP_NUM_BARRIERS = 0x8e, + SCORPIONMP_BARRIER_CYCLES = 0x8f, +#else + SCORPION_BANK_AB_HIT = 0x8e, + SCORPION_BANK_AB_ACCESS = 0x8f, + SCORPION_BANK_CD_HIT = 0x90, + SCORPION_BANK_CD_ACCESS = 0x91, + SCORPION_BANK_AB_DSIDE_HIT = 0x92, + SCORPION_BANK_AB_DSIDE_ACCESS = 0x93, + SCORPION_BANK_CD_DSIDE_HIT = 0x94, + SCORPION_BANK_CD_DSIDE_ACCESS 
= 0x95, + SCORPION_BANK_AB_ISIDE_HIT = 0x96, + SCORPION_BANK_AB_ISIDE_ACCESS = 0x97, + SCORPION_BANK_CD_ISIDE_HIT = 0x98, + SCORPION_BANK_CD_ISIDE_ACCESS = 0x99, + SCORPION_ISIDE_RD_WAIT = 0x9a, + SCORPION_DSIDE_RD_WAIT = 0x9b, + SCORPION_BANK_BYPASS_WRITE = 0x9c, + SCORPION_BANK_AB_NON_CASTOUT = 0x9d, + SCORPION_BANK_AB_L2_CASTOUT = 0x9e, + SCORPION_BANK_CD_NON_CASTOUT = 0x9f, + SCORPION_BANK_CD_L2_CASTOUT = 0xa0, +#endif + MSM_MAX_EVT +}; + +struct scorp_evt { + u32 evt_type; + u32 val; + u8 grp; + u32 evt_type_act; +}; + +static const struct scorp_evt sc_evt[] = { + {SCORPION_ICACHE_EXPL_INV, 0x80000500, 0, 0x4d}, + {SCORPION_ICACHE_MISS, 0x80050000, 0, 0x4e}, + {SCORPION_ICACHE_ACCESS, 0x85000000, 0, 0x4f}, + {SCORPION_ICACHE_CACHEREQ_L2, 0x86000000, 0, 0x4f}, + {SCORPION_ICACHE_NOCACHE_L2, 0x87000000, 0, 0x4f}, + {SCORPION_HIQUP_NOPED, 0x80080000, 0, 0x4e}, + {SCORPION_DATA_ABORT, 0x8000000a, 0, 0x4c}, + {SCORPION_IRQ, 0x80000a00, 0, 0x4d}, + {SCORPION_FIQ, 0x800a0000, 0, 0x4e}, + {SCORPION_ALL_EXCPT, 0x8a000000, 0, 0x4f}, + {SCORPION_UNDEF, 0x8000000b, 0, 0x4c}, + {SCORPION_SVC, 0x80000b00, 0, 0x4d}, + {SCORPION_SMC, 0x800b0000, 0, 0x4e}, + {SCORPION_PREFETCH_ABORT, 0x8b000000, 0, 0x4f}, + {SCORPION_INDEX_CHECK, 0x8000000c, 0, 0x4c}, + {SCORPION_NULL_CHECK, 0x80000c00, 0, 0x4d}, + {SCORPION_EXPL_ICIALLU, 0x8000000d, 0, 0x4c}, + {SCORPION_IMPL_ICIALLU, 0x80000d00, 0, 0x4d}, + {SCORPION_NONICIALLU_BTAC_INV, 0x800d0000, 0, 0x4e}, + {SCORPION_ICIMVAU_IMPL_ICIALLU, 0x8d000000, 0, 0x4f}, + + {SCORPION_SPIPE_ONLY_CYCLES, 0x80000600, 1, 0x51}, + {SCORPION_XPIPE_ONLY_CYCLES, 0x80060000, 1, 0x52}, + {SCORPION_DUAL_CYCLES, 0x86000000, 1, 0x53}, + {SCORPION_DISPATCH_ANY_CYCLES, 0x89000000, 1, 0x53}, + {SCORPION_FIFO_FULLBLK_CMT, 0x8000000d, 1, 0x50}, + {SCORPION_FAIL_COND_INST, 0x800d0000, 1, 0x52}, + {SCORPION_PASS_COND_INST, 0x8d000000, 1, 0x53}, + {SCORPION_ALLOW_VU_CLK, 0x8000000e, 1, 0x50}, + {SCORPION_VU_IDLE, 0x80000e00, 1, 0x51}, + {SCORPION_ALLOW_L2_CLK, 0x800e0000, 1, 0x52}, + {SCORPION_L2_IDLE, 0x8e000000, 1, 0x53}, + + {SCORPION_DTLB_IMPL_INV_SCTLR_DACR, 0x80000001, 2, 0x54}, + {SCORPION_DTLB_EXPL_INV, 0x80000100, 2, 0x55}, + {SCORPION_DTLB_MISS, 0x80010000, 2, 0x56}, + {SCORPION_DTLB_ACCESS, 0x81000000, 2, 0x57}, + {SCORPION_ITLB_MISS, 0x80000200, 2, 0x55}, + {SCORPION_ITLB_IMPL_INV, 0x80020000, 2, 0x56}, + {SCORPION_ITLB_EXPL_INV, 0x82000000, 2, 0x57}, + {SCORPION_UTLB_D_MISS, 0x80000003, 2, 0x54}, + {SCORPION_UTLB_D_ACCESS, 0x80000300, 2, 0x55}, + {SCORPION_UTLB_I_MISS, 0x80030000, 2, 0x56}, + {SCORPION_UTLB_I_ACCESS, 0x83000000, 2, 0x57}, + {SCORPION_UTLB_INV_ASID, 0x80000400, 2, 0x55}, + {SCORPION_UTLB_INV_MVA, 0x80040000, 2, 0x56}, + {SCORPION_UTLB_INV_ALL, 0x84000000, 2, 0x57}, + {SCORPION_S2_HOLD_RDQ_UNAVAIL, 0x80000800, 2, 0x55}, + {SCORPION_S2_HOLD, 0x88000000, 2, 0x57}, + {SCORPION_S2_HOLD_DEV_OP, 0x80000900, 2, 0x55}, + {SCORPION_S2_HOLD_ORDER, 0x80090000, 2, 0x56}, + {SCORPION_S2_HOLD_BARRIER, 0x89000000, 2, 0x57}, + + {SCORPION_VIU_DUAL_CYCLE, 0x80000001, 4, 0x5c}, + {SCORPION_VIU_SINGLE_CYCLE, 0x80000100, 4, 0x5d}, + {SCORPION_VX_PIPE_WAR_STALL_CYCLES, 0x80000005, 4, 0x5c}, + {SCORPION_VX_PIPE_WAW_STALL_CYCLES, 0x80000500, 4, 0x5d}, + {SCORPION_VX_PIPE_RAW_STALL_CYCLES, 0x80050000, 4, 0x5e}, + {SCORPION_VX_PIPE_LOAD_USE_STALL, 0x80000007, 4, 0x5c}, + {SCORPION_VS_PIPE_WAR_STALL_CYCLES, 0x80000008, 4, 0x5c}, + {SCORPION_VS_PIPE_WAW_STALL_CYCLES, 0x80000800, 4, 0x5d}, + {SCORPION_VS_PIPE_RAW_STALL_CYCLES, 0x80080000, 4, 0x5e}, + {SCORPION_EXCEPTIONS_INV_OPERATION, 
0x8000000b, 4, 0x5c}, + {SCORPION_EXCEPTIONS_DIV_BY_ZERO, 0x80000b00, 4, 0x5d}, + {SCORPION_COND_INST_FAIL_VX_PIPE, 0x800b0000, 4, 0x5e}, + {SCORPION_COND_INST_FAIL_VS_PIPE, 0x8b000000, 4, 0x5f}, + {SCORPION_EXCEPTIONS_OVERFLOW, 0x8000000c, 4, 0x5c}, + {SCORPION_EXCEPTIONS_UNDERFLOW, 0x80000c00, 4, 0x5d}, + {SCORPION_EXCEPTIONS_DENORM, 0x8c000000, 4, 0x5f}, + +#ifdef CONFIG_ARCH_MSM_SCORPIONMP + {SCORPIONMP_NUM_BARRIERS, 0x80000e00, 3, 0x59}, + {SCORPIONMP_BARRIER_CYCLES, 0x800e0000, 3, 0x5a}, +#else + {SCORPION_BANK_AB_HIT, 0x80000001, 3, 0x58}, + {SCORPION_BANK_AB_ACCESS, 0x80000100, 3, 0x59}, + {SCORPION_BANK_CD_HIT, 0x80010000, 3, 0x5a}, + {SCORPION_BANK_CD_ACCESS, 0x81000000, 3, 0x5b}, + {SCORPION_BANK_AB_DSIDE_HIT, 0x80000002, 3, 0x58}, + {SCORPION_BANK_AB_DSIDE_ACCESS, 0x80000200, 3, 0x59}, + {SCORPION_BANK_CD_DSIDE_HIT, 0x80020000, 3, 0x5a}, + {SCORPION_BANK_CD_DSIDE_ACCESS, 0x82000000, 3, 0x5b}, + {SCORPION_BANK_AB_ISIDE_HIT, 0x80000003, 3, 0x58}, + {SCORPION_BANK_AB_ISIDE_ACCESS, 0x80000300, 3, 0x59}, + {SCORPION_BANK_CD_ISIDE_HIT, 0x80030000, 3, 0x5a}, + {SCORPION_BANK_CD_ISIDE_ACCESS, 0x83000000, 3, 0x5b}, + {SCORPION_ISIDE_RD_WAIT, 0x80000009, 3, 0x58}, + {SCORPION_DSIDE_RD_WAIT, 0x80090000, 3, 0x5a}, + {SCORPION_BANK_BYPASS_WRITE, 0x8000000a, 3, 0x58}, + {SCORPION_BANK_AB_NON_CASTOUT, 0x8000000c, 3, 0x58}, + {SCORPION_BANK_AB_L2_CASTOUT, 0x80000c00, 3, 0x59}, + {SCORPION_BANK_CD_NON_CASTOUT, 0x800c0000, 3, 0x5a}, + {SCORPION_BANK_CD_L2_CASTOUT, 0x8c000000, 3, 0x5b}, +#endif +}; + +static inline void scorpion_pmnc_write(u32 val) +{ + val &= PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val)); +} + +static inline u32 scorpion_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + return val; +} + +static inline u32 scorpion_ccnt_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + return val; +} + +static inline u32 scorpion_cntn_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + return val; +} + +static inline u32 scorpion_pmnc_enable_counter(unsigned int cnt) +{ + u32 val; + + if (cnt >= CNTMAX) { + pr_err("gator: CPU%u enabling wrong PMNC counter %d\n", smp_processor_id(), cnt); + return -1; + } + + if (cnt == CCNT) + val = CCNT_REG; + else + val = (1 << (cnt - CNT0)); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return cnt; +} + +static inline u32 scorpion_pmnc_disable_counter(unsigned int cnt) +{ + u32 val; + + if (cnt >= CNTMAX) { + pr_err("gator: CPU%u disabling wrong PMNC counter %d\n", smp_processor_id(), cnt); + return -1; + } + + if (cnt == CCNT) + val = CCNT_REG; + else + val = (1 << (cnt - CNT0)); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return cnt; +} + +static inline int scorpion_pmnc_select_counter(unsigned int cnt) +{ + u32 val; + + if ((cnt == CCNT) || (cnt >= CNTMAX)) { + pr_err("gator: CPU%u selecting wrong PMNC counter %d\n", smp_processor_id(), cnt); + return -1; + } + + val = (cnt - CNT0); + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return cnt; +} + +static u32 scorpion_read_lpm0(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_lpm0(u32 val) +{ + asm volatile("mcr p15, 0, %0, c15, c0, 0" : : "r" (val)); +} + +static u32 scorpion_read_lpm1(void) +{ + u32 val; + asm volatile("mrc p15, 1, %0, c15, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_lpm1(u32 val) +{ + asm 
volatile("mcr p15, 1, %0, c15, c0, 0" : : "r" (val)); +} + +static u32 scorpion_read_lpm2(void) +{ + u32 val; + asm volatile("mrc p15, 2, %0, c15, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_lpm2(u32 val) +{ + asm volatile("mcr p15, 2, %0, c15, c0, 0" : : "r" (val)); +} + +static u32 scorpion_read_l2lpm(void) +{ + u32 val; + asm volatile("mrc p15, 3, %0, c15, c2, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_l2lpm(u32 val) +{ + asm volatile("mcr p15, 3, %0, c15, c2, 0" : : "r" (val)); +} + +static u32 scorpion_read_vlpm(void) +{ + u32 val; + asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); + return val; +} + +static void scorpion_write_vlpm(u32 val) +{ + asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); +} + +struct scorpion_access_funcs { + u32(*read)(void); + void (*write)(u32); +}; + +struct scorpion_access_funcs scor_func[] = { + {scorpion_read_lpm0, scorpion_write_lpm0}, + {scorpion_read_lpm1, scorpion_write_lpm1}, + {scorpion_read_lpm2, scorpion_write_lpm2}, + {scorpion_read_l2lpm, scorpion_write_l2lpm}, + {scorpion_read_vlpm, scorpion_write_vlpm}, +}; + +u32 venum_orig_val; +u32 fp_orig_val; + +static void scorpion_pre_vlpm(void) +{ + u32 venum_new_val; + u32 fp_new_val; + + /* CPACR Enable CP10 access */ + asm volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (venum_orig_val)); + venum_new_val = venum_orig_val | 0x00300000; + asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_new_val)); + /* Enable FPEXC */ + asm volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (fp_orig_val)); + fp_new_val = fp_orig_val | 0x40000000; + asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_new_val)); +} + +static void scorpion_post_vlpm(void) +{ + /* Restore FPEXC */ + asm volatile("mcr p10, 7, %0, c8, c0, 0" : : "r" (fp_orig_val)); + /* Restore CPACR */ + asm volatile("mcr p15, 0, %0, c1, c0, 2" : : "r" (venum_orig_val)); +} + +#define COLMN0MASK 0x000000ff +#define COLMN1MASK 0x0000ff00 +#define COLMN2MASK 0x00ff0000 +static u32 scorpion_get_columnmask(u32 setval) +{ + if (setval & COLMN0MASK) + return 0xffffff00; + else if (setval & COLMN1MASK) + return 0xffff00ff; + else if (setval & COLMN2MASK) + return 0xff00ffff; + else + return 0x80ffffff; +} + +static void scorpion_evt_setup(u32 gr, u32 setval) +{ + u32 val; + if (gr == 4) + scorpion_pre_vlpm(); + val = scorpion_get_columnmask(setval) & scor_func[gr].read(); + val = val | setval; + scor_func[gr].write(val); + if (gr == 4) + scorpion_post_vlpm(); +} + +static int get_scorpion_evtinfo(unsigned int evt_type, struct scorp_evt *evtinfo) +{ + u32 idx; + if ((evt_type < 0x4c) || (evt_type >= MSM_MAX_EVT)) + return 0; + idx = evt_type - 0x4c; + if (sc_evt[idx].evt_type == evt_type) { + evtinfo->val = sc_evt[idx].val; + evtinfo->grp = sc_evt[idx].grp; + evtinfo->evt_type_act = sc_evt[idx].evt_type_act; + return 1; + } + return 0; +} + +static inline void scorpion_pmnc_write_evtsel(unsigned int cnt, u32 val) +{ + if (scorpion_pmnc_select_counter(cnt) == cnt) { + if (val < 0x40) { + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } else { + u32 zero = 0; + struct scorp_evt evtinfo; + // extract evtinfo.grp and evtinfo.tevt_type_act from val + if (get_scorpion_evtinfo(val, &evtinfo) == 0) + return; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evtinfo.evt_type_act)); + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (zero)); + scorpion_evt_setup(evtinfo.grp, val); + } + } +} + +static void scorpion_pmnc_reset_counter(unsigned int cnt) +{ + u32 val = 0; + + if (cnt == CCNT) { + 
scorpion_pmnc_disable_counter(cnt); + + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (val)); + + if (pmnc_enabled[cnt] != 0) + scorpion_pmnc_enable_counter(cnt); + + } else if (cnt >= CNTMAX) { + pr_err("gator: CPU%u resetting wrong PMNC counter %d\n", smp_processor_id(), cnt); + } else { + scorpion_pmnc_disable_counter(cnt); + + if (scorpion_pmnc_select_counter(cnt) == cnt) + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (val)); + + if (pmnc_enabled[cnt] != 0) + scorpion_pmnc_enable_counter(cnt); + } +} + +static int gator_events_scorpion_create_files(struct super_block *sb, struct dentry *root) +{ + struct dentry *dir; + int i; + + for (i = 0; i < pmnc_counters; i++) { + char buf[40]; + if (i == 0) { + snprintf(buf, sizeof buf, "%s_ccnt", pmnc_name); + } else { + snprintf(buf, sizeof buf, "%s_cnt%d", pmnc_name, i - 1); + } + dir = gatorfs_mkdir(sb, root, buf); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]); + gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]); + if (i > 0) { + gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]); + } + } + + return 0; +} + +static int gator_events_scorpion_online(int **buffer, bool migrate) +{ + unsigned int cnt, len = 0, cpu = smp_processor_id(); + + if (scorpion_pmnc_read() & PMNC_E) { + scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E); + } + + /* Initialize & Reset PMNC: C bit and P bit */ + scorpion_pmnc_write(PMNC_P | PMNC_C); + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + unsigned long event; + + if (!pmnc_enabled[cnt]) + continue; + + // disable counter + scorpion_pmnc_disable_counter(cnt); + + event = pmnc_event[cnt] & 255; + + // Set event (if destined for PMNx counters), We don't need to set the event if it's a cycle count + if (cnt != CCNT) + scorpion_pmnc_write_evtsel(cnt, event); + + // reset counter + scorpion_pmnc_reset_counter(cnt); + + // Enable counter, do not enable interrupt for this counter + scorpion_pmnc_enable_counter(cnt); + } + + // enable + scorpion_pmnc_write(scorpion_pmnc_read() | PMNC_E); + + // read the counters and toss the invalid data, return zero instead + for (cnt = 0; cnt < pmnc_counters; cnt++) { + if (pmnc_enabled[cnt]) { + if (cnt == CCNT) { + scorpion_ccnt_read(); + } else if (scorpion_pmnc_select_counter(cnt) == cnt) { + scorpion_cntn_read(); + } + scorpion_pmnc_reset_counter(cnt); + + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = 0; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, cpu); + + return len; +} + +static int gator_events_scorpion_offline(int **buffer, bool migrate) +{ + scorpion_pmnc_write(scorpion_pmnc_read() & ~PMNC_E); + return 0; +} + +static void gator_events_scorpion_stop(void) +{ + unsigned int cnt; + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + } +} + +static int gator_events_scorpion_read(int **buffer) +{ + int cnt, len = 0; + int cpu = smp_processor_id(); + + // a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled + if (!(scorpion_pmnc_read() & PMNC_E)) { + return 0; + } + + for (cnt = 0; cnt < pmnc_counters; cnt++) { + if (pmnc_enabled[cnt]) { + int value; + if (cnt == CCNT) { + value = scorpion_ccnt_read(); + } else if (scorpion_pmnc_select_counter(cnt) == cnt) { + value = scorpion_cntn_read(); + } else { + value = 0; + } + scorpion_pmnc_reset_counter(cnt); + + per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; + per_cpu(perfCnt, cpu)[len++] = value; + } + } + + if (buffer) + *buffer = per_cpu(perfCnt, 
cpu); + + return len; +} + +static struct gator_interface gator_events_scorpion_interface = { + .create_files = gator_events_scorpion_create_files, + .stop = gator_events_scorpion_stop, + .online = gator_events_scorpion_online, + .offline = gator_events_scorpion_offline, + .read = gator_events_scorpion_read, +}; + +int gator_events_scorpion_init(void) +{ + unsigned int cnt; + + switch (gator_cpuid()) { + case SCORPION: + pmnc_name = "Scorpion"; + pmnc_counters = 4; + break; + case SCORPIONMP: + pmnc_name = "ScorpionMP"; + pmnc_counters = 4; + break; + default: + return -1; + } + + pmnc_counters++; // CNT[n] + CCNT + + for (cnt = CCNT; cnt < CNTMAX; cnt++) { + pmnc_enabled[cnt] = 0; + pmnc_event[cnt] = 0; + pmnc_key[cnt] = gator_events_get_key(); + } + + return gator_events_install(&gator_events_scorpion_interface); +} + +gator_events_init(gator_events_scorpion_init); + +#else +int gator_events_scorpion_init(void) +{ + return -1; +} +#endif diff --git a/drivers/gator/gator_fs.c b/drivers/gator/gator_fs.c new file mode 100644 index 00000000000..fe6f83d547e --- /dev/null +++ b/drivers/gator/gator_fs.c @@ -0,0 +1,382 @@ +/** + * @file gatorfs.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * A simple filesystem for configuration and + * access of oprofile. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <asm/uaccess.h> + +#define gatorfs_MAGIC 0x24051020 +#define TMPBUFSIZE 50 +DEFINE_SPINLOCK(gatorfs_lock); + +static struct inode *gatorfs_get_inode(struct super_block *sb, int mode) +{ + struct inode *inode = new_inode(sb); + + if (inode) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 37) + inode->i_ino = get_next_ino(); +#endif + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +static const struct super_operations s_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +ssize_t gatorfs_str_to_user(char const *str, char __user *buf, size_t count, loff_t *offset) +{ + return simple_read_from_buffer(buf, count, offset, str, strlen(str)); +} + +ssize_t gatorfs_ulong_to_user(unsigned long val, char __user *buf, size_t count, loff_t *offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val); + if (maxlen > TMPBUFSIZE) + maxlen = TMPBUFSIZE; + return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen); +} + +ssize_t gatorfs_u64_to_user(u64 val, char __user *buf, size_t count, loff_t *offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%llu\n", val); + if (maxlen > TMPBUFSIZE) + maxlen = TMPBUFSIZE; + return simple_read_from_buffer(buf, count, offset, tmpbuf, maxlen); +} + +int gatorfs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count) +{ + char tmpbuf[TMPBUFSIZE]; + unsigned long flags; + + if (!count) + return 0; + + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + spin_lock_irqsave(&gatorfs_lock, flags); + *val = simple_strtoul(tmpbuf, NULL, 0); + spin_unlock_irqrestore(&gatorfs_lock, flags); + return 0; +} + +int gatorfs_u64_from_user(u64 *val, char const __user *buf, size_t count) +{ + char tmpbuf[TMPBUFSIZE]; + unsigned long flags; + + if (!count) + return 0; + + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if 
(copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + spin_lock_irqsave(&gatorfs_lock, flags); + *val = simple_strtoull(tmpbuf, NULL, 0); + spin_unlock_irqrestore(&gatorfs_lock, flags); + return 0; +} + +static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + unsigned long *val = file->private_data; + return gatorfs_ulong_to_user(*val, buf, count, offset); +} + +static ssize_t u64_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + u64 *val = file->private_data; + return gatorfs_u64_to_user(*val, buf, count, offset); +} + +static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) +{ + unsigned long *value = file->private_data; + int retval; + + if (*offset) + return -EINVAL; + + retval = gatorfs_ulong_from_user(value, buf, count); + + if (retval) + return retval; + return count; +} + +static ssize_t u64_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) +{ + u64 *value = file->private_data; + int retval; + + if (*offset) + return -EINVAL; + + retval = gatorfs_u64_from_user(value, buf, count); + + if (retval) + return retval; + return count; +} + +static int default_open(struct inode *inode, struct file *filp) +{ + if (inode->i_private) + filp->private_data = inode->i_private; + return 0; +} + +static const struct file_operations ulong_fops = { + .read = ulong_read_file, + .write = ulong_write_file, + .open = default_open, +}; + +static const struct file_operations u64_fops = { + .read = u64_read_file, + .write = u64_write_file, + .open = default_open, +}; + +static const struct file_operations ulong_ro_fops = { + .read = ulong_read_file, + .open = default_open, +}; + +static const struct file_operations u64_ro_fops = { + .read = u64_read_file, + .open = default_open, +}; + +static struct dentry *__gatorfs_create_file(struct super_block *sb, + struct dentry *root, + char const *name, + const struct file_operations *fops, + int perm) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(root, name); + if (!dentry) + return NULL; + inode = gatorfs_get_inode(sb, S_IFREG | perm); + if (!inode) { + dput(dentry); + return NULL; + } + inode->i_fop = fops; + d_add(dentry, inode); + return dentry; +} + +int gatorfs_create_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &ulong_fops, 0644); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_u64(struct super_block *sb, struct dentry *root, + char const *name, u64 *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &u64_fops, 0644); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root, + char const *name, unsigned long *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &ulong_ro_fops, 0444); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_ro_u64(struct super_block *sb, struct dentry *root, + char const *name, u64 * val) +{ + struct dentry *d = + __gatorfs_create_file(sb, root, name, &u64_ro_fops, 0444); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +static ssize_t atomic_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + atomic_t *val = file->private_data; + return 
gatorfs_ulong_to_user(atomic_read(val), buf, count, offset); +} + +static const struct file_operations atomic_ro_fops = { + .read = atomic_read_file, + .open = default_open, +}; + +int gatorfs_create_ro_atomic(struct super_block *sb, struct dentry *root, + char const *name, atomic_t *val) +{ + struct dentry *d = __gatorfs_create_file(sb, root, name, + &atomic_ro_fops, 0444); + if (!d) + return -EFAULT; + + d->d_inode->i_private = val; + return 0; +} + +int gatorfs_create_file(struct super_block *sb, struct dentry *root, + char const *name, const struct file_operations *fops) +{ + if (!__gatorfs_create_file(sb, root, name, fops, 0644)) + return -EFAULT; + return 0; +} + +int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root, + char const *name, + const struct file_operations *fops, int perm) +{ + if (!__gatorfs_create_file(sb, root, name, fops, perm)) + return -EFAULT; + return 0; +} + +struct dentry *gatorfs_mkdir(struct super_block *sb, + struct dentry *root, char const *name) +{ + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(root, name); + if (!dentry) + return NULL; + inode = gatorfs_get_inode(sb, S_IFDIR | 0755); + if (!inode) { + dput(dentry); + return NULL; + } + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + d_add(dentry, inode); + return dentry; +} + +static int gatorfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct inode *root_inode; + struct dentry *root_dentry; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = gatorfs_MAGIC; + sb->s_op = &s_ops; + sb->s_time_gran = 1; + + root_inode = gatorfs_get_inode(sb, S_IFDIR | 0755); + if (!root_inode) + return -ENOMEM; + root_inode->i_op = &simple_dir_inode_operations; + root_inode->i_fop = &simple_dir_operations; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) + root_dentry = d_alloc_root(root_inode); +#else + root_dentry = d_make_root(root_inode); +#endif + + if (!root_dentry) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 4, 0) + iput(root_inode); +#endif + return -ENOMEM; + } + + sb->s_root = root_dentry; + + gator_op_create_files(sb, root_dentry); + + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) +static int gatorfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_single(fs_type, flags, data, gatorfs_fill_super, mnt); +} +#else +static struct dentry *gatorfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_nodev(fs_type, flags, data, gatorfs_fill_super); +} +#endif + +static struct file_system_type gatorfs_type = { + .owner = THIS_MODULE, + .name = "gatorfs", +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) + .get_sb = gatorfs_get_sb, +#else + .mount = gatorfs_mount, +#endif + + .kill_sb = kill_litter_super, +}; + +int __init gatorfs_register(void) +{ + return register_filesystem(&gatorfs_type); +} + +void gatorfs_unregister(void) +{ + unregister_filesystem(&gatorfs_type); +} diff --git a/drivers/gator/gator_hrtimer_gator.c b/drivers/gator/gator_hrtimer_gator.c new file mode 100644 index 00000000000..8c35d496dbb --- /dev/null +++ b/drivers/gator/gator_hrtimer_gator.c @@ -0,0 +1,82 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +// gator_hrtimer_perf.c is used if perf is supported +// update, gator_hrtimer_gator.c always used until issues resolved with perf hrtimers +#if 1 + +void (*callback)(void); +DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer); +DEFINE_PER_CPU(int, hrtimer_is_active); +static ktime_t profiling_interval; +static void gator_hrtimer_online(void); +static void gator_hrtimer_offline(void); + +static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer) +{ + hrtimer_forward_now(hrtimer, profiling_interval); + (*callback)(); + return HRTIMER_RESTART; +} + +static void gator_hrtimer_online(void) +{ + int cpu = get_logical_cpu(); + struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu); + + if (per_cpu(hrtimer_is_active, cpu) || profiling_interval.tv64 == 0) + return; + + per_cpu(hrtimer_is_active, cpu) = 1; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = gator_hrtimer_notify; +#ifdef CONFIG_PREEMPT_RT_BASE + hrtimer->irqsafe = 1; +#endif + hrtimer_start(hrtimer, profiling_interval, HRTIMER_MODE_REL_PINNED); +} + +static void gator_hrtimer_offline(void) +{ + int cpu = get_logical_cpu(); + struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu); + + if (!per_cpu(hrtimer_is_active, cpu)) + return; + + per_cpu(hrtimer_is_active, cpu) = 0; + hrtimer_cancel(hrtimer); +} + +static int gator_hrtimer_init(int interval, void (*func)(void)) +{ + int cpu; + + (callback) = (func); + + for_each_present_cpu(cpu) { + per_cpu(hrtimer_is_active, cpu) = 0; + } + + // calculate profiling interval + if (interval > 0) { + profiling_interval = ns_to_ktime(1000000000UL / interval); + } else { + profiling_interval.tv64 = 0; + } + + return 0; +} + +static void gator_hrtimer_shutdown(void) +{ + /* empty */ +} + +#endif diff --git a/drivers/gator/gator_hrtimer_perf.c b/drivers/gator/gator_hrtimer_perf.c new file mode 100644 index 00000000000..7b95399478e --- /dev/null +++ b/drivers/gator/gator_hrtimer_perf.c @@ -0,0 +1,113 @@ +/** + * Copyright (C) ARM Limited 2011-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +// gator_hrtimer_gator.c is used if perf is not supported +// update, gator_hrtimer_gator.c always used until issues resolved with perf hrtimers +#if 0 + +// Note: perf Cortex support added in 2.6.35 and PERF_COUNT_SW_CPU_CLOCK/hrtimer broken on 2.6.35 and 2.6.36 +// not relevant as this code is not active until 3.0.0, but wanted to document the issue + +void (*callback)(void); +static int profiling_interval; +static DEFINE_PER_CPU(struct perf_event *, perf_hrtimer); +static DEFINE_PER_CPU(struct perf_event_attr *, perf_hrtimer_attr); + +static void gator_hrtimer_shutdown(void); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) +static void hrtimer_overflow_handler(struct perf_event *event, int unused, struct perf_sample_data *data, struct pt_regs *regs) +#else +static void hrtimer_overflow_handler(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) +#endif +{ + (*callback)(); +} + +static int gator_online_single_hrtimer(int cpu) +{ + if (per_cpu(perf_hrtimer, cpu) != 0 || per_cpu(perf_hrtimer_attr, cpu) == 0) + return 0; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + per_cpu(perf_hrtimer, cpu) = perf_event_create_kernel_counter(per_cpu(perf_hrtimer_attr, cpu), cpu, 0, hrtimer_overflow_handler); +#else + per_cpu(perf_hrtimer, cpu) = perf_event_create_kernel_counter(per_cpu(perf_hrtimer_attr, cpu), cpu, 0, hrtimer_overflow_handler, 0); +#endif + if (IS_ERR(per_cpu(perf_hrtimer, cpu))) { + per_cpu(perf_hrtimer, cpu) = NULL; + return -1; + } + + if (per_cpu(perf_hrtimer, cpu)->state != PERF_EVENT_STATE_ACTIVE) { + perf_event_release_kernel(per_cpu(perf_hrtimer, cpu)); + per_cpu(perf_hrtimer, cpu) = NULL; + return -1; + } + + return 0; +} + +static void gator_hrtimer_online(int cpu) +{ + if (gator_online_single_hrtimer(cpu) < 0) { + pr_debug("gator: unable to online the hrtimer on cpu%d\n", cpu); + } +} + +static void gator_hrtimer_offline(int cpu) +{ + if (per_cpu(perf_hrtimer, cpu)) { + perf_event_release_kernel(per_cpu(perf_hrtimer, cpu)); + per_cpu(perf_hrtimer, cpu) = NULL; + } +} + +static int gator_hrtimer_init(int interval, void (*func)(void)) +{ + u32 size = sizeof(struct perf_event_attr); + int cpu; + + callback = func; + + // calculate profiling interval + profiling_interval = 1000000000 / interval; + + for_each_present_cpu(cpu) { + per_cpu(perf_hrtimer, cpu) = 0; + per_cpu(perf_hrtimer_attr, cpu) = kmalloc(size, GFP_KERNEL); + if (per_cpu(perf_hrtimer_attr, cpu) == 0) { + gator_hrtimer_shutdown(); + return -1; + } + + memset(per_cpu(perf_hrtimer_attr, cpu), 0, size); + per_cpu(perf_hrtimer_attr, cpu)->type = PERF_TYPE_SOFTWARE; + per_cpu(perf_hrtimer_attr, cpu)->size = size; + per_cpu(perf_hrtimer_attr, cpu)->config = PERF_COUNT_SW_CPU_CLOCK; + per_cpu(perf_hrtimer_attr, cpu)->sample_period = profiling_interval; + per_cpu(perf_hrtimer_attr, cpu)->pinned = 1; + } + + return 0; +} + +static void gator_hrtimer_shutdown(void) +{ + int cpu; + + for_each_present_cpu(cpu) { + if (per_cpu(perf_hrtimer_attr, cpu)) { + kfree(per_cpu(perf_hrtimer_attr, cpu)); + per_cpu(perf_hrtimer_attr, cpu) = NULL; + } + } +} + +#endif diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c new file mode 100644 index 00000000000..6f45c548205 --- /dev/null +++ b/drivers/gator/gator_iks.c @@ -0,0 +1,144 @@ +/** + * Copyright (C) ARM Limited 2010-2013. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#if GATOR_IKS_SUPPORT + +#include <linux/of.h> +#include <asm/bL_switcher.h> +#include <asm/smp_plat.h> +#include <trace/events/power_cpu_migrate.h> + +static int mpidr_cpuids[NR_CPUS]; +static int __lcpu_to_pcpu[NR_CPUS]; + +static void calc_first_cluster_size(void) +{ + int len; + const u32 *val; + struct device_node *cn = NULL; + int mpidr_cpuids_count = 0; + + // Zero is a valid cpuid, so initialize the array to 0xff's + memset(&mpidr_cpuids, 0xff, sizeof(mpidr_cpuids)); + + while ((cn = of_find_node_by_type(cn, "cpu"))) { + BUG_ON(mpidr_cpuids_count >= NR_CPUS); + + val = of_get_property(cn, "reg", &len); + if (!val || len != 4) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + mpidr_cpuids[mpidr_cpuids_count] = be32_to_cpup(val); + ++mpidr_cpuids_count; + } + + BUG_ON(mpidr_cpuids_count != nr_cpu_ids); +} + +static int linearize_mpidr(int mpidr) +{ + int i; + for (i = 0; i < nr_cpu_ids; ++i) { + if (mpidr_cpuids[i] == mpidr) { + return i; + } + } + + BUG(); +} + +int lcpu_to_pcpu(const int lcpu) +{ + int pcpu; + BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0); + pcpu = __lcpu_to_pcpu[lcpu]; + BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0); + return pcpu; +} + +int pcpu_to_lcpu(const int pcpu) +{ + int lcpu; + BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0); + for (lcpu = 0; lcpu < nr_cpu_ids; ++lcpu) { + if (__lcpu_to_pcpu[lcpu] == pcpu) { + BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0); + return lcpu; + } + } + BUG(); +} + +static void gator_update_cpu_mapping(u32 cpu_hwid) +{ + int lcpu = smp_processor_id(); + int pcpu = linearize_mpidr(cpu_hwid & MPIDR_HWID_BITMASK); + BUG_ON(lcpu >= nr_cpu_ids || lcpu < 0); + BUG_ON(pcpu >= nr_cpu_ids || pcpu < 0); + __lcpu_to_pcpu[lcpu] = pcpu; +} + +GATOR_DEFINE_PROBE(cpu_migrate_begin, TP_PROTO(u64 timestamp, u32 cpu_hwid)) +{ + const int cpu = get_physical_cpu(); + + gator_timer_offline((void *)1); + gator_timer_offline_dispatch(cpu, true); +} + +GATOR_DEFINE_PROBE(cpu_migrate_finish, TP_PROTO(u64 timestamp, u32 cpu_hwid)) +{ + int cpu; + + gator_update_cpu_mapping(cpu_hwid); + + // get_physical_cpu must be called after gator_update_cpu_mapping + cpu = get_physical_cpu(); + gator_timer_online_dispatch(cpu, true); + gator_timer_online((void *)1); +} + +GATOR_DEFINE_PROBE(cpu_migrate_current, TP_PROTO(u64 timestamp, u32 cpu_hwid)) +{ + gator_update_cpu_mapping(cpu_hwid); +} + +static int gator_migrate_start(void) +{ + int retval = 0; + if (retval == 0) + retval = GATOR_REGISTER_TRACE(cpu_migrate_begin); + if (retval == 0) + retval = GATOR_REGISTER_TRACE(cpu_migrate_finish); + if (retval == 0) + retval = GATOR_REGISTER_TRACE(cpu_migrate_current); + if (retval == 0) { + // Initialize the logical to physical cpu mapping + memset(&__lcpu_to_pcpu, 0xff, sizeof(__lcpu_to_pcpu)); + bL_switcher_trace_trigger(); + } + return retval; +} + +static void gator_migrate_stop(void) +{ + GATOR_UNREGISTER_TRACE(cpu_migrate_current); + GATOR_UNREGISTER_TRACE(cpu_migrate_finish); + GATOR_UNREGISTER_TRACE(cpu_migrate_begin); +} + +#else + +#define calc_first_cluster_size() +#define gator_migrate_start() 0 +#define gator_migrate_stop() + +#endif diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c new file mode 100644 index 00000000000..88650f60bd5 --- /dev/null +++ b/drivers/gator/gator_main.c @@ -0,0 +1,1415 @@ +/** + * Copyright (C) ARM Limited 
2010-2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +// This version must match the gator daemon version +static unsigned long gator_protocol_version = 13; + +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <linux/irq.h> +#include <linux/vmalloc.h> +#include <linux/hardirq.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/suspend.h> +#include <linux/module.h> +#include <linux/perf_event.h> +#include <linux/utsname.h> +#include <asm/stacktrace.h> +#include <asm/uaccess.h> + +#include "gator.h" +#include "gator_events.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) +#error kernels prior to 2.6.32 are not supported +#endif + +#if !defined(CONFIG_GENERIC_TRACER) && !defined(CONFIG_TRACING) +#error gator requires the kernel to have CONFIG_GENERIC_TRACER or CONFIG_TRACING defined +#endif + +#ifndef CONFIG_PROFILING +#error gator requires the kernel to have CONFIG_PROFILING defined +#endif + +#ifndef CONFIG_HIGH_RES_TIMERS +#error gator requires the kernel to have CONFIG_HIGH_RES_TIMERS defined to support PC sampling +#endif + +#if defined(__arm__) && defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS) +#error gator requires the kernel to have CONFIG_LOCAL_TIMERS defined on SMP systems +#endif + +#if (GATOR_PERF_SUPPORT) && (!(GATOR_PERF_PMU_SUPPORT)) +#ifndef CONFIG_PERF_EVENTS +#warning gator requires the kernel to have CONFIG_PERF_EVENTS defined to support pmu hardware counters +#elif !defined CONFIG_HW_PERF_EVENTS +#warning gator requires the kernel to have CONFIG_HW_PERF_EVENTS defined to support pmu hardware counters +#endif +#endif + +/****************************************************************************** + * DEFINES + ******************************************************************************/ +#define SUMMARY_BUFFER_SIZE (1*1024) +#define BACKTRACE_BUFFER_SIZE (128*1024) +#define NAME_BUFFER_SIZE (64*1024) +#define COUNTER_BUFFER_SIZE (64*1024) // counters have the core as part of the data and the core value in the frame header may be discarded +#define BLOCK_COUNTER_BUFFER_SIZE (128*1024) +#define ANNOTATE_BUFFER_SIZE (64*1024) // annotate counters have the core as part of the data and the core value in the frame header may be discarded +#define SCHED_TRACE_BUFFER_SIZE (128*1024) |
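
For orientation, every event source added by this patch (gator_events_sched.c, gator_events_net.c, gator_events_mmaped.c, ...) plugs into gator_main.c through the same struct gator_interface contract: create_files() publishes an "enabled" control and a read-only "key" in gatorfs, and read() hands back an int buffer of interleaved key/value pairs whose length is its return value. The following is a minimal sketch of that pattern, assembled only from calls that appear in the files above ("gator.h", gatorfs_mkdir, gatorfs_create_ulong, gatorfs_create_ro_ulong, on_primary_core, gator_events_get_key, gator_events_install, gator_events_init); the "Example_counter" name, the constant value it reports, and the gator_events_example_* identifiers are invented for illustration and are not part of the patch.

/* Hypothetical minimal gator event source, sketched from the pattern used by
 * the event-source files in this patch. Names and the reported value are
 * illustrative only. */

#include "gator.h"

static ulong example_enabled;
static ulong example_key;
static int example_buffer[2];   /* one key/value pair */

/* Publish Example_counter/{enabled,key} in gatorfs so the daemon can enable it */
static int gator_events_example_create_files(struct super_block *sb, struct dentry *root)
{
    struct dentry *dir = gatorfs_mkdir(sb, root, "Example_counter");
    if (!dir)
        return -1;
    gatorfs_create_ulong(sb, dir, "enabled", &example_enabled);
    gatorfs_create_ro_ulong(sb, dir, "key", &example_key);
    return 0;
}

static int gator_events_example_start(void)
{
    /* nothing to set up for this illustrative counter */
    return 0;
}

static void gator_events_example_stop(void)
{
    example_enabled = 0;
}

/* Called at each sample; returns the number of ints written as key/value pairs */
static int gator_events_example_read(int **buffer)
{
    int len = 0;

    /* treat this as a system-wide counter: report from one core only */
    if (!on_primary_core() || !example_enabled)
        return 0;

    example_buffer[len++] = example_key;
    example_buffer[len++] = 42;     /* illustrative constant value */

    if (buffer)
        *buffer = example_buffer;

    return len;
}

static struct gator_interface gator_events_example_interface = {
    .create_files = gator_events_example_create_files,
    .start = gator_events_example_start,
    .stop = gator_events_example_stop,
    .read = gator_events_example_read,
};

int gator_events_example_init(void)
{
    example_enabled = 0;
    example_key = gator_events_get_key();
    return gator_events_install(&gator_events_example_interface);
}

gator_events_init(gator_events_example_init);

The key returned by gator_events_get_key() is what ties the values emitted by read() back to the gatorfs directory created in create_files(), which is why each counter writes its key immediately before its value.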