diff options
472 files changed, 14980 insertions, 10133 deletions
diff --git a/.gitignore b/.gitignore index 42fa0d5626a9..f4c0b091dcf4 100644 --- a/.gitignore +++ b/.gitignore @@ -22,7 +22,6 @@ *.lst *.symtypes *.order -modules.builtin *.elf *.bin *.gz @@ -33,6 +32,8 @@ modules.builtin *.lzo *.patch *.gcno +modules.builtin +Module.symvers # # Top-level generic files @@ -44,7 +45,6 @@ modules.builtin /vmlinuz /System.map /Module.markers -/Module.symvers # # Debian directory (make deb-pkg) diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt index a5160d8cbb5f..b9ec668bfe62 100644 --- a/Documentation/devicetree/bindings/clock/sunxi.txt +++ b/Documentation/devicetree/bindings/clock/sunxi.txt @@ -20,12 +20,15 @@ Required properties: "allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13 "allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s "allwinner,sun7i-a20-ahb-gates-clk" - for the AHB gates on A20 + "allwinner,sun6i-a31-ar100-clk" - for the AR100 on A31 "allwinner,sun6i-a31-ahb1-mux-clk" - for the AHB1 multiplexer on A31 "allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31 "allwinner,sun4i-a10-apb0-clk" - for the APB0 clock + "allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31 "allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10 "allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13 "allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s + "allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31 "allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20 "allwinner,sun4i-a10-apb1-clk" - for the APB1 clock "allwinner,sun4i-a10-apb1-mux-clk" - for the APB1 clock muxing @@ -41,6 +44,7 @@ Required properties: "allwinner,sun7i-a20-gmac-clk" - for the GMAC clock module on A20/A31 "allwinner,sun4i-a10-usb-clk" - for usb gates + resets on A10 / A20 "allwinner,sun5i-a13-usb-clk" - for usb gates + resets on A13 + "allwinner,sun6i-a31-usb-clk" - for usb gates + resets on A31 Required properties for all clocks: - reg : shall be the control register address for the clock. diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt index 7faf5a68b3be..ade4dd4c30f0 100644 --- a/Documentation/devicetree/bindings/clock/ti/apll.txt +++ b/Documentation/devicetree/bindings/clock/ti/apll.txt @@ -14,18 +14,32 @@ a subtype of a DPLL [2], although a simplified one at that. [2] Documentation/devicetree/bindings/clock/ti/dpll.txt Required properties: -- compatible : shall be "ti,dra7-apll-clock" +- compatible : shall be "ti,dra7-apll-clock" or "ti,omap2-apll-clock" - #clock-cells : from common clock binding; shall be set to 0. - clocks : link phandles of parent clocks (clk-ref and clk-bypass) - reg : address and length of the register set for controlling the APLL. It contains the information of registers in the following order: - "control" - contains the control register base address - "idlest" - contains the idlest register base address + "control" - contains the control register offset + "idlest" - contains the idlest register offset + "autoidle" - contains the autoidle register offset (OMAP2 only) +- ti,clock-frequency : static clock frequency for the clock (OMAP2 only) +- ti,idlest-shift : bit-shift for the idlest field (OMAP2 only) +- ti,bit-shift : bit-shift for enable and autoidle fields (OMAP2 only) Examples: - apll_pcie_ck: apll_pcie_ck@4a008200 { + apll_pcie_ck: apll_pcie_ck { #clock-cells = <0>; clocks = <&apll_pcie_in_clk_mux>, <&dpll_pcie_ref_ck>; - reg = <0x4a00821c 0x4>, <0x4a008220 0x4>; + reg = <0x021c>, <0x0220>; compatible = "ti,dra7-apll-clock"; }; + + apll96_ck: apll96_ck { + #clock-cells = <0>; + compatible = "ti,omap2-apll-clock"; + clocks = <&sys_ck>; + ti,bit-shift = <2>; + ti,idlest-shift = <8>; + ti,clock-frequency = <96000000>; + reg = <0x0500>, <0x0530>, <0x0520>; + }; diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt index 30bfdb7c9f18..df57009ff8e7 100644 --- a/Documentation/devicetree/bindings/clock/ti/dpll.txt +++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt @@ -24,12 +24,14 @@ Required properties: "ti,omap4-dpll-core-clock", "ti,omap4-dpll-m4xen-clock", "ti,omap4-dpll-j-type-clock", + "ti,omap5-mpu-dpll-clock", "ti,am3-dpll-no-gate-clock", "ti,am3-dpll-j-type-clock", "ti,am3-dpll-no-gate-j-type-clock", "ti,am3-dpll-clock", "ti,am3-dpll-core-clock", "ti,am3-dpll-x2-clock", + "ti,omap2-dpll-core-clock", - #clock-cells : from common clock binding; shall be set to 0. - clocks : link phandles of parent clocks, first entry lists reference clock @@ -41,6 +43,7 @@ Required properties: "mult-div1" - contains the multiplier / divider register base address "autoidle" - contains the autoidle register base address (optional) ti,am3-* dpll types do not have autoidle register + ti,omap2-* dpll type does not support idlest / autoidle registers Optional properties: - DPLL mode setting - defining any one or more of the following overrides @@ -73,3 +76,10 @@ Examples: clocks = <&sys_clkin_ck>, <&sys_clkin_ck>; reg = <0x90>, <0x5c>, <0x68>; }; + + dpll_ck: dpll_ck { + #clock-cells = <0>; + compatible = "ti,omap2-dpll-core-clock"; + clocks = <&sys_ck>, <&sys_ck>; + reg = <0x0500>, <0x0540>; + }; diff --git a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt new file mode 100644 index 000000000000..585e8c191f50 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt @@ -0,0 +1,96 @@ +Device Tree Clock bindings for ATL (Audio Tracking Logic) of DRA7 SoC. + +The ATL IP is used to generate clock to be used to synchronize baseband and +audio codec. A single ATL IP provides four ATL clock instances sharing the same +functional clock but can be configured to provide different clocks. +ATL can maintain a clock averages to some desired frequency based on the bws/aws +signals - can compensate the drift between the two ws signal. + +In order to provide the support for ATL and it's output clocks (which can be used +internally within the SoC or external components) two sets of bindings is needed: + +Clock tree binding: +This binding uses the common clock binding[1]. +To be able to integrate the ATL clocks with DT clock tree. +Provides ccf level representation of the ATL clocks to be used by drivers. +Since the clock instances are part of a single IP this binding is used as a node +for the DT clock tree, the IP driver is needed to handle the actual configuration +of the IP. + +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt + +Required properties: +- compatible : shall be "ti,dra7-atl-clock" +- #clock-cells : from common clock binding; shall be set to 0. +- clocks : link phandles to functional clock of ATL + +Binding for the IP driver: +This binding is used to configure the IP driver which is going to handle the +configuration of the IP for the ATL clock instances. + +Required properties: +- compatible : shall be "ti,dra7-atl" +- reg : base address for the ATL IP +- ti,provided-clocks : List of phandles to the clocks associated with the ATL +- clocks : link phandles to functional clock of ATL +- clock-names : Shall be set to "fck" +- ti,hwmods : Shall be set to "atl" + +Optional properties: +Configuration of ATL instances: +- atl{0/1/2/3} { + - bws : Baseband word select signal selection + - aws : Audio word select signal selection +}; + +For valid word select signals, see the dt-bindings/clk/ti-dra7-atl.h include +file. + +Examples: +/* clock bindings for atl provided clocks */ +atl_clkin0_ck: atl_clkin0_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin1_ck: atl_clkin1_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin2_ck: atl_clkin2_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +atl_clkin3_ck: atl_clkin3_ck { + #clock-cells = <0>; + compatible = "ti,dra7-atl-clock"; + clocks = <&atl_gfclk_mux>; +}; + +/* binding for the IP */ +atl: atl@4843c000 { + compatible = "ti,dra7-atl"; + reg = <0x4843c000 0x3ff>; + ti,hwmods = "atl"; + ti,provided-clocks = <&atl_clkin0_ck>, <&atl_clkin1_ck>, + <&atl_clkin2_ck>, <&atl_clkin3_ck>; + clocks = <&atl_gfclk_mux>; + clock-names = "fck"; + status = "disabled"; +}; + +#include <dt-bindings/clk/ti-dra7-atl.h> + +&atl { + status = "okay"; + + atl2 { + bws = <DRA7_ATL_WS_MCASP2_FSX>; + aws = <DRA7_ATL_WS_MCASP3_FSX>; + }; +}; diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt index 125281aaa4ca..03f8fdee62a7 100644 --- a/Documentation/devicetree/bindings/clock/ti/gate.txt +++ b/Documentation/devicetree/bindings/clock/ti/gate.txt @@ -25,6 +25,11 @@ Required properties: to map clockdomains properly "ti,hsdiv-gate-clock" - gate clock with OMAP36xx specific hardware handling, required for a hardware errata + "ti,composite-gate-clock" - composite gate clock, to be part of composite + clock + "ti,composite-no-wait-gate-clock" - composite gate clock that does not wait + for clock to be active before returning + from clk_enable() - #clock-cells : from common clock binding; shall be set to 0 - clocks : link to phandle of parent clock - reg : offset for register controlling adjustable gate, not needed for @@ -41,7 +46,7 @@ Examples: #clock-cells = <0>; compatible = "ti,gate-clock"; clocks = <&core_96m_fck>; - reg = <0x48004a00 0x4>; + reg = <0x0a00>; ti,bit-shift = <25>; }; @@ -57,7 +62,7 @@ Examples: #clock-cells = <0>; compatible = "ti,dss-gate-clock"; clocks = <&dpll4_m4x2_ck>; - reg = <0x48004e00 0x4>; + reg = <0x0e00>; ti,bit-shift = <0>; }; @@ -65,7 +70,7 @@ Examples: #clock-cells = <0>; compatible = "ti,am35xx-gate-clock"; clocks = <&ipss_ick>; - reg = <0x4800259c 0x4>; + reg = <0x059c>; ti,bit-shift = <1>; }; @@ -80,6 +85,22 @@ Examples: compatible = "ti,hsdiv-gate-clock"; clocks = <&dpll4_m2x2_mul_ck>; ti,bit-shift = <0x1b>; - reg = <0x48004d00 0x4>; + reg = <0x0d00>; ti,set-bit-to-disable; }; + + vlynq_gate_fck: vlynq_gate_fck { + #clock-cells = <0>; + compatible = "ti,composite-gate-clock"; + clocks = <&core_ck>; + ti,bit-shift = <3>; + reg = <0x0200>; + }; + + sys_clkout2_src_gate: sys_clkout2_src_gate { + #clock-cells = <0>; + compatible = "ti,composite-no-wait-gate-clock"; + clocks = <&core_ck>; + ti,bit-shift = <15>; + reg = <0x0070>; + }; diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt index 064e8caccac3..3111a409fea6 100644 --- a/Documentation/devicetree/bindings/clock/ti/interface.txt +++ b/Documentation/devicetree/bindings/clock/ti/interface.txt @@ -21,6 +21,8 @@ Required properties: "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling + "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW + handling - #clock-cells : from common clock binding; shall be set to 0 - clocks : link to phandle of parent clock - reg : base address for the control register diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1 new file mode 100644 index 000000000000..6b1e05458f0f --- /dev/null +++ b/Documentation/hwmon/shtc1 @@ -0,0 +1,43 @@ +Kernel driver shtc1 +=================== + +Supported chips: + * Sensirion SHTC1 + Prefix: 'shtc1' + Addresses scanned: none + Datasheet: http://www.sensirion.com/file/datasheet_shtc1 + + * Sensirion SHTW1 + Prefix: 'shtw1' + Addresses scanned: none + Datasheet: Not publicly available + +Author: + Johannes Winkelmann <johannes.winkelmann@sensirion.com> + +Description +----------- + +This driver implements support for the Sensirion SHTC1 chip, a humidity and +temperature sensor. Temperature is measured in degrees celsius, relative +humidity is expressed as a percentage. Driver can be used as well for SHTW1 +chip, which has the same electrical interface. + +The device communicates with the I2C protocol. All sensors are set to I2C +address 0x70. See Documentation/i2c/instantiating-devices for methods to +instantiate the device. + +There are two options configurable by means of shtc1_platform_data: +1. blocking (pull the I2C clock line down while performing the measurement) or + non-blocking mode. Blocking mode will guarantee the fastest result but + the I2C bus will be busy during that time. By default, non-blocking mode + is used. Make sure clock-stretching works properly on your device if you + want to use blocking mode. +2. high or low accuracy. High accuracy is used by default and using it is + strongly recommended. + +sysfs-Interface +--------------- + +temp1_input - temperature input +humidity1_input - humidity input diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt index 69372fb98cf8..3fb39e0116b4 100644 --- a/Documentation/kbuild/modules.txt +++ b/Documentation/kbuild/modules.txt @@ -470,7 +470,7 @@ build. Sometimes, an external module uses exported symbols from another external module. kbuild needs to have full knowledge of - all symbols to avoid spliitting out warnings about undefined + all symbols to avoid spitting out warnings about undefined symbols. Three solutions exist for this situation. NOTE: The method with a top-level kbuild file is recommended diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 0cfb00fd86ff..4bbeca8483ed 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface Kprobes enables you to dynamically break into any kernel routine and collect debugging and performance information non-disruptively. You -can trap at almost any kernel code address, specifying a handler +can trap at almost any kernel code address(*), specifying a handler routine to be invoked when the breakpoint is hit. +(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist) There are currently three types of probes: kprobes, jprobes, and kretprobes (also called return probes). A kprobe can be inserted @@ -273,6 +274,19 @@ using one of the following techniques: or - Execute 'sysctl -w debug.kprobes_optimization=n' +1.5 Blacklist + +Kprobes can probe most of the kernel except itself. This means +that there are some functions where kprobes cannot probe. Probing +(trapping) such functions can cause a recursive trap (e.g. double +fault) or the nested probe handler may never be called. +Kprobes manages such functions as a blacklist. +If you want to add a function into the blacklist, you just need +to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro +to specify a blacklisted function. +Kprobes checks the given probe address against the blacklist and +rejects registering it, if the given address is in the blacklist. + 2. Architectures Supported Kprobes, jprobes, and return probes are implemented on the following diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index 1dfe62c3641d..ee231ed09ec6 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -1,139 +1,157 @@ Generic Mutex Subsystem started by Ingo Molnar <mingo@redhat.com> +updated by Davidlohr Bueso <davidlohr@hp.com> - "Why on earth do we need a new mutex subsystem, and what's wrong - with semaphores?" +What are mutexes? +----------------- -firstly, there's nothing wrong with semaphores. But if the simpler -mutex semantics are sufficient for your code, then there are a couple -of advantages of mutexes: +In the Linux kernel, mutexes refer to a particular locking primitive +that enforces serialization on shared memory systems, and not only to +the generic term referring to 'mutual exclusion' found in academia +or similar theoretical text books. Mutexes are sleeping locks which +behave similarly to binary semaphores, and were introduced in 2006[1] +as an alternative to these. This new data structure provided a number +of advantages, including simpler interfaces, and at that time smaller +code (see Disadvantages). - - 'struct mutex' is smaller on most architectures: E.g. on x86, - 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes. - A smaller structure size means less RAM footprint, and better - CPU-cache utilization. +[1] http://lwn.net/Articles/164802/ - - tighter code. On x86 i get the following .text sizes when - switching all mutex-alike semaphores in the kernel to the mutex - subsystem: +Implementation +-------------- - text data bss dec hex filename - 3280380 868188 396860 4545428 455b94 vmlinux-semaphore - 3255329 865296 396732 4517357 44eded vmlinux-mutex +Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h +and implemented in kernel/locking/mutex.c. These locks use a three +state atomic counter (->count) to represent the different possible +transitions that can occur during the lifetime of a lock: - that's 25051 bytes of code saved, or a 0.76% win - off the hottest - codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%) - Smaller code means better icache footprint, which is one of the - major optimization goals in the Linux kernel currently. + 1: unlocked + 0: locked, no waiters + negative: locked, with potential waiters - - the mutex subsystem is slightly faster and has better scalability for - contended workloads. On an 8-way x86 system, running a mutex-based - kernel and testing creat+unlink+close (of separate, per-task files) - in /tmp with 16 parallel tasks, the average number of ops/sec is: +In its most basic form it also includes a wait-queue and a spinlock +that serializes access to it. CONFIG_SMP systems can also include +a pointer to the lock task owner (->owner) as well as a spinner MCS +lock (->osq), both described below in (ii). - Semaphores: Mutexes: +When acquiring a mutex, there are three possible paths that can be +taken, depending on the state of the lock: - $ ./test-mutex V 16 10 $ ./test-mutex V 16 10 - 8 CPUs, running 16 tasks. 8 CPUs, running 16 tasks. - checking VFS performance. checking VFS performance. - avg loops/sec: 34713 avg loops/sec: 84153 - CPU utilization: 63% CPU utilization: 22% +(i) fastpath: tries to atomically acquire the lock by decrementing the + counter. If it was already taken by another task it goes to the next + possible path. This logic is architecture specific. On x86-64, the + locking fastpath is 2 instructions: - i.e. in this workload, the mutex based kernel was 2.4 times faster - than the semaphore based kernel, _and_ it also had 2.8 times less CPU - utilization. (In terms of 'ops per CPU cycle', the semaphore kernel - performed 551 ops/sec per 1% of CPU time used, while the mutex kernel - performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times - more efficient.) - - the scalability difference is visible even on a 2-way P4 HT box: - - Semaphores: Mutexes: - - $ ./test-mutex V 16 10 $ ./test-mutex V 16 10 - 4 CPUs, running 16 tasks. 8 CPUs, running 16 tasks. - checking VFS performance. checking VFS performance. - avg loops/sec: 127659 avg loops/sec: 181082 - CPU utilization: 100% CPU utilization: 34% - - (the straight performance advantage of mutexes is 41%, the per-cycle - efficiency of mutexes is 4.1 times better.) - - - there are no fastpath tradeoffs, the mutex fastpath is just as tight - as the semaphore fastpath. On x86, the locking fastpath is 2 - instructions: - - c0377ccb <mutex_lock>: - c0377ccb: f0 ff 08 lock decl (%eax) - c0377cce: 78 0e js c0377cde <.text..lock.mutex> - c0377cd0: c3 ret + 0000000000000e10 <mutex_lock>: + e21: f0 ff 0b lock decl (%rbx) + e24: 79 08 jns e2e <mutex_lock+0x1e> the unlocking fastpath is equally tight: - c0377cd1 <mutex_unlock>: - c0377cd1: f0 ff 00 lock incl (%eax) - c0377cd4: 7e 0f jle c0377ce5 <.text..lock.mutex+0x7> - c0377cd6: c3 ret - - - 'struct mutex' semantics are well-defined and are enforced if - CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have - virtually no debugging code or instrumentation. The mutex subsystem - checks and enforces the following rules: - - * - only one task can hold the mutex at a time - * - only the owner can unlock the mutex - * - multiple unlocks are not permitted - * - recursive locking is not permitted - * - a mutex object must be initialized via the API - * - a mutex object must not be initialized via memset or copying - * - task may not exit with mutex held - * - memory areas where held locks reside must not be freed - * - held mutexes must not be reinitialized - * - mutexes may not be used in hardware or software interrupt - * contexts such as tasklets and timers - - furthermore, there are also convenience features in the debugging - code: - - * - uses symbolic names of mutexes, whenever they are printed in debug output - * - point-of-acquire tracking, symbolic lookup of function names - * - list of all locks held in the system, printout of them - * - owner tracking - * - detects self-recursing locks and prints out all relevant info - * - detects multi-task circular deadlocks and prints out all affected - * locks and tasks (and only those tasks) + 0000000000000bc0 <mutex_unlock>: + bc8: f0 ff 07 lock incl (%rdi) + bcb: 7f 0a jg bd7 <mutex_unlock+0x17> + + +(ii) midpath: aka optimistic spinning, tries to spin for acquisition + while the lock owner is running and there are no other tasks ready + to run that have higher priority (need_resched). The rationale is + that if the lock owner is running, it is likely to release the lock + soon. The mutex spinners are queued up using MCS lock so that only + one spinner can compete for the mutex. + + The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock + with the desirable properties of being fair and with each cpu trying + to acquire the lock spinning on a local variable. It avoids expensive + cacheline bouncing that common test-and-set spinlock implementations + incur. An MCS-like lock is specially tailored for optimistic spinning + for sleeping lock implementation. An important feature of the customized + MCS lock is that it has the extra property that spinners are able to exit + the MCS spinlock queue when they need to reschedule. This further helps + avoid situations where MCS spinners that need to reschedule would continue + waiting to spin on mutex owner, only to go directly to slowpath upon + obtaining the MCS lock. + + +(iii) slowpath: last resort, if the lock is still unable to be acquired, + the task is added to the wait-queue and sleeps until woken up by the + unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE. + +While formally kernel mutexes are sleepable locks, it is path (ii) that +makes them more practically a hybrid type. By simply not interrupting a +task and busy-waiting for a few cycles instead of immediately sleeping, +the performance of this lock has been seen to significantly improve a +number of workloads. Note that this technique is also used for rw-semaphores. + +Semantics +--------- + +The mutex subsystem checks and enforces the following rules: + + - Only one task can hold the mutex at a time. + - Only the owner can unlock the mutex. + - Multiple unlocks are not permitted. + - Recursive locking/unlocking is not permitted. + - A mutex must only be initialized via the API (see below). + - A task may not exit with a mutex held. + - Memory areas where held locks reside must not be freed. + - Held mutexes must not be reinitialized. + - Mutexes may not be used in hardware or software interrupt + contexts such as tasklets and timers. + +These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled. +In addition, the mutex debugging code also implements a number of other +features that make lock debugging easier and faster: + + - Uses symbolic names of mutexes, whenever they are printed + in debug output. + - Point-of-acquire tracking, symbolic lookup of function names, + list of all locks held in the system, printout of them. + - Owner tracking. + - Detects self-recursing locks and prints out all relevant info. + - Detects multi-task circular deadlocks and prints out all affected + locks and tasks (and only those tasks). + + +Interfaces +---------- +Statically define the mutex: + DEFINE_MUTEX(name); + +Dynamically initialize the mutex: + mutex_init(mutex); + +Acquire the mutex, uninterruptible: + void mutex_lock(struct mutex *lock); + void mutex_lock_nested(struct mutex *lock, unsigned int subclass); + int mutex_trylock(struct mutex *lock); + +Acquire the mutex, interruptible: + int mutex_lock_interruptible_nested(struct mutex *lock, + unsigned int subclass); + int mutex_lock_interruptible(struct mutex *lock); + +Acquire the mutex, interruptible, if dec to 0: + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); + +Unlock the mutex: + void mutex_unlock(struct mutex *lock); + +Test if the mutex is taken: + int mutex_is_locked(struct mutex *lock); Disadvantages ------------- -The stricter mutex API means you cannot use mutexes the same way you -can use semaphores: e.g. they cannot be used from an interrupt context, -nor can they be unlocked from a different context that which acquired -it. [ I'm not aware of any other (e.g. performance) disadvantages from -using mutexes at the moment, please let me know if you find any. ] - -Implementation of mutexes -------------------------- - -'struct mutex' is the new mutex type, defined in include/linux/mutex.h and -implemented in kernel/locking/mutex.c. It is a counter-based mutex with a -spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for -"locked" and negative numbers (usually -1) for "locked, potential waiters -queued". - -the APIs of 'struct mutex' have been streamlined: - - DEFINE_MUTEX(name); +Unlike its original design and purpose, 'struct mutex' is larger than +most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice +as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the +'struct rw_semaphore' variant. Larger structure sizes mean more CPU +cache and memory footprint. - mutex_init(mutex); +When to use mutexes +------------------- - void mutex_lock(struct mutex *lock); - int mutex_lock_interruptible(struct mutex *lock); - int mutex_trylock(struct mutex *lock); - void mutex_unlock(struct mutex *lock); - int mutex_is_locked(struct mutex *lock); - void mutex_lock_nested(struct mutex *lock, unsigned int subclass); - int mutex_lock_interruptible_nested(struct mutex *lock, - unsigned int subclass); - int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); +Unless the strict semantics of mutexes are unsuitable and/or the critical +region prevents the lock from being shared, always prefer them to any other +locking primitive. diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c index 85870208edcf..1dbb4b87268f 100644 --- a/Documentation/vDSO/parse_vdso.c +++ b/Documentation/vDSO/parse_vdso.c @@ -1,6 +1,6 @@ /* * parse_vdso.c: Linux reference vDSO parser - * Written by Andrew Lutomirski, 2011. + * Written by Andrew Lutomirski, 2011-2014. * * This code is meant to be linked in to various programs that run on Linux. * As such, it is available with as few restrictions as possible. This file @@ -11,13 +11,14 @@ * it starts a program. It works equally well in statically and dynamically * linked binaries. * - * This code is tested on x86_64. In principle it should work on any 64-bit + * This code is tested on x86. In principle it should work on any * architecture that has a vDSO. */ #include <stdbool.h> #include <stdint.h> #include <string.h> +#include <limits.h> #include <elf.h> /* @@ -45,11 +46,18 @@ extern void *vdso_sym(const char *version, const char *name); /* And here's the code. */ - -#ifndef __x86_64__ -# error Not yet ported to non-x86_64 architectures +#ifndef ELF_BITS +# if ULONG_MAX > 0xffffffffUL +# define ELF_BITS 64 +# else +# define ELF_BITS 32 +# endif #endif +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + static struct vdso_info { bool valid; @@ -59,14 +67,14 @@ static struct vdso_info uintptr_t load_offset; /* load_addr - recorded vaddr */ /* Symbol table */ - Elf64_Sym *symtab; + ELF(Sym) *symtab; const char *symstrings; - Elf64_Word *bucket, *chain; - Elf64_Word nbucket, nchain; + ELF(Word) *bucket, *chain; + ELF(Word) nbucket, nchain; /* Version table */ - Elf64_Versym *versym; - Elf64_Verdef *verdef; + ELF(Versym) *versym; + ELF(Verdef) *verdef; } vdso_info; /* Straight from the ELF specification. */ @@ -92,9 +100,14 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) vdso_info.load_addr = base; - Elf64_Ehdr *hdr = (Elf64_Ehdr*)base; - Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff); - Elf64_Dyn *dyn = 0; + ELF(Ehdr) *hdr = (ELF(Ehdr)*)base; + if (hdr->e_ident[EI_CLASS] != + (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) { + return; /* Wrong ELF class -- check ELF_BITS */ + } + + ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff); + ELF(Dyn) *dyn = 0; /* * We need two things from the segment table: the load offset @@ -108,7 +121,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) + (uintptr_t)pt[i].p_offset - (uintptr_t)pt[i].p_vaddr; } else if (pt[i].p_type == PT_DYNAMIC) { - dyn = (Elf64_Dyn*)(base + pt[i].p_offset); + dyn = (ELF(Dyn)*)(base + pt[i].p_offset); } } @@ -118,7 +131,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* * Fish out the useful bits of the dynamic table. */ - Elf64_Word *hash = 0; + ELF(Word) *hash = 0; vdso_info.symstrings = 0; vdso_info.symtab = 0; vdso_info.versym = 0; @@ -131,22 +144,22 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) + vdso_info.load_offset); break; case DT_SYMTAB: - vdso_info.symtab = (Elf64_Sym *) + vdso_info.symtab = (ELF(Sym) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; case DT_HASH: - hash = (Elf64_Word *) + hash = (ELF(Word) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; case DT_VERSYM: - vdso_info.versym = (Elf64_Versym *) + vdso_info.versym = (ELF(Versym) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; case DT_VERDEF: - vdso_info.verdef = (Elf64_Verdef *) + vdso_info.verdef = (ELF(Verdef) *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; @@ -168,8 +181,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) vdso_info.valid = true; } -static bool vdso_match_version(Elf64_Versym ver, - const char *name, Elf64_Word hash) +static bool vdso_match_version(ELF(Versym) ver, + const char *name, ELF(Word) hash) { /* * This is a helper function to check if the version indexed by @@ -188,7 +201,7 @@ static bool vdso_match_version(Elf64_Versym ver, /* First step: find the version definition */ ver &= 0x7fff; /* Apparently bit 15 means "hidden" */ - Elf64_Verdef *def = vdso_info.verdef; + ELF(Verdef) *def = vdso_info.verdef; while(true) { if ((def->vd_flags & VER_FLG_BASE) == 0 && (def->vd_ndx & 0x7fff) == ver) @@ -197,11 +210,11 @@ static bool vdso_match_version(Elf64_Versym ver, if (def->vd_next == 0) return false; /* No definition. */ - def = (Elf64_Verdef *)((char *)def + def->vd_next); + def = (ELF(Verdef) *)((char *)def + def->vd_next); } /* Now figure out whether it matches. */ - Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux); + ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux); return def->vd_hash == hash && !strcmp(name, vdso_info.symstrings + aux->vda_name); } @@ -213,10 +226,10 @@ void *vdso_sym(const char *version, const char *name) return 0; ver_hash = elf_hash(version); - Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; + ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { - Elf64_Sym *sym = &vdso_info.symtab[chain]; + ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) @@ -243,7 +256,7 @@ void *vdso_sym(const char *version, const char *name) void vdso_init_from_auxv(void *auxv) { - Elf64_auxv_t *elf_auxv = auxv; + ELF(auxv_t) *elf_auxv = auxv; for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) { if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { diff --git a/Documentation/vDSO/vdso_standalone_test_x86.c b/Documentation/vDSO/vdso_standalone_test_x86.c new file mode 100644 index 000000000000..d46240265c50 --- /dev/null +++ b/Documentation/vDSO/vdso_standalone_test_x86.c @@ -0,0 +1,128 @@ +/* + * vdso_test.c: Sample code to test parse_vdso.c on x86 + * Copyright (c) 2011-2014 Andy Lutomirski + * Subject to the GNU General Public License, version 2 + * + * You can amuse yourself by compiling with: + * gcc -std=gnu99 -nostdlib + * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s + * vdso_standalone_test_x86.c parse_vdso.c + * to generate a small binary. On x86_64, you can omit -lgcc_s + * if you want the binary to be completely standalone. + */ + +#include <sys/syscall.h> +#include <sys/time.h> +#include <unistd.h> +#include <stdint.h> + +extern void *vdso_sym(const char *version, const char *name); +extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); +extern void vdso_init_from_auxv(void *auxv); + +/* We need a libc functions... */ +int strcmp(const char *a, const char *b) +{ + /* This implementation is buggy: it never returns -1. */ + while (*a || *b) { + if (*a != *b) + return 1; + if (*a == 0 || *b == 0) + return 1; + a++; + b++; + } + + return 0; +} + +/* ...and two syscalls. This is x86-specific. */ +static inline long x86_syscall3(long nr, long a0, long a1, long a2) +{ + long ret; +#ifdef __x86_64__ + asm volatile ("syscall" : "=a" (ret) : "a" (nr), + "D" (a0), "S" (a1), "d" (a2) : + "cc", "memory", "rcx", + "r8", "r9", "r10", "r11" ); +#else + asm volatile ("int $0x80" : "=a" (ret) : "a" (nr), + "b" (a0), "c" (a1), "d" (a2) : + "cc", "memory" ); +#endif + return ret; +} + +static inline long linux_write(int fd, const void *data, size_t len) +{ + return x86_syscall3(__NR_write, fd, (long)data, (long)len); +} + +static inline void linux_exit(int code) +{ + x86_syscall3(__NR_exit, code, 0, 0); +} + +void to_base10(char *lastdig, uint64_t n) +{ + while (n) { + *lastdig = (n % 10) + '0'; + n /= 10; + lastdig--; + } +} + +__attribute__((externally_visible)) void c_main(void **stack) +{ + /* Parse the stack */ + long argc = (long)*stack; + stack += argc + 2; + + /* Now we're pointing at the environment. Skip it. */ + while(*stack) + stack++; + stack++; + + /* Now we're pointing at auxv. Initialize the vDSO parser. */ + vdso_init_from_auxv((void *)stack); + + /* Find gettimeofday. */ + typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); + gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); + + if (!gtod) + linux_exit(1); + + struct timeval tv; + long ret = gtod(&tv, 0); + + if (ret == 0) { + char buf[] = "The time is .000000\n"; + to_base10(buf + 31, tv.tv_sec); + to_base10(buf + 38, tv.tv_usec); + linux_write(1, buf, sizeof(buf) - 1); + } else { + linux_exit(ret); + } + + linux_exit(0); +} + +/* + * This is the real entry point. It passes the initial stack into + * the C entry point. + */ +asm ( + ".text\n" + ".global _start\n" + ".type _start,@function\n" + "_start:\n\t" +#ifdef __x86_64__ + "mov %rsp,%rdi\n\t" + "jmp c_main" +#else + "push %esp\n\t" + "call c_main\n\t" + "int $3" +#endif + ); diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c index fff633432dff..8daeb7d7032c 100644 --- a/Documentation/vDSO/vdso_test.c +++ b/Documentation/vDSO/vdso_test.c @@ -1,111 +1,52 @@ /* - * vdso_test.c: Sample code to test parse_vdso.c on x86_64 - * Copyright (c) 2011 Andy Lutomirski + * vdso_test.c: Sample code to test parse_vdso.c + * Copyright (c) 2014 Andy Lutomirski * Subject to the GNU General Public License, version 2 * - * You can amuse yourself by compiling with: - * gcc -std=gnu99 -nostdlib - * -Os -fno-asynchronous-unwind-tables -flto - * vdso_test.c parse_vdso.c -o vdso_test - * to generate a small binary with no dependencies at all. + * Compile with: + * gcc -std=gnu99 vdso_test.c parse_vdso.c + * + * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. */ -#include <sys/syscall.h> -#include <sys/time.h> -#include <unistd.h> #include <stdint.h> +#include <elf.h> +#include <stdio.h> +#include <sys/auxv.h> +#include <sys/time.h> extern void *vdso_sym(const char *version, const char *name); extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); extern void vdso_init_from_auxv(void *auxv); -/* We need a libc functions... */ -int strcmp(const char *a, const char *b) +int main(int argc, char **argv) { - /* This implementation is buggy: it never returns -1. */ - while (*a || *b) { - if (*a != *b) - return 1; - if (*a == 0 || *b == 0) - return 1; - a++; - b++; + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + return 0; } - return 0; -} - -/* ...and two syscalls. This is x86_64-specific. */ -static inline long linux_write(int fd, const void *data, size_t len) -{ - - long ret; - asm volatile ("syscall" : "=a" (ret) : "a" (__NR_write), - "D" (fd), "S" (data), "d" (len) : - "cc", "memory", "rcx", - "r8", "r9", "r10", "r11" ); - return ret; -} - -static inline void linux_exit(int code) -{ - asm volatile ("syscall" : : "a" (__NR_exit), "D" (code)); -} - -void to_base10(char *lastdig, uint64_t n) -{ - while (n) { - *lastdig = (n % 10) + '0'; - n /= 10; - lastdig--; - } -} - -__attribute__((externally_visible)) void c_main(void **stack) -{ - /* Parse the stack */ - long argc = (long)*stack; - stack += argc + 2; - - /* Now we're pointing at the environment. Skip it. */ - while(*stack) - stack++; - stack++; - - /* Now we're pointing at auxv. Initialize the vDSO parser. */ - vdso_init_from_auxv((void *)stack); + vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); /* Find gettimeofday. */ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); - if (!gtod) - linux_exit(1); + if (!gtod) { + printf("Could not find __vdso_gettimeofday\n"); + return 1; + } struct timeval tv; long ret = gtod(&tv, 0); if (ret == 0) { - char buf[] = "The time is .000000\n"; - to_base10(buf + 31, tv.tv_sec); - to_base10(buf + 38, tv.tv_usec); - linux_write(1, buf, sizeof(buf) - 1); + printf("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); } else { - linux_exit(ret); + printf("__vdso_gettimeofday failed\n"); } - linux_exit(0); + return 0; } - -/* - * This is the real entry point. It passes the initial stack into - * the C entry point. - */ -asm ( - ".text\n" - ".global _start\n" - ".type _start,@function\n" - "_start:\n\t" - "mov %rsp,%rdi\n\t" - "jmp c_main" - ); @@ -105,10 +105,6 @@ ifeq ("$(origin O)", "command line") KBUILD_OUTPUT := $(O) endif -ifeq ("$(origin W)", "command line") - export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W) -endif - # That's our default target when none is given on the command line PHONY := _all _all: @@ -153,8 +149,18 @@ else _all: modules endif -srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) +ifeq ($(KBUILD_SRC),) + # building in the source tree + srctree := . +else + ifeq ($(KBUILD_SRC)/,$(dir $(CURDIR))) + # building in a subdirectory of the source tree + srctree := .. + else + srctree := $(KBUILD_SRC) + endif +endif +objtree := . src := $(srctree) obj := $(objtree) @@ -166,7 +172,7 @@ export srctree objtree VPATH # SUBARCH tells the usermode build what the underlying arch is. That is set # first, and if a usermode build is happening, the "ARCH=um" on the command # line overrides the setting of ARCH below. If a native build is happening, -# then ARCH is assigned, getting whatever value it gets normally, and +# then ARCH is assigned, getting whatever value it gets normally, and # SUBARCH is subsequently ignored. SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ @@ -259,18 +265,18 @@ endif KBUILD_MODULES := KBUILD_BUILTIN := 1 -# If we have only "make modules", don't compile built-in objects. -# When we're building modules with modversions, we need to consider -# the built-in objects during the descend as well, in order to -# make sure the checksums are up to date before we record them. +# If we have only "make modules", don't compile built-in objects. +# When we're building modules with modversions, we need to consider +# the built-in objects during the descend as well, in order to +# make sure the checksums are up to date before we record them. ifeq ($(MAKECMDGOALS),modules) KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1) endif -# If we have "make <whatever> modules", compile modules -# in addition to whatever we do anyway. -# Just "make" or "make all" shall build modules as well +# If we have "make <whatever> modules", compile modules +# in addition to whatever we do anyway. +# Just "make" or "make all" shall build modules as well ifneq ($(filter all _all modules,$(MAKECMDGOALS)),) KBUILD_MODULES := 1 @@ -294,7 +300,7 @@ export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD # cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< # # If $(quiet) is empty, the whole command will be printed. -# If it is set to "quiet_", only the short version will be printed. +# If it is set to "quiet_", only the short version will be printed. # If it is set to "silent_", nothing will be printed at all, since # the variable $(silent_cmd_cc_o_c) doesn't exist. # @@ -346,7 +352,6 @@ $(srctree)/scripts/Kbuild.include: ; include $(srctree)/scripts/Kbuild.include # Make variables (CC, etc...) - AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld CC = $(CROSS_COMPILE)gcc @@ -395,8 +400,8 @@ KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ - -Wno-format-security \ - $(call cc-option,-fno-delete-null-pointer-checks,) + -Wno-format-security + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := KBUILD_AFLAGS := -D__ASSEMBLY__ @@ -504,8 +509,16 @@ ifeq ($(mixed-targets),1) # We're called with mixed targets (*config and build targets). # Handle them one by one. -%:: FORCE - $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@ +PHONY += $(MAKECMDGOALS) __build_one_by_one + +$(filter-out __build_one_by_one, $(MAKECMDGOALS)): __build_one_by_one + @: + +__build_one_by_one: + $(Q)set -e; \ + for i in $(MAKECMDGOALS); do \ + $(MAKE) -f $(srctree)/Makefile $$i; \ + done else ifeq ($(config-targets),1) @@ -520,11 +533,9 @@ include $(srctree)/arch/$(SRCARCH)/Makefile export KBUILD_DEFCONFIG KBUILD_KCONFIG config: scripts_basic outputmakefile FORCE - $(Q)mkdir -p include/linux include/config $(Q)$(MAKE) $(build)=scripts/kconfig $@ %config: scripts_basic outputmakefile FORCE - $(Q)mkdir -p include/linux include/config $(Q)$(MAKE) $(build)=scripts/kconfig $@ else @@ -594,14 +605,16 @@ endif # $(dot-config) # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux +include $(srctree)/arch/$(SRCARCH)/Makefile + +KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) + ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os $(call cc-disable-warning,maybe-uninitialized,) else KBUILD_CFLAGS += -O2 endif -include $(srctree)/arch/$(SRCARCH)/Makefile - ifdef CONFIG_READABLE_ASM # Disable optimizations that make assembler listings hard to read. # reorder blocks reorders the control in the function @@ -731,6 +744,8 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO endif +include $(srctree)/scripts/Makefile.extrawarn + # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments KBUILD_CPPFLAGS += $(KCPPFLAGS) KBUILD_AFLAGS += $(KAFLAGS) @@ -775,10 +790,10 @@ MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) export MODLIB # -# INSTALL_MOD_STRIP, if defined, will cause modules to be -# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then -# the default option --strip-debug will be used. Otherwise, -# INSTALL_MOD_STRIP value will be used as the options to the strip command. +# INSTALL_MOD_STRIP, if defined, will cause modules to be +# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then +# the default option --strip-debug will be used. Otherwise, +# INSTALL_MOD_STRIP value will be used as the options to the strip command. ifdef INSTALL_MOD_STRIP ifeq ($(INSTALL_MOD_STRIP),1) @@ -863,7 +878,7 @@ ifdef CONFIG_BUILD_DOCSRC endif +$(call if_changed,link-vmlinux) -# The actual objects are generated when descending, +# The actual objects are generated when descending, # make sure no implicit rule kicks in $(sort $(vmlinux-deps)): $(vmlinux-dirs) ; @@ -1021,11 +1036,11 @@ ifdef CONFIG_MODULES all: modules -# Build modules +# Build modules # -# A module can be listed more than once in obj-m resulting in -# duplicate lines in modules.order files. Those are removed -# using awk while concatenating to the final file. +# A module can be listed more than once in obj-m resulting in +# duplicate lines in modules.order files. Those are removed +# using awk while concatenating to the final file. PHONY += modules modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) modules.builtin @@ -1054,10 +1069,10 @@ _modinst_: @rm -rf $(MODLIB)/kernel @rm -f $(MODLIB)/source @mkdir -p $(MODLIB)/kernel - @ln -s $(srctree) $(MODLIB)/source + @ln -s `cd $(srctree) && /bin/pwd` $(MODLIB)/source @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \ rm -f $(MODLIB)/build ; \ - ln -s $(objtree) $(MODLIB)/build ; \ + ln -s $(CURDIR) $(MODLIB)/build ; \ fi @cp -f $(objtree)/modules.order $(MODLIB)/ @cp -f $(objtree)/modules.builtin $(MODLIB)/ @@ -1104,7 +1119,7 @@ CLEAN_DIRS += $(MODVERDIR) # Directories & files removed with 'make mrproper' MRPROPER_DIRS += include/config usr/include include/generated \ - arch/*/include/generated .tmp_objdiff + arch/*/include/generated .tmp_objdiff MRPROPER_FILES += .config .config.old .version .old_version $(version_h) \ Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \ signing_key.priv signing_key.x509 x509.genkey \ @@ -1478,7 +1493,7 @@ endif $(build)=$(build-dir) $(@:.ko=.o) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost -# FIXME Should go into a make.lib or something +# FIXME Should go into a make.lib or something # =========================================================================== quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs))) diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index c7676871d9c0..b03cfe49d22b 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -26,7 +26,7 @@ clock-frequency = <0>; }; - atlclkin3_ck: atlclkin3_ck { + atl_clkin3_ck: atl_clkin3_ck { #clock-cells = <0>; compatible = "fixed-clock"; clock-frequency = <0>; @@ -277,7 +277,7 @@ dpll_mpu_ck: dpll_mpu_ck { #clock-cells = <0>; - compatible = "ti,omap4-dpll-clock"; + compatible = "ti,omap5-mpu-dpll-clock"; clocks = <&sys_clkin1>, <&mpu_dpll_hs_clk_div>; reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>; }; @@ -730,7 +730,7 @@ mcasp1_ahclkr_mux: mcasp1_ahclkr_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <28>; reg = <0x0550>; }; @@ -738,7 +738,7 @@ mcasp1_ahclkx_mux: mcasp1_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x0550>; }; @@ -1639,7 +1639,7 @@ mcasp2_ahclkr_mux: mcasp2_ahclkr_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <28>; reg = <0x1860>; }; @@ -1647,7 +1647,7 @@ mcasp2_ahclkx_mux: mcasp2_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1860>; }; @@ -1663,7 +1663,7 @@ mcasp3_ahclkx_mux: mcasp3_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1868>; }; @@ -1679,7 +1679,7 @@ mcasp4_ahclkx_mux: mcasp4_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1898>; }; @@ -1695,7 +1695,7 @@ mcasp5_ahclkx_mux: mcasp5_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1878>; }; @@ -1711,7 +1711,7 @@ mcasp6_ahclkx_mux: mcasp6_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1904>; }; @@ -1727,7 +1727,7 @@ mcasp7_ahclkx_mux: mcasp7_ahclkx_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <24>; reg = <0x1908>; }; @@ -1743,7 +1743,7 @@ mcasp8_ahclk_mux: mcasp8_ahclk_mux { #clock-cells = <0>; compatible = "ti,mux-clock"; - clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atlclkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; + clocks = <&abe_24m_fclk>, <&abe_sys_clk_div>, <&func_24m_clk>, <&atl_clkin3_ck>, <&atl_clkin2_ck>, <&atl_clkin1_ck>, <&atl_clkin0_ck>, <&sys_clkin2>, <&ref_clkin0_ck>, <&ref_clkin1_ck>, <&ref_clkin2_ck>, <&ref_clkin3_ck>, <&mlb_clk>, <&mlbp_clk>; ti,bit-shift = <22>; reg = <0x1890>; }; diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi index aeb142ce8e9d..e67a23b5d788 100644 --- a/arch/arm/boot/dts/omap54xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi @@ -335,7 +335,7 @@ dpll_mpu_ck: dpll_mpu_ck { #clock-cells = <0>; - compatible = "ti,omap4-dpll-clock"; + compatible = "ti,omap5-mpu-dpll-clock"; clocks = <&sys_clkin>, <&mpu_dpll_hs_clk_div>; reg = <0x0160>, <0x0164>, <0x016c>, <0x0168>; }; diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index a6bc431cde70..4238bcba9d60 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -410,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event) */ hwc->config_base |= (unsigned long)mapping; - if (!hwc->sample_period) { + if (!is_sampling_event(event)) { /* * For non-sampling runs, limit the sample_period to half * of the counter width. That way, the new counter value diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index a71ae1523620..af9e35e8836f 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -126,8 +126,8 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) irqs = min(pmu_device->num_resources, num_possible_cpus()); if (irqs < 1) { - pr_err("no irqs for PMUs defined\n"); - return -ENODEV; + printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; } irq = platform_get_irq(pmu_device, 0); @@ -191,6 +191,10 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu->reset) on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; } /* diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 3997c411c140..9d853189028b 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -26,30 +26,30 @@ #include <asm/topology.h> /* - * cpu power scale management + * cpu capacity scale management */ /* - * cpu power table + * cpu capacity table * This per cpu data structure describes the relative capacity of each core. * On a heteregenous system, cores don't have the same computation capacity - * and we reflect that difference in the cpu_power field so the scheduler can - * take this difference into account during load balance. A per cpu structure - * is preferred because each CPU updates its own cpu_power field during the - * load balance except for idle cores. One idle core is selected to run the - * rebalance_domains for all idle cores and the cpu_power can be updated - * during this sequence. + * and we reflect that difference in the cpu_capacity field so the scheduler + * can take this difference into account during load balance. A per cpu + * structure is preferred because each CPU updates its own cpu_capacity field + * during the load balance except for idle cores. One idle core is selected + * to run the rebalance_domains for all idle cores and the cpu_capacity can be + * updated during this sequence. */ static DEFINE_PER_CPU(unsigned long, cpu_scale); -unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) { return per_cpu(cpu_scale, cpu); } -static void set_power_scale(unsigned int cpu, unsigned long power) +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) { - per_cpu(cpu_scale, cpu) = power; + per_cpu(cpu_scale, cpu) = capacity; } #ifdef CONFIG_OF @@ -62,11 +62,11 @@ struct cpu_efficiency { * Table of relative efficiency of each processors * The efficiency value must fit in 20bit and the final * cpu_scale value must be in the range - * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2 * in order to return at most 1 when DIV_ROUND_CLOSEST * is used to compute the capacity of a CPU. * Processors that are not defined in the table, - * use the default SCHED_POWER_SCALE value for cpu_scale. + * use the default SCHED_CAPACITY_SCALE value for cpu_scale. */ static const struct cpu_efficiency table_efficiency[] = { {"arm,cortex-a15", 3891}, @@ -83,9 +83,9 @@ static unsigned long middle_capacity = 1; * Iterate all CPUs' descriptor in DT and compute the efficiency * (as per table_efficiency). Also calculate a middle efficiency * as close as possible to (max{eff_i} - min{eff_i}) / 2 - * This is later used to scale the cpu_power field such that an - * 'average' CPU is of middle power. Also see the comments near - * table_efficiency[] and update_cpu_power(). + * This is later used to scale the cpu_capacity field such that an + * 'average' CPU is of middle capacity. Also see the comments near + * table_efficiency[] and update_cpu_capacity(). */ static void __init parse_dt_topology(void) { @@ -141,15 +141,15 @@ static void __init parse_dt_topology(void) * cpu_scale because all CPUs have the same capacity. Otherwise, we * compute a middle_capacity factor that will ensure that the capacity * of an 'average' CPU of the system will be as close as possible to - * SCHED_POWER_SCALE, which is the default value, but with the + * SCHED_CAPACITY_SCALE, which is the default value, but with the * constraint explained near table_efficiency[]. */ if (4*max_capacity < (3*(max_capacity + min_capacity))) middle_capacity = (min_capacity + max_capacity) - >> (SCHED_POWER_SHIFT+1); + >> (SCHED_CAPACITY_SHIFT+1); else middle_capacity = ((max_capacity / 3) - >> (SCHED_POWER_SHIFT-1)) + 1; + >> (SCHED_CAPACITY_SHIFT-1)) + 1; } @@ -158,20 +158,20 @@ static void __init parse_dt_topology(void) * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the * function returns directly for SMP system. */ -static void update_cpu_power(unsigned int cpu) +static void update_cpu_capacity(unsigned int cpu) { if (!cpu_capacity(cpu)) return; - set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity); + set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); - printk(KERN_INFO "CPU%u: update cpu_power %lu\n", - cpu, arch_scale_freq_power(NULL, cpu)); + printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n", + cpu, arch_scale_freq_capacity(NULL, cpu)); } #else static inline void parse_dt_topology(void) {} -static inline void update_cpu_power(unsigned int cpuid) {} +static inline void update_cpu_capacity(unsigned int cpuid) {} #endif /* @@ -267,7 +267,7 @@ void store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); - update_cpu_power(cpuid); + update_cpu_capacity(cpuid); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, @@ -297,7 +297,7 @@ void __init init_cpu_topology(void) { unsigned int cpu; - /* init core mask and power*/ + /* init core mask and capacity */ for_each_possible_cpu(cpu) { struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); @@ -307,7 +307,7 @@ void __init init_cpu_topology(void) cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); - set_power_scale(cpu, SCHED_POWER_SCALE); + set_capacity_scale(cpu, SCHED_CAPACITY_SCALE); } smp_wmb(); diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c index b935ed2922d8..85e0b0c06718 100644 --- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c +++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c @@ -208,3 +208,56 @@ void omap2xxx_clkt_vps_late_init(void) clk_put(c); } } + +#ifdef CONFIG_OF +#include <linux/clk-provider.h> +#include <linux/clkdev.h> + +static const struct clk_ops virt_prcm_set_ops = { + .recalc_rate = &omap2_table_mpu_recalc, + .set_rate = &omap2_select_table_rate, + .round_rate = &omap2_round_to_table_rate, +}; + +/** + * omap2xxx_clkt_vps_init - initialize virt_prcm_set clock + * + * Does a manual init for the virtual prcm DVFS clock for OMAP2. This + * function is called only from omap2 DT clock init, as the virtual + * node is not modelled in the DT clock data. + */ +void omap2xxx_clkt_vps_init(void) +{ + struct clk_init_data init = { NULL }; + struct clk_hw_omap *hw = NULL; + struct clk *clk; + const char *parent_name = "mpu_ck"; + struct clk_lookup *lookup = NULL; + + omap2xxx_clkt_vps_late_init(); + omap2xxx_clkt_vps_check_bootloader_rates(); + + hw = kzalloc(sizeof(*hw), GFP_KERNEL); + lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); + if (!hw || !lookup) + goto cleanup; + init.name = "virt_prcm_set"; + init.ops = &virt_prcm_set_ops; + init.parent_names = &parent_name; + init.num_parents = 1; + + hw->hw.init = &init; + + clk = clk_register(NULL, &hw->hw); + + lookup->dev_id = NULL; + lookup->con_id = "cpufreq_ck"; + lookup->clk = clk; + + clkdev_add(lookup); + return; +cleanup: + kfree(hw); + kfree(lookup); +} +#endif diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index bda767a9dea8..12f54d428d7c 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -178,17 +178,6 @@ struct clksel { const struct clksel_rate *rates; }; -struct clk_hw_omap_ops { - void (*find_idlest)(struct clk_hw_omap *oclk, - void __iomem **idlest_reg, - u8 *idlest_bit, u8 *idlest_val); - void (*find_companion)(struct clk_hw_omap *oclk, - void __iomem **other_reg, - u8 *other_bit); - void (*allow_idle)(struct clk_hw_omap *oclk); - void (*deny_idle)(struct clk_hw_omap *oclk); -}; - unsigned long omap_fixed_divisor_recalc(struct clk_hw *hw, unsigned long parent_rate); @@ -279,8 +268,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_apll54; extern const struct clk_hw_omap_ops clkhwops_apll96; -extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; -extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; /* clksel_rate blocks shared between OMAP44xx and AM33xx */ extern const struct clksel_rate div_1_0_rates[]; diff --git a/arch/arm/mach-omap2/clock2xxx.h b/arch/arm/mach-omap2/clock2xxx.h index 539dc08afbba..45f41a411603 100644 --- a/arch/arm/mach-omap2/clock2xxx.h +++ b/arch/arm/mach-omap2/clock2xxx.h @@ -21,10 +21,6 @@ unsigned long omap2xxx_sys_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); unsigned long omap2_osc_clk_recalc(struct clk_hw *clk, unsigned long parent_rate); -unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, - unsigned long parent_rate); -int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, - unsigned long parent_rate); void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); unsigned long omap2_clk_apll54_recalc(struct clk_hw *hw, unsigned long parent_rate); diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index fcd8036af910..6d7ba37e2257 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -319,6 +319,15 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel) /* Set DPLL multiplier, divider */ v = omap2_clk_readl(clk, dd->mult_div1_reg); + + /* Handle Duty Cycle Correction */ + if (dd->dcc_mask) { + if (dd->last_rounded_rate >= dd->dcc_rate) + v |= dd->dcc_mask; /* Enable DCC */ + else + v &= ~dd->dcc_mask; /* Disable DCC */ + } + v &= ~(dd->mult_mask | dd->div1_mask); v |= dd->last_rounded_m << __ffs(dd->mult_mask); v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask); diff --git a/arch/blackfin/configs/BF526-EZBRD_defconfig b/arch/blackfin/configs/BF526-EZBRD_defconfig index 1759fad54017..e66ba31ef84d 100644 --- a/arch/blackfin/configs/BF526-EZBRD_defconfig +++ b/arch/blackfin/configs/BF526-EZBRD_defconfig @@ -53,7 +53,6 @@ CONFIG_IP_PNP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -63,6 +62,7 @@ CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig index 357729682c00..0207c588c19f 100644 --- a/arch/blackfin/configs/BF527-EZKIT-V2_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT-V2_defconfig @@ -58,7 +58,6 @@ CONFIG_BFIN_SIR0=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=m CONFIG_MTD_RAM=y @@ -66,6 +65,7 @@ CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF527-EZKIT_defconfig b/arch/blackfin/configs/BF527-EZKIT_defconfig index 2e73a5d33da8..99c131ba7d90 100644 --- a/arch/blackfin/configs/BF527-EZKIT_defconfig +++ b/arch/blackfin/configs/BF527-EZKIT_defconfig @@ -57,7 +57,6 @@ CONFIG_BFIN_SIR0=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=m CONFIG_MTD_RAM=y @@ -65,6 +64,7 @@ CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=m +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set diff --git a/arch/blackfin/configs/BF548-EZKIT_defconfig b/arch/blackfin/configs/BF548-EZKIT_defconfig index f0a2ddf5de46..38cb17d218d4 100644 --- a/arch/blackfin/configs/BF548-EZKIT_defconfig +++ b/arch/blackfin/configs/BF548-EZKIT_defconfig @@ -64,7 +64,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -75,6 +74,7 @@ CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_BF5XX=y # CONFIG_MTD_NAND_BF5XX_HWECC is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig index 4ca39ab6b2bf..a7e9bfd84183 100644 --- a/arch/blackfin/configs/BF609-EZKIT_defconfig +++ b/arch/blackfin/configs/BF609-EZKIT_defconfig @@ -57,7 +57,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -65,6 +64,7 @@ CONFIG_MTD_CFI_STAA=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_M25P80=y +CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y diff --git a/arch/blackfin/configs/BlackStamp_defconfig b/arch/blackfin/configs/BlackStamp_defconfig index 3853c473b443..f4a9200e1ab1 100644 --- a/arch/blackfin/configs/BlackStamp_defconfig +++ b/arch/blackfin/configs/BlackStamp_defconfig @@ -45,7 +45,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=m CONFIG_MTD_CFI_AMDSTD=m @@ -53,7 +52,7 @@ CONFIG_MTD_RAM=y CONFIG_MTD_ROM=m CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y -# CONFIG_M25PXX_USE_FAST_READ is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/blackfin/configs/H8606_defconfig b/arch/blackfin/configs/H8606_defconfig index f754e490bbfd..0ff97d8d047a 100644 --- a/arch/blackfin/configs/H8606_defconfig +++ b/arch/blackfin/configs/H8606_defconfig @@ -36,13 +36,12 @@ CONFIG_IRTTY_SIR=m # CONFIG_WIRELESS is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_RAM=y CONFIG_MTD_ROM=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_M25P80=y -# CONFIG_M25PXX_USE_FAST_READ is not set +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_RAM=y CONFIG_MISC_DEVICES=y CONFIG_EEPROM_AT25=y diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h index 8d1e4c2d2c36..40e9c2bbc6e3 100644 --- a/arch/blackfin/include/asm/dma.h +++ b/arch/blackfin/include/asm/dma.h @@ -316,6 +316,8 @@ static inline void disable_dma(unsigned int channel) } static inline void enable_dma(unsigned int channel) { + dma_ch[channel].regs->curr_x_count = 0; + dma_ch[channel].regs->curr_y_count = 0; dma_ch[channel].regs->cfg |= DMAEN; } int set_dma_callback(unsigned int channel, irq_handler_t callback, void *data); diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index d0989290f54c..6f4bac969bf7 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -17,6 +17,7 @@ #if IS_ENABLED(CONFIG_USB_ISP1362_HCD) #include <linux/usb/isp1362.h> #endif +#include <linux/gpio.h> #include <linux/irq.h> #include <linux/i2c.h> #include <asm/dma.h> diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 21c9f304e96c..790352f93700 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -235,11 +235,6 @@ config PPC_EARLY_DEBUG_USBGECKO Select this to enable early debugging for Nintendo GameCube/Wii consoles via an external USB Gecko adapter. -config PPC_EARLY_DEBUG_WSP - bool "Early debugging via WSP's internal UART" - depends on PPC_WSP - select PPC_UDBG_16550 - config PPC_EARLY_DEBUG_PS3GELIC bool "Early debugging through the PS3 Ethernet port" depends on PPC_PS3 diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 426dce7ae7c4..ccc25eddbcb8 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -333,8 +333,8 @@ $(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz $(obj)/zImage.initrd.%: vmlinux $(wrapperbits) $(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz) -$(obj)/zImage.%: vmlinux $(wrapperbits) - $(call if_changed,wrap,$*) +$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) + $(call if_changed,wrap,$(subst $(obj)/zImage.,,$@)) # dtbImage% - a dtbImage is a zImage with an embedded device tree blob $(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig deleted file mode 100644 index 4f35fc462385..000000000000 --- a/arch/powerpc/configs/chroma_defconfig +++ /dev/null @@ -1,307 +0,0 @@ -CONFIG_PPC64=y -CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set -CONFIG_SMP=y -CONFIG_NR_CPUS=256 -CONFIG_EXPERIMENTAL=y -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_AUDIT=y -CONFIG_AUDITSYSCALL=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=19 -CONFIG_CGROUPS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEMCG=y -CONFIG_CGROUP_MEMCG_SWAP=y -CONFIG_NAMESPACES=y -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_INITRAMFS_COMPRESSION_GZIP=y -CONFIG_KALLSYMS_ALL=y -CONFIG_EMBEDDED=y -CONFIG_PERF_EVENTS=y -CONFIG_PROFILING=y -CONFIG_OPROFILE=y -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_SCOM_DEBUGFS=y -CONFIG_PPC_A2_DD2=y -CONFIG_KVM_GUEST=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_HZ_100=y -# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set -CONFIG_BINFMT_MISC=y -CONFIG_NUMA=y -# CONFIG_MIGRATION is not set -CONFIG_PPC_64K_PAGES=y -CONFIG_SCHED_SMT=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="" -# CONFIG_SECCOMP is not set -CONFIG_PCIEPORTBUS=y -# CONFIG_PCIEASPM is not set -CONFIG_PCI_MSI=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_XFRM_SUB_POLICY=y -CONFIG_XFRM_STATISTICS=y -CONFIG_NET_KEY=m -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_NET_IPIP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_IPV6=y -CONFIG_IPV6_PRIVACY=y -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_IPV6_ROUTE_INFO=y -CONFIG_IPV6_OPTIMISTIC_DAD=y -CONFIG_INET6_AH=y -CONFIG_INET6_ESP=y -CONFIG_INET6_IPCOMP=y -CONFIG_IPV6_MIP6=y -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=y -CONFIG_IPV6_TUNNEL=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_PIMSM_V2=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CT_PROTO_UDPLITE=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NET_TCPPROBE=y -# CONFIG_WIRELESS is not set -CONFIG_NET_9P=y -CONFIG_NET_9P_DEBUG=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_DEVTMPFS=y -CONFIG_MTD=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_LE_BYTE_SWAP=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_STAA=y -CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_CDROM_PKTCDVD=y -CONFIG_MISC_DEVICES=y -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_SCSI_SAS_ATTRS=m -CONFIG_SCSI_SRP_ATTRS=y -CONFIG_ATA=y -CONFIG_SATA_AHCI=y -CONFIG_SATA_SIL24=y -CONFIG_SATA_MV=y -CONFIG_SATA_SIL=y -CONFIG_PATA_CMD64X=y -CONFIG_PATA_MARVELL=y -CONFIG_PATA_SIL680=y -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=y -CONFIG_BLK_DEV_DM=y -CONFIG_DM_CRYPT=y -CONFIG_DM_SNAPSHOT=y -CONFIG_DM_MIRROR=y -CONFIG_DM_ZERO=y -CONFIG_DM_UEVENT=y -CONFIG_NETDEVICES=y -CONFIG_TUN=y -CONFIG_E1000E=y -CONFIG_TIGON3=y -# CONFIG_WLAN is not set -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_DEVPTS_MULTIPLE_INSTANCES=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_HW_RANDOM=y -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=1024 -# CONFIG_HWMON is not set -# CONFIG_VGA_ARB is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EDAC=y -CONFIG_EDAC_MM_EDAC=y -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_DS1511=y -CONFIG_RTC_DRV_DS1553=y -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_EXT4_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_FUSE_FS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_CONFIGFS_FS=m -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFS_V4_1=y -CONFIG_ROOT_NFS=y -CONFIG_CIFS=y -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=y -CONFIG_NLS_ISO8859_1=y -CONFIG_CRC_CCITT=m -CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=m -CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DETECT_HUNG_TASK=y -# CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_INFO=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_PPC_EMULATED_STATS=y -CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y -CONFIG_IRQ_DOMAIN_DEBUG=y -CONFIG_PPC_EARLY_DEBUG=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y -CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_LZO=m -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_VIRTUALIZATION=y diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index f42e9baf3a4e..7c8608b09694 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -489,7 +489,6 @@ typedef struct scc_trans { #define FCC_GFMR_TCI ((uint)0x20000000) #define FCC_GFMR_TRX ((uint)0x10000000) #define FCC_GFMR_TTX ((uint)0x08000000) -#define FCC_GFMR_TTX ((uint)0x08000000) #define FCC_GFMR_CDP ((uint)0x04000000) #define FCC_GFMR_CTSP ((uint)0x02000000) #define FCC_GFMR_CDS ((uint)0x01000000) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b76f58c124ca..fab7743c2640 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -254,6 +254,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); +const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void *eeh_dev_init(struct device_node *dn, void *data); diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index 89d5670b2eeb..1e551a2d6f82 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -33,7 +33,7 @@ struct eeh_event { int eeh_event_init(void); int eeh_send_failure_event(struct eeh_pe *pe); -void eeh_remove_event(struct eeh_pe *pe); +void eeh_remove_event(struct eeh_pe *pe, bool force); void eeh_handle_event(struct eeh_pe *pe); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 901dac6b6cb7..d0918e09557f 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -223,10 +223,6 @@ typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; -#ifdef CONFIG_PPC_ICSWX - struct spinlock *cop_lockp; /* guard cop related stuff */ - unsigned long acop; /* mask of enabled coprocessor types */ -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_MM_SLICES u64 low_slices_psize; /* SLB page size encodings */ u64 high_slices_psize; /* 4 bits per slice for now */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index cb15cbb51600..460018889ba9 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -599,9 +599,9 @@ enum { }; struct OpalIoPhbErrorCommon { - uint32_t version; - uint32_t ioType; - uint32_t len; + __be32 version; + __be32 ioType; + __be32 len; }; struct OpalIoP7IOCPhbErrorData { @@ -666,64 +666,64 @@ struct OpalIoP7IOCPhbErrorData { struct OpalIoPhb3ErrorData { struct OpalIoPhbErrorCommon common; - uint32_t brdgCtl; + __be32 brdgCtl; /* PHB3 UTL regs */ - uint32_t portStatusReg; - uint32_t rootCmplxStatus; - uint32_t busAgentStatus; + __be32 portStatusReg; + __be32 rootCmplxStatus; + __be32 busAgentStatus; /* PHB3 cfg regs */ - uint32_t deviceStatus; - uint32_t slotStatus; - uint32_t linkStatus; - uint32_t devCmdStatus; - uint32_t devSecStatus; + __be32 deviceStatus; + __be32 slotStatus; + __be32 linkStatus; + __be32 devCmdStatus; + __be32 devSecStatus; /* cfg AER regs */ - uint32_t rootErrorStatus; - uint32_t uncorrErrorStatus; - uint32_t corrErrorStatus; - uint32_t tlpHdr1; - uint32_t tlpHdr2; - uint32_t tlpHdr3; - uint32_t tlpHdr4; - uint32_t sourceId; + __be32 rootErrorStatus; + __be32 uncorrErrorStatus; + __be32 corrErrorStatus; + __be32 tlpHdr1; + __be32 tlpHdr2; + __be32 tlpHdr3; + __be32 tlpHdr4; + __be32 sourceId; - uint32_t rsv3; + __be32 rsv3; /* Record data about the call to allocate a buffer */ - uint64_t errorClass; - uint64_t correlator; + __be64 errorClass; + __be64 correlator; - uint64_t nFir; /* 000 */ - uint64_t nFirMask; /* 003 */ - uint64_t nFirWOF; /* 008 */ + __be64 nFir; /* 000 */ + __be64 nFirMask; /* 003 */ + __be64 nFirWOF; /* 008 */ /* PHB3 MMIO Error Regs */ - uint64_t phbPlssr; /* 120 */ - uint64_t phbCsr; /* 110 */ - uint64_t lemFir; /* C00 */ - uint64_t lemErrorMask; /* C18 */ - uint64_t lemWOF; /* C40 */ - uint64_t phbErrorStatus; /* C80 */ - uint64_t phbFirstErrorStatus; /* C88 */ - uint64_t phbErrorLog0; /* CC0 */ - uint64_t phbErrorLog1; /* CC8 */ - uint64_t mmioErrorStatus; /* D00 */ - uint64_t mmioFirstErrorStatus; /* D08 */ - uint64_t mmioErrorLog0; /* D40 */ - uint64_t mmioErrorLog1; /* D48 */ - uint64_t dma0ErrorStatus; /* D80 */ - uint64_t dma0FirstErrorStatus; /* D88 */ - uint64_t dma0ErrorLog0; /* DC0 */ - uint64_t dma0ErrorLog1; /* DC8 */ - uint64_t dma1ErrorStatus; /* E00 */ - uint64_t dma1FirstErrorStatus; /* E08 */ - uint64_t dma1ErrorLog0; /* E40 */ - uint64_t dma1ErrorLog1; /* E48 */ - uint64_t pestA[OPAL_PHB3_NUM_PEST_REGS]; - uint64_t pestB[OPAL_PHB3_NUM_PEST_REGS]; + __be64 phbPlssr; /* 120 */ + __be64 phbCsr; /* 110 */ + __be64 lemFir; /* C00 */ + __be64 lemErrorMask; /* C18 */ + __be64 lemWOF; /* C40 */ + __be64 phbErrorStatus; /* C80 */ + __be64 phbFirstErrorStatus; /* C88 */ + __be64 phbErrorLog0; /* CC0 */ + __be64 phbErrorLog1; /* CC8 */ + __be64 mmioErrorStatus; /* D00 */ + __be64 mmioFirstErrorStatus; /* D08 */ + __be64 mmioErrorLog0; /* D40 */ + __be64 mmioErrorLog1; /* D48 */ + __be64 dma0ErrorStatus; /* D80 */ + __be64 dma0FirstErrorStatus; /* D88 */ + __be64 dma0ErrorLog0; /* DC0 */ + __be64 dma0ErrorLog1; /* DC8 */ + __be64 dma1ErrorStatus; /* E00 */ + __be64 dma1FirstErrorStatus; /* E08 */ + __be64 dma1ErrorLog0; /* E40 */ + __be64 dma1ErrorLog1; /* E48 */ + __be64 pestA[OPAL_PHB3_NUM_PEST_REGS]; + __be64 pestB[OPAL_PHB3_NUM_PEST_REGS]; }; enum { @@ -851,8 +851,8 @@ int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t erro int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action); int64_t opal_get_epow_status(__be64 *status); int64_t opal_set_system_attention_led(uint8_t led_action); -int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, - uint16_t *pci_error_type, uint16_t *severity); +int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, + __be16 *pci_error_type, __be16 *severity); int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); int64_t opal_reinit_cpus(uint64_t flags); diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h index 3d52a1132f3d..3ba9c6f096fc 100644 --- a/arch/powerpc/include/asm/reg_a2.h +++ b/arch/powerpc/include/asm/reg_a2.h @@ -110,15 +110,6 @@ #define TLB1_UR ASM_CONST(0x0000000000000002) #define TLB1_SR ASM_CONST(0x0000000000000001) -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP -#define WSP_UART_PHYS 0xffc000c000 -/* This needs to be careful chosen to hit a !0 congruence class - * in the TLB since we bolt it in way 3, which is already occupied - * by our linear mapping primary bolted entry in CC 0. - */ -#define WSP_UART_VIRT 0xf000000000001000 -#endif - /* A2 erativax attributes definitions */ #define ERATIVAX_RS_IS_ALL 0x000 #define ERATIVAX_RS_IS_TID 0x040 diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 0e83e7d8c73f..58abeda64cb7 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -16,13 +16,15 @@ struct thread_struct; extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); #ifdef CONFIG_PPC_BOOK3S_64 -static inline void save_tar(struct thread_struct *prev) +static inline void save_early_sprs(struct thread_struct *prev) { if (cpu_has_feature(CPU_FTR_ARCH_207S)) prev->tar = mfspr(SPRN_TAR); + if (cpu_has_feature(CPU_FTR_DSCR)) + prev->dscr = mfspr(SPRN_DSCR); } #else -static inline void save_tar(struct thread_struct *prev) {} +static inline void save_early_sprs(struct thread_struct *prev) {} #endif extern void enable_kernel_fp(void); @@ -84,6 +86,8 @@ static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 /* EBB perf events are not inherited, so clear all EBB state. */ + t->thread.ebbrr = 0; + t->thread.ebbhr = 0; t->thread.bescr = 0; t->thread.mmcr2 = 0; t->thread.mmcr0 = 0; diff --git a/arch/powerpc/include/asm/wsp.h b/arch/powerpc/include/asm/wsp.h deleted file mode 100644 index c7dc83088a33..000000000000 --- a/arch/powerpc/include/asm/wsp.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef __ASM_POWERPC_WSP_H -#define __ASM_POWERPC_WSP_H - -extern int wsp_get_chip_id(struct device_node *dn); - -#endif /* __ASM_POWERPC_WSP_H */ diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 5b7657959faa..de2c0e4ee1aa 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -41,5 +41,6 @@ #define PPC_FEATURE2_EBB 0x10000000 #define PPC_FEATURE2_ISEL 0x08000000 #define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fab19ec25597..670c312d914e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o -obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S deleted file mode 100644 index 61f079e05b61..000000000000 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ /dev/null @@ -1,120 +0,0 @@ -/* - * A2 specific assembly support code - * - * Copyright 2009 Ben Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/asm-offsets.h> -#include <asm/ppc_asm.h> -#include <asm/ppc-opcode.h> -#include <asm/processor.h> -#include <asm/reg_a2.h> -#include <asm/reg.h> -#include <asm/thread_info.h> - -/* - * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. - * This also prevents external LPID accesses but that isn't a problem when not a - * guest. Under PV, this setting will be ignored and MMUCR will return the right - * number of PID bits we can use. - */ -#define MMUCR1_EXTEND_PID \ - (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ - MMUCR1_DTTID | MMUCR1_DCCD) - -/* - * Use extended PIDs if enabled. - * Don't clear the ERATs on context sync events and enable I & D LRU. - * Enable ERAT back invalidate when tlbwe overwrites an entry. - */ -#define INITIAL_MMUCR1 \ - (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ - MMUCR1_DRRE | MMUCR1_TLBWE_BINV) - -_GLOBAL(__setup_cpu_a2) - /* Some of these are actually thread local and some are - * core local but doing it always won't hurt - */ - -#ifdef CONFIG_PPC_ICSWX - /* Make sure ACOP starts out as zero */ - li r3,0 - mtspr SPRN_ACOP,r3 - - /* Skip the following if we are in Guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne _icswx_skip_guest - - /* Enable icswx instruction */ - mfspr r3,SPRN_A2_CCR2 - ori r3,r3,A2_CCR2_ENABLE_ICSWX - mtspr SPRN_A2_CCR2,r3 - - /* Unmask all CTs in HACOP */ - li r3,-1 - mtspr SPRN_HACOP,r3 -_icswx_skip_guest: -#endif /* CONFIG_PPC_ICSWX */ - - /* Enable doorbell */ - mfspr r3,SPRN_A2_CCR2 - oris r3,r3,A2_CCR2_ENABLE_PC@h - mtspr SPRN_A2_CCR2,r3 - isync - - /* Setup CCR0 to disable power saving for now as it's busted - * in the current implementations. Setup CCR1 to wake on - * interrupts normally (we write the default value but who - * knows what FW may have clobbered...) - */ - li r3,0 - mtspr SPRN_A2_CCR0, r3 - LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) - mtspr SPRN_A2_CCR1, r3 - - /* Initialise MMUCR1 */ - lis r3,INITIAL_MMUCR1@h - ori r3,r3,INITIAL_MMUCR1@l - mtspr SPRN_MMUCR1,r3 - - /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ - LOAD_REG_IMMEDIATE(r3, 0x000a7531) - mtspr SPRN_MMUCR2,r3 - - /* Set MMUCR3 to write all thids bit to the TLB */ - LOAD_REG_IMMEDIATE(r3, 0x0000000f) - mtspr SPRN_MMUCR3,r3 - - /* Don't do ERAT stuff if running guest mode */ - mfmsr r3 - andis. r0,r3,MSR_GS@h - bne 1f - - /* Now set the I-ERAT watermark to 15 */ - lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_IERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* Now set the D-ERAT watermark to 31 */ - lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h - mtspr SPRN_MMUCR0, r4 - li r4,A2_DERAT_SIZE-1 - PPC_ERATWE(R4,R4,3) - - /* And invalidate the beast just in case. That won't get rid of - * a bolted entry though it will be in LRU and so will go away eventually - * but let's not bother for now - */ - PPC_ERATILX(0,0,R0) -1: - blr - -_GLOBAL(__restore_cpu_a2) - b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 1557e7c2c7e1..46733535cc0b 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -56,6 +56,7 @@ _GLOBAL(__setup_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 @@ -74,6 +75,7 @@ _GLOBAL(__restore_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR + ori r3, r3, LPCR_PECEDH bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c1faade6506d..965291b4c2fa 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -109,7 +109,8 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR) + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) @@ -2148,44 +2149,6 @@ static struct cpu_spec __initdata cpu_specs[] = { } #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ - -#ifdef CONFIG_PPC_A2 - { /* Standard A2 (>= DD2) + FPU core */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00480000, - .cpu_name = "A2 (>= DD2)", - .cpu_features = CPU_FTRS_A2, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTRS_A2, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .cpu_setup = __setup_cpu_a2, - .cpu_restore = __restore_cpu_a2, - .machine_check = machine_check_generic, - .platform = "ppca2", - }, - { /* This is a default entry to get going, to be replaced by - * a real one at some stage - */ -#define CPU_FTRS_BASE_BOOK3E (CPU_FTR_USE_TB | \ - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_SMT | \ - CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "Book3E", - .cpu_features = CPU_FTRS_BASE_BOOK3E, - .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_TYPE_3E | MMU_FTR_USE_TLBILX | - MMU_FTR_USE_TLBIVAX_BCAST | - MMU_FTR_LOCK_BCAST_INVAL, - .icache_bsize = 64, - .dcache_bsize = 64, - .num_pmcs = 0, - .machine_check = machine_check_generic, - .platform = "power6", - }, -#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec the_cpu_spec; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 7051ea3101b9..86e25702aaca 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -330,8 +330,8 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); eeh_serialize_unlock(flags); - pr_err("EEH: PHB#%x failure detected\n", - phb_pe->phb->global_number); + pr_err("EEH: PHB#%x failure detected, location: %s\n", + phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(phb_pe); @@ -358,10 +358,11 @@ out: int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); unsigned long flags; struct device_node *dn; struct pci_dev *dev; - struct eeh_pe *pe; + struct eeh_pe *pe, *parent_pe, *phb_pe; int rc = 0; const char *location; @@ -439,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev) */ if ((ret < 0) || (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { + ((ret & active_flags) == active_flags)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; goto dn_unlock; } + /* + * It should be corner case that the parent PE has been + * put into frozen state as well. We should take care + * that at first. + */ + parent_pe = pe->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + ret = eeh_ops->get_state(parent_pe, NULL); + if (ret > 0 && + (ret & active_flags) != active_flags) + pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + eeh_stats.slot_resets++; /* Avoid repeated reports of this failure, including problems @@ -460,8 +481,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * a stack trace will help the device-driver authors figure * out what happened. So print that out. */ - pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", - pe->addr, pe->phb->global_number); + phb_pe = eeh_phb_pe_get(pe->phb); + pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pe->phb->global_number, pe->addr); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); dump_stack(); eeh_send_failure_event(pe); diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 7100a5b96e70..420da61d4ce0 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -447,8 +447,9 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) * PE reset (for 3 times), we try to clear the frozen state * for 3 times as well. */ -static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { + struct eeh_pe *pe = (struct eeh_pe *)data; int i, rc; for (i = 0; i < 3; i++) { @@ -461,13 +462,24 @@ static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) } /* The PE has been isolated, clear it */ - if (rc) + if (rc) { pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n", __func__, pe->phb->global_number, pe->addr, rc); - else + return (void *)pe; + } + + return NULL; +} + +static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) +{ + void *rc; + + rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); + if (!rc) eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - return rc; + return rc ? -EIO : 0; } /** @@ -758,7 +770,7 @@ static void eeh_handle_special_event(void) eeh_serialize_lock(&flags); /* Purge all events */ - eeh_remove_event(NULL); + eeh_remove_event(NULL, true); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -777,7 +789,7 @@ static void eeh_handle_special_event(void) eeh_serialize_lock(&flags); /* Purge all events of the PHB */ - eeh_remove_event(pe); + eeh_remove_event(pe, true); if (rc == EEH_NEXT_ERR_DEAD_PHB) eeh_pe_state_mark(pe, EEH_PE_ISOLATED); diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c index 72d748b56c86..4eefb6e34dbb 100644 --- a/arch/powerpc/kernel/eeh_event.c +++ b/arch/powerpc/kernel/eeh_event.c @@ -152,24 +152,33 @@ int eeh_send_failure_event(struct eeh_pe *pe) /** * eeh_remove_event - Remove EEH event from the queue * @pe: Event binding to the PE + * @force: Event will be removed unconditionally * * On PowerNV platform, we might have subsequent coming events * is part of the former one. For that case, those subsequent * coming events are totally duplicated and unnecessary, thus * they should be removed. */ -void eeh_remove_event(struct eeh_pe *pe) +void eeh_remove_event(struct eeh_pe *pe, bool force) { unsigned long flags; struct eeh_event *event, *tmp; + /* + * If we have NULL PE passed in, we have dead IOC + * or we're sure we can report all existing errors + * by the caller. + * + * With "force", the event with associated PE that + * have been isolated, the event won't be removed + * to avoid event lost. + */ spin_lock_irqsave(&eeh_eventlist_lock, flags); list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) { - /* - * If we don't have valid PE passed in, that means - * we already have event corresponding to dead IOC - * and all events should be purged. - */ + if (!force && event->pe && + (event->pe->state & EEH_PE_ISOLATED)) + continue; + if (!pe) { list_del(&event->list); kfree(event); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 995c2a284630..fbd01eba4473 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -792,6 +792,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) } /** + * eeh_pe_loc_get - Retrieve location code binding to the given PE + * @pe: EEH PE + * + * Retrieve the location code of the given PE. If the primary PE bus + * is root bus, we will grab location code from PHB device tree node + * or root port. Otherwise, the upstream bridge's device tree node + * of the primary PE bus will be checked for the location code. + */ +const char *eeh_pe_loc_get(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pci_bus *bus = eeh_pe_bus_get(pe); + struct pci_dev *pdev; + struct device_node *dn; + const char *loc; + + if (!bus) + return "N/A"; + + /* PHB PE or root PE ? */ + if (pci_is_root_bus(bus)) { + hose = pci_bus_to_host(bus); + loc = of_get_property(hose->dn, + "ibm,loc-code", NULL); + if (loc) + return loc; + loc = of_get_property(hose->dn, + "ibm,io-base-loc-code", NULL); + if (loc) + return loc; + + pdev = pci_get_slot(bus, 0x0); + } else { + pdev = bus->self; + } + + if (!pdev) { + loc = "N/A"; + goto out; + } + + dn = pci_device_to_OF_node(pdev); + if (!dn) { + loc = "N/A"; + goto out; + } + + loc = of_get_property(dn, "ibm,loc-code", NULL); + if (!loc) + loc = of_get_property(dn, "ibm,slot-location-code", NULL); + if (!loc) + loc = "N/A"; + +out: + if (pci_is_root_bus(bus) && pdev) + pci_dev_put(pdev); + return loc; +} + +/** * eeh_pe_bus_get - Retrieve PCI bus according to the given PE * @pe: EEH PE * diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 911d45366f59..6528c5e2cc44 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -428,12 +428,6 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_PPC64 -BEGIN_FTR_SECTION - mfspr r25,SPRN_DSCR - std r25,THREAD_DSCR(r3) -END_FTR_SECTION_IFSET(CPU_FTR_DSCR) -#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 771b4e92e5d9..bb9cac6c8051 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1467,22 +1467,6 @@ a2_tlbinit_after_linear_map: .globl a2_tlbinit_after_iprot_flush a2_tlbinit_after_iprot_flush: -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - /* Now establish early debug mappings if applicable */ - /* Restore the MAS0 we used for linear mapping load */ - mtspr SPRN_MAS0,r11 - - lis r3,(MAS1_VALID | MAS1_IPROT)@h - ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) - mtspr SPRN_MAS1,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) - mtspr SPRN_MAS2,r3 - LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) - mtspr SPRN_MAS7_MAS3,r3 - /* re-use the MAS8 value from the linear mapping */ - tlbwe -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ - PPC_TLBILX(0,0,R0) sync isync diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 20f11eb4dff7..a7d36b19221d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -439,9 +439,9 @@ BEGIN_FTR_SECTION * R9 = CR * Original R9 to R13 is saved on PACA_EXMC * - * Switch to mc_emergency stack and handle re-entrancy (though we - * currently don't test for overflow). Save MCE registers srr1, - * srr0, dar and dsisr and then set ME=1 + * Switch to mc_emergency stack and handle re-entrancy (we limit + * the nested MCE upto level 4 to avoid stack overflow). + * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1 * * We use paca->in_mce to check whether this is the first entry or * nested machine check. We increment paca->in_mce to track nested @@ -464,6 +464,9 @@ BEGIN_FTR_SECTION 0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ addi r10,r10,1 /* increment paca->in_mce */ sth r10,PACA_IN_MCE(r13) + /* Limit nested MCE to level 4 to avoid stack overflow */ + cmpwi r10,4 + bgt 2f /* Check if we hit limit of 4 */ std r11,GPR1(r1) /* Save r1 on the stack. */ std r11,0(r1) /* make stack chain pointer */ mfspr r11,SPRN_SRR0 /* Save SRR0 */ @@ -482,10 +485,23 @@ BEGIN_FTR_SECTION ori r11,r11,MSR_RI /* turn on RI bit */ ld r12,PACAKBASE(r13) /* get high part of &label */ LOAD_HANDLER(r12, machine_check_handle_early) - mtspr SPRN_SRR0,r12 +1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 rfid b . /* prevent speculative execution */ +2: + /* Stack overflow. Stay on emergency stack and panic. + * Keep the ME bit off while panic-ing, so that if we hit + * another machine check we checkstop. + */ + addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */ + ld r11,PACAKMSR(r13) + ld r12,PACAKBASE(r13) + LOAD_HANDLER(r12, unrecover_mce) + li r10,MSR_ME + andc r11,r11,r10 /* Turn off MSR_ME */ + b 1b + b . /* prevent speculative execution */ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) machine_check_pSeries: @@ -1389,6 +1405,7 @@ machine_check_handle_early: bl save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early + std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) #ifdef CONFIG_PPC_P7_NAP /* @@ -1443,11 +1460,33 @@ machine_check_handle_early: */ andi. r11,r12,MSR_RI bne 2f -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b +1: mfspr r11,SPRN_SRR0 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10,unrecover_mce) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + /* + * We are going down. But there are chances that we might get hit by + * another MCE during panic path and we may run into unstable state + * with no way out. Hence, turn ME bit off while going down, so that + * when another MCE is hit during panic path, system will checkstop + * and hypervisor will get restarted cleanly by SP. + */ + li r3,MSR_ME + andc r10,r10,r3 /* Turn off MSR_ME */ + mtspr SPRN_SRR1,r10 + rfid + b . 2: /* + * Check if we have successfully handled/recovered from error, if not + * then stay on emergency stack and panic. + */ + ld r3,RESULT(r1) /* Load result */ + cmpdi r3,0 /* see if we handled MCE successfully */ + + beq 1b /* if !handled then panic */ + /* * Return from MC interrupt. * Queue up the MCE event so that we can log it later, while * returning from kernel or opal call. @@ -1460,6 +1499,17 @@ machine_check_handle_early: MACHINE_CHECK_HANDLER_WINDUP b machine_check_pSeries +unrecover_mce: + /* Invoke machine_check_exception to print MCE event and panic. */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + /* + * We will not reach here. Even if we did, there is no way out. Call + * unrecoverable_exception and die. + */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b /* * r13 points to the PACA, r9 contains the saved CR, * r12 contain the saved SRR1, SRR0 is still ready for return diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 67ee0d6c1070..7d7d8635227a 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -930,25 +930,6 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ -#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) - - /* Load a TLB entry for the UART, so that ppc4xx_progress() can use - * the UARTs nice and early. We use a 4k real==virtual mapping. */ - - lis r3,SERIAL_DEBUG_IO_BASE@h - ori r3,r3,SERIAL_DEBUG_IO_BASE@l - mr r4,r3 - clrrwi r4,r4,12 - ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) - - clrrwi r3,r3,12 - ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) - - li r0,0 /* TLB slot 0 */ - tlbwe r4,r0,TLB_DATA - tlbwe r3,r0,TLB_TAG -#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ - isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8a1edbe26b8f..be99774d3f44 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -755,15 +755,15 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); - /* Back up the TAR across context switches. + /* Back up the TAR and DSCR across context switches. * Note that the TAR is not available for use in the kernel. (To * provide this, the TAR should be backed up/restored on exception * entry/exit instead, and be in pt_regs. FIXME, this should be in * pt_regs anyway (for debug).) - * Save the TAR here before we do treclaim/trecheckpoint as these - * will change the TAR. + * Save the TAR and DSCR here before we do treclaim/trecheckpoint as + * these will change them. */ - save_tar(&prev->thread); + save_early_sprs(&prev->thread); __switch_to_tm(prev); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d4d418376f99..e239df3768ac 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -471,7 +471,7 @@ void __init smp_setup_cpu_maps(void) for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, true); + set_cpu_present(cpu, of_device_is_available(dn)); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7753af2d2613..51a3ff78838a 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -749,7 +749,7 @@ int setup_profiling_timer(unsigned int multiplier) /* cpumask of CPUs with asymetric SMT dependancy */ static const int powerpc_smt_flags(void) { - int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES; + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 7e711bdcc6da..9fff9cdcc519 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -551,7 +551,7 @@ void timer_interrupt(struct pt_regs * regs) may_hard_irq_enable(); -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) +#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1bd7ca298fa1..239f1cde3fff 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -295,6 +295,8 @@ long machine_check_early(struct pt_regs *regs) { long handled = 0; + __get_cpu_var(irq_stat).mce_exceptions++; + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index a15837519dca..b7aa07279a63 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,8 +62,6 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) - udbg_init_wsp(); #elif defined(CONFIG_PPC_EARLY_DEBUG_MEMCONS) /* In memory console */ udbg_init_memcons(); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 75702e207b29..6e7c4923b5ea 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -296,14 +296,3 @@ void __init udbg_init_40x_realmode(void) } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ - - -#ifdef CONFIG_PPC_EARLY_DEBUG_WSP - -void __init udbg_init_wsp(void) -{ - udbg_uart_init_mmio((void *)WSP_UART_VIRT, 1); - udbg_uart_setup(57600, 50000000); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 768a9f977c00..3a5c568b1e89 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -113,10 +113,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) * We assume that if the condition is recovered then linux host * will have generated an error log event that we will pick * up and log later. - * Don't release mce event now. In case if condition is not - * recovered we do guest exit and go back to linux host machine - * check handler. Hence we need make sure that current mce event - * is available for linux host to consume. + * Don't release mce event now. We will queue up the event so that + * we can log the MCE event info on host console. */ if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) goto out; @@ -128,11 +126,12 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) out: /* - * If we have handled the error, then release the mce event because - * we will be delivering machine check to guest. + * We are now going enter guest either through machine check + * interrupt (for unhandled errors) or will continue from + * current HSRR0 (for handled errors) in guest. Hence + * queue up the event so that we can log it from host console later. */ - if (handled) - release_mce_event(); + machine_check_queue_event(); return handled; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 77356fd25ccc..868347ef09fd 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2257,15 +2257,28 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* continue exiting from guest? */ + cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK - beq mc_cont + /* + * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through + * machine check interrupt (set HSRR0 to 0x200). And for handled + * errors (no-fatal), just go back to guest execution with current + * HSRR0 instead of exiting guest. This new approach will inject + * machine check to guest for fatal error causing guest to crash. + * + * The old code used to return to host for unhandled errors which + * was causing guest to hang with soft lockups inside guest and + * makes it difficult to recover guest instance. + */ + ld r10, VCPU_PC(r9) + ld r11, VCPU_MSR(r9) + bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK ld r11, VCPU_MSR(r9) bl kvmppc_msr_interrupt - b fast_interrupt_c_return +2: b fast_interrupt_c_return /* * Check the reason we woke from nap, and take appropriate action. diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c0511c27a733..412dd46dd0b7 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1470,7 +1470,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index bf9c6d4cd26c..391b3f6b54a3 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -19,7 +19,6 @@ source "arch/powerpc/platforms/embedded6xx/Kconfig" source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" -source "arch/powerpc/platforms/wsp/Kconfig" config KVM_GUEST bool "KVM Guest support" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 43b65ad1970a..a41bd023647a 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -148,10 +148,6 @@ config POWER4 depends on PPC64 && PPC_BOOK3S def_bool y -config PPC_A2 - bool - depends on PPC_BOOK3E_64 - config TUNE_CELL bool "Optimize for Cell Broadband Engine" depends on PPC64 && PPC_BOOK3S @@ -280,7 +276,7 @@ config VSX config PPC_ICSWX bool "Support for PowerPC icswx coprocessor instruction" - depends on POWER4 || PPC_A2 + depends on POWER4 default n ---help--- diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 879b4a448498..469ef170d218 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -22,4 +22,3 @@ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ obj-$(CONFIG_AMIGAONE) += amigaone/ -obj-$(CONFIG_PPC_WSP) += wsp/ diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 0ba3c9598358..bcfd6f063efa 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -35,7 +35,6 @@ #define SPUFS_PS_MAP_SIZE 0x20000 #define SPUFS_MFC_MAP_SIZE 0x1000 #define SPUFS_CNTL_MAP_SIZE 0x1000 -#define SPUFS_CNTL_MAP_SIZE 0x1000 #define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE #define SPUFS_MSS_MAP_SIZE 0x1000 diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index c252ee95bddf..45a8ed0585cd 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -17,6 +17,7 @@ config PPC_POWERNV select CPU_FREQ_GOV_USERSPACE select CPU_FREQ_GOV_ONDEMAND select CPU_FREQ_GOV_CONSERVATIVE + select PPC_DOORBELL default y config PPC_POWERNV_RTAS diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 4ad0d345bc96..d55891f89a2c 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,9 +1,9 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o subcore.o subcore-asm.o +obj-y += opal-msglog.o -obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 753f08e36dfa..8ad0c5b891f4 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -267,7 +267,7 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) { s64 ret = 0; u8 fstate; - u16 pcierr; + __be16 pcierr; u32 pe_no; int result; struct pci_controller *hose = pe->phb; @@ -316,7 +316,7 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) result = 0; result &= ~EEH_STATE_RESET_ACTIVE; - if (pcierr != OPAL_EEH_PHB_ERROR) { + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { result |= EEH_STATE_MMIO_ACTIVE; result |= EEH_STATE_DMA_ACTIVE; result |= EEH_STATE_MMIO_ENABLED; @@ -705,18 +705,19 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) { struct pci_controller *hose; struct pnv_phb *phb; - struct eeh_pe *phb_pe; - u64 frozen_pe_no; - u16 err_type, severity; + struct eeh_pe *phb_pe, *parent_pe; + __be64 frozen_pe_no; + __be16 err_type, severity; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); long rc; - int ret = EEH_NEXT_ERR_NONE; + int state, ret = EEH_NEXT_ERR_NONE; /* * While running here, it's safe to purge the event queue. * And we should keep the cached OPAL notifier event sychronized * between the kernel and firmware. */ - eeh_remove_event(NULL); + eeh_remove_event(NULL, false); opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { @@ -742,8 +743,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } /* If the PHB doesn't have error, stop processing */ - if (err_type == OPAL_EEH_NO_ERROR || - severity == OPAL_EEH_SEV_NO_ERROR) { + if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || + be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { pr_devel("%s: No error found on PHB#%x\n", __func__, hose->global_number); continue; @@ -755,14 +756,14 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) * specific PHB. */ pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", - __func__, err_type, severity, - frozen_pe_no, hose->global_number); - switch (err_type) { + __func__, be16_to_cpu(err_type), be16_to_cpu(severity), + be64_to_cpu(frozen_pe_no), hose->global_number); + switch (be16_to_cpu(err_type)) { case OPAL_EEH_IOC_ERROR: - if (severity == OPAL_EEH_SEV_IOC_DEAD) { + if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { pr_err("EEH: dead IOC detected\n"); ret = EEH_NEXT_ERR_DEAD_IOC; - } else if (severity == OPAL_EEH_SEV_INF) { + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { pr_info("EEH: IOC informative error " "detected\n"); ioda_eeh_hub_diag(hose); @@ -771,20 +772,26 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) break; case OPAL_EEH_PHB_ERROR: - if (severity == OPAL_EEH_SEV_PHB_DEAD) { + if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { *pe = phb_pe; - pr_err("EEH: dead PHB#%x detected\n", - hose->global_number); + pr_err("EEH: dead PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_DEAD_PHB; - } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { + } else if (be16_to_cpu(severity) == + OPAL_EEH_SEV_PHB_FENCED) { *pe = phb_pe; - pr_err("EEH: fenced PHB#%x detected\n", - hose->global_number); + pr_err("EEH: Fenced PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FENCED_PHB; - } else if (severity == OPAL_EEH_SEV_INF) { + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { pr_info("EEH: PHB#%x informative error " - "detected\n", - hose->global_number); + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); ioda_eeh_phb_diag(hose); ret = EEH_NEXT_ERR_NONE; } @@ -792,34 +799,33 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) break; case OPAL_EEH_PE_ERROR: /* - * If we can't find the corresponding PE, the - * PEEV / PEST would be messy. So we force an - * fenced PHB so that it can be recovered. - * - * If the PE has been marked as isolated, that - * should have been removed permanently or in - * progress with recovery. We needn't report - * it again. + * If we can't find the corresponding PE, we + * just try to unfreeze. */ - if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) { - *pe = phb_pe; - pr_err("EEH: Escalated fenced PHB#%x " - "detected for PE#%llx\n", - hose->global_number, - frozen_pe_no); - ret = EEH_NEXT_ERR_FENCED_PHB; + if (ioda_eeh_get_pe(hose, + be64_to_cpu(frozen_pe_no), pe)) { + /* Try best to clear it */ + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, frozen_pe_no); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); + opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; } else if ((*pe)->state & EEH_PE_ISOLATED) { ret = EEH_NEXT_ERR_NONE; } else { pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", (*pe)->addr, (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, PHB location: %s\n", + eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe)); ret = EEH_NEXT_ERR_FROZEN_PE; } break; default: pr_warn("%s: Unexpected error type %d\n", - __func__, err_type); + __func__, be16_to_cpu(err_type)); } /* @@ -837,6 +843,31 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } /* + * We probably have the frozen parent PE out there and + * we need have to handle frozen parent PE firstly. + */ + if (ret == EEH_NEXT_ERR_FROZEN_PE) { + parent_pe = (*pe)->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + state = ioda_eeh_get_state(parent_pe); + if (state > 0 && + (state & active_flags) != active_flags) + *pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + + /* We possibly migrate to another PE */ + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + } + + /* * If we have no errors on the specific PHB or only * informative error there, we continue poking it. * Otherwise, we need actions to be taken by upper diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 1bb25b952504..44ed78af1a0d 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -37,7 +37,8 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, { struct memcons *mc = bin_attr->private; const char *conbuf; - size_t ret, first_read = 0; + ssize_t ret; + size_t first_read = 0; uint32_t out_pos, avail; if (!mc) @@ -69,6 +70,9 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, to += first_read; count -= first_read; pos -= avail; + + if (count <= 0) + goto out; } /* Sanity check. The firmware should not do this to us. */ diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index d202f9bc3683..9d1acf22a099 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -260,10 +260,10 @@ void __init opal_sys_param_init(void) attr[i].kobj_attr.attr.mode = S_IRUGO; break; case OPAL_SYSPARAM_WRITE: - attr[i].kobj_attr.attr.mode = S_IWUGO; + attr[i].kobj_attr.attr.mode = S_IWUSR; break; case OPAL_SYSPARAM_RW: - attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUGO; + attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUSR; break; default: break; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index eefbfcc3fd8c..f91a4e5d872e 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -206,72 +206,91 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, data = (struct OpalIoPhb3ErrorData*)common; pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n", - hose->global_number, common->version); + hose->global_number, be32_to_cpu(common->version)); if (data->brdgCtl) pr_info("brdgCtl: %08x\n", - data->brdgCtl); + be32_to_cpu(data->brdgCtl)); if (data->portStatusReg || data->rootCmplxStatus || data->busAgentStatus) pr_info("UtlSts: %08x %08x %08x\n", - data->portStatusReg, data->rootCmplxStatus, - data->busAgentStatus); + be32_to_cpu(data->portStatusReg), + be32_to_cpu(data->rootCmplxStatus), + be32_to_cpu(data->busAgentStatus)); if (data->deviceStatus || data->slotStatus || data->linkStatus || data->devCmdStatus || data->devSecStatus) pr_info("RootSts: %08x %08x %08x %08x %08x\n", - data->deviceStatus, data->slotStatus, - data->linkStatus, data->devCmdStatus, - data->devSecStatus); + be32_to_cpu(data->deviceStatus), + be32_to_cpu(data->slotStatus), + be32_to_cpu(data->linkStatus), + be32_to_cpu(data->devCmdStatus), + be32_to_cpu(data->devSecStatus)); if (data->rootErrorStatus || data->uncorrErrorStatus || data->corrErrorStatus) pr_info("RootErrSts: %08x %08x %08x\n", - data->rootErrorStatus, data->uncorrErrorStatus, - data->corrErrorStatus); + be32_to_cpu(data->rootErrorStatus), + be32_to_cpu(data->uncorrErrorStatus), + be32_to_cpu(data->corrErrorStatus)); if (data->tlpHdr1 || data->tlpHdr2 || data->tlpHdr3 || data->tlpHdr4) pr_info("RootErrLog: %08x %08x %08x %08x\n", - data->tlpHdr1, data->tlpHdr2, - data->tlpHdr3, data->tlpHdr4); + be32_to_cpu(data->tlpHdr1), + be32_to_cpu(data->tlpHdr2), + be32_to_cpu(data->tlpHdr3), + be32_to_cpu(data->tlpHdr4)); if (data->sourceId || data->errorClass || data->correlator) pr_info("RootErrLog1: %08x %016llx %016llx\n", - data->sourceId, data->errorClass, - data->correlator); + be32_to_cpu(data->sourceId), + be64_to_cpu(data->errorClass), + be64_to_cpu(data->correlator)); if (data->nFir) pr_info("nFir: %016llx %016llx %016llx\n", - data->nFir, data->nFirMask, - data->nFirWOF); + be64_to_cpu(data->nFir), + be64_to_cpu(data->nFirMask), + be64_to_cpu(data->nFirWOF)); if (data->phbPlssr || data->phbCsr) pr_info("PhbSts: %016llx %016llx\n", - data->phbPlssr, data->phbCsr); + be64_to_cpu(data->phbPlssr), + be64_to_cpu(data->phbCsr)); if (data->lemFir) pr_info("Lem: %016llx %016llx %016llx\n", - data->lemFir, data->lemErrorMask, - data->lemWOF); + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrorMask), + be64_to_cpu(data->lemWOF)); if (data->phbErrorStatus) pr_info("PhbErr: %016llx %016llx %016llx %016llx\n", - data->phbErrorStatus, data->phbFirstErrorStatus, - data->phbErrorLog0, data->phbErrorLog1); + be64_to_cpu(data->phbErrorStatus), + be64_to_cpu(data->phbFirstErrorStatus), + be64_to_cpu(data->phbErrorLog0), + be64_to_cpu(data->phbErrorLog1)); if (data->mmioErrorStatus) pr_info("OutErr: %016llx %016llx %016llx %016llx\n", - data->mmioErrorStatus, data->mmioFirstErrorStatus, - data->mmioErrorLog0, data->mmioErrorLog1); + be64_to_cpu(data->mmioErrorStatus), + be64_to_cpu(data->mmioFirstErrorStatus), + be64_to_cpu(data->mmioErrorLog0), + be64_to_cpu(data->mmioErrorLog1)); if (data->dma0ErrorStatus) pr_info("InAErr: %016llx %016llx %016llx %016llx\n", - data->dma0ErrorStatus, data->dma0FirstErrorStatus, - data->dma0ErrorLog0, data->dma0ErrorLog1); + be64_to_cpu(data->dma0ErrorStatus), + be64_to_cpu(data->dma0FirstErrorStatus), + be64_to_cpu(data->dma0ErrorLog0), + be64_to_cpu(data->dma0ErrorLog1)); if (data->dma1ErrorStatus) pr_info("InBErr: %016llx %016llx %016llx %016llx\n", - data->dma1ErrorStatus, data->dma1FirstErrorStatus, - data->dma1ErrorLog0, data->dma1ErrorLog1); + be64_to_cpu(data->dma1ErrorStatus), + be64_to_cpu(data->dma1FirstErrorStatus), + be64_to_cpu(data->dma1ErrorLog0), + be64_to_cpu(data->dma1ErrorLog1)); for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { - if ((data->pestA[i] >> 63) == 0 && - (data->pestB[i] >> 63) == 0) + if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 && + (be64_to_cpu(data->pestB[i]) >> 63) == 0) continue; pr_info("PE[%3d] A/B: %016llx %016llx\n", - i, data->pestA[i], data->pestB[i]); + i, be64_to_cpu(data->pestA[i]), + be64_to_cpu(data->pestB[i])); } } @@ -284,7 +303,7 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, return; common = (struct OpalIoPhbErrorCommon *)log_buff; - switch (common->ioType) { + switch (be32_to_cpu(common->ioType)) { case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: pnv_pci_dump_p7ioc_diag_data(hose, common); break; @@ -293,7 +312,7 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, break; default: pr_warn("%s: Unrecognized ioType %d\n", - __func__, common->ioType); + __func__, be32_to_cpu(common->ioType)); } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 8c16a5f96728..d9b88fa7c5a3 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -35,11 +35,14 @@ #include <asm/rtas.h> #include <asm/opal.h> #include <asm/kexec.h> +#include <asm/smp.h> #include "powernv.h" static void __init pnv_setup_arch(void) { + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 0062a43a2e0d..5fcfcf44e3a9 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -32,6 +32,7 @@ #include <asm/opal.h> #include <asm/runlatch.h> #include <asm/code-patching.h> +#include <asm/dbell.h> #include "powernv.h" @@ -46,6 +47,11 @@ static void pnv_smp_setup_cpu(int cpu) { if (cpu != boot_cpuid) xics_setup_cpu(); + +#ifdef CONFIG_PPC_DOORBELL + if (cpu_has_feature(CPU_FTR_DBELL)) + doorbell_setup_this_cpu(); +#endif } int pnv_smp_kick_cpu(int nr) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 2cb8b776c84a..756b482f819a 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -21,6 +21,7 @@ config PPC_PSERIES select HAVE_CONTEXT_TRACKING select HOTPLUG_CPU if SMP select ARCH_RANDOM + select PPC_DOORBELL default y config PPC_SPLPAR diff --git a/arch/powerpc/platforms/wsp/Kconfig b/arch/powerpc/platforms/wsp/Kconfig deleted file mode 100644 index 422a175b10ee..000000000000 --- a/arch/powerpc/platforms/wsp/Kconfig +++ /dev/null @@ -1,30 +0,0 @@ -config PPC_WSP - bool - select PPC_A2 - select GENERIC_TBSYNC - select PPC_ICSWX - select PPC_SCOM - select PPC_XICS - select PPC_ICP_NATIVE - select PCI - select PPC_IO_WORKAROUNDS if PCI - select PPC_INDIRECT_PIO if PCI - default n - -menu "WSP platform selection" - depends on PPC_BOOK3E_64 - -config PPC_PSR2 - bool "PowerEN System Reference Platform 2" - select EPAPR_BOOT - select PPC_WSP - default y - -config PPC_CHROMA - bool "PowerEN PCIe Chroma Card" - select EPAPR_BOOT - select PPC_WSP - select OF_DYNAMIC - default y - -endmenu diff --git a/arch/powerpc/platforms/wsp/Makefile b/arch/powerpc/platforms/wsp/Makefile deleted file mode 100644 index 162fc60125a2..000000000000 --- a/arch/powerpc/platforms/wsp/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -ccflags-y += $(NO_MINIMAL_TOC) - -obj-y += setup.o ics.o wsp.o -obj-$(CONFIG_PPC_PSR2) += psr2.o -obj-$(CONFIG_PPC_CHROMA) += chroma.o h8.o -obj-$(CONFIG_PPC_WSP) += opb_pic.o -obj-$(CONFIG_PPC_WSP) += scom_wsp.o -obj-$(CONFIG_SMP) += smp.o scom_smp.o -obj-$(CONFIG_PCI) += wsp_pci.o -obj-$(CONFIG_PCI_MSI) += msi.o diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c deleted file mode 100644 index aaa46b353715..000000000000 --- a/arch/powerpc/platforms/wsp/chroma.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/of.h> -#include <linux/smp.h> -#include <linux/time.h> -#include <linux/of_fdt.h> - -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "ics.h" -#include "wsp.h" - -void __init chroma_setup_arch(void) -{ - wsp_setup_arch(); - wsp_setup_h8(); - -} - -static int __init chroma_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) - return 0; - - return 1; -} - -define_machine(chroma_md) { - .name = "Chroma PCIe", - .probe = chroma_probe, - .setup_arch = chroma_setup_arch, - .restart = wsp_h8_restart, - .power_off = wsp_h8_power_off, - .halt = wsp_halt, - .calibrate_decr = generic_calibrate_decr, - .init_IRQ = wsp_setup_irq, - .progress = udbg_progress, - .power_save = book3e_idle, -}; - -machine_arch_initcall(chroma_md, wsp_probe_devices); diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c deleted file mode 100644 index a3c87f395750..000000000000 --- a/arch/powerpc/platforms/wsp/h8.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/io.h> -#include <linux/of_address.h> - -#include "wsp.h" - -/* - * The UART connection to the H8 is over ttyS1 which is just a 16550. - * We assume that FW has it setup right and no one messes with it. - */ - - -static u8 __iomem *h8; - -#define RBR 0 /* Receiver Buffer Register */ -#define THR 0 /* Transmitter Holding Register */ -#define LSR 5 /* Line Status Register */ -#define LSR_DR 0x01 /* LSR value for Data-Ready */ -#define LSR_THRE 0x20 /* LSR value for Transmitter-Holding-Register-Empty */ -static void wsp_h8_putc(int c) -{ - u8 lsr; - - do { - lsr = readb(h8 + LSR); - } while ((lsr & LSR_THRE) != LSR_THRE); - writeb(c, h8 + THR); -} - -static int wsp_h8_getc(void) -{ - u8 lsr; - - do { - lsr = readb(h8 + LSR); - } while ((lsr & LSR_DR) != LSR_DR); - - return readb(h8 + RBR); -} - -static void wsp_h8_puts(const char *s, int sz) -{ - int i; - - for (i = 0; i < sz; i++) { - wsp_h8_putc(s[i]); - - /* no flow control so wait for echo */ - wsp_h8_getc(); - } - wsp_h8_putc('\r'); - wsp_h8_putc('\n'); -} - -static void wsp_h8_terminal_cmd(const char *cmd, int sz) -{ - hard_irq_disable(); - wsp_h8_puts(cmd, sz); - /* should never return, but just in case */ - for (;;) - continue; -} - - -void wsp_h8_restart(char *cmd) -{ - static const char restart[] = "warm-reset"; - - (void)cmd; - wsp_h8_terminal_cmd(restart, sizeof(restart) - 1); -} - -void wsp_h8_power_off(void) -{ - static const char off[] = "power-off"; - - wsp_h8_terminal_cmd(off, sizeof(off) - 1); -} - -static void __iomem *wsp_h8_getaddr(void) -{ - struct device_node *aliases; - struct device_node *uart; - struct property *path; - void __iomem *va = NULL; - - /* - * there is nothing in the devtree to tell us which is mapped - * to the H8, but se know it is the second serial port. - */ - - aliases = of_find_node_by_path("/aliases"); - if (aliases == NULL) - return NULL; - - path = of_find_property(aliases, "serial1", NULL); - if (path == NULL) - goto out; - - uart = of_find_node_by_path(path->value); - if (uart == NULL) - goto out; - - va = of_iomap(uart, 0); - - /* remove it so no one messes with it */ - of_detach_node(uart); - of_node_put(uart); - -out: - of_node_put(aliases); - - return va; -} - -void __init wsp_setup_h8(void) -{ - h8 = wsp_h8_getaddr(); - - /* Devtree change? lets hard map it anyway */ - if (h8 == NULL) { - pr_warn("UART to H8 could not be found"); - h8 = ioremap(0xffc0008000ULL, 0x100); - } -} diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c deleted file mode 100644 index 9cd92e645028..000000000000 --- a/arch/powerpc/platforms/wsp/ics.c +++ /dev/null @@ -1,762 +0,0 @@ -/* - * Copyright 2008-2011 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpu.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/msi.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/xics.h> - -#include "wsp.h" -#include "ics.h" - - -/* WSP ICS */ - -struct wsp_ics { - struct ics ics; - struct device_node *dn; - void __iomem *regs; - spinlock_t lock; - unsigned long *bitmap; - u32 chip_id; - u32 lsi_base; - u32 lsi_count; - u64 hwirq_start; - u64 count; -#ifdef CONFIG_SMP - int *hwirq_cpu_map; -#endif -}; - -#define to_wsp_ics(ics) container_of(ics, struct wsp_ics, ics) - -#define INT_SRC_LAYER_BUID_REG(base) ((base) + 0x00) -#define IODA_TBL_ADDR_REG(base) ((base) + 0x18) -#define IODA_TBL_DATA_REG(base) ((base) + 0x20) -#define XIVE_UPDATE_REG(base) ((base) + 0x28) -#define ICS_INT_CAPS_REG(base) ((base) + 0x30) - -#define TBL_AUTO_INCREMENT ((1UL << 63) | (1UL << 15)) -#define TBL_SELECT_XIST (1UL << 48) -#define TBL_SELECT_XIVT (1UL << 49) - -#define IODA_IRQ(irq) ((irq) & (0x7FFULL)) /* HRM 5.1.3.4 */ - -#define XIST_REQUIRED 0x8 -#define XIST_REJECTED 0x4 -#define XIST_PRESENTED 0x2 -#define XIST_PENDING 0x1 - -#define XIVE_SERVER_SHIFT 42 -#define XIVE_SERVER_MASK 0xFFFFULL -#define XIVE_PRIORITY_MASK 0xFFULL -#define XIVE_PRIORITY_SHIFT 32 -#define XIVE_WRITE_ENABLE (1ULL << 63) - -/* - * The docs refer to a 6 bit field called ChipID, which consists of a - * 3 bit NodeID and a 3 bit ChipID. On WSP the ChipID is always zero - * so we ignore it, and every where we use "chip id" in this code we - * mean the NodeID. - */ -#define WSP_ICS_CHIP_SHIFT 17 - - -static struct wsp_ics *ics_list; -static int num_ics; - -/* ICS Source controller accessors */ - -static u64 wsp_ics_get_xive(struct wsp_ics *ics, unsigned int irq) -{ - unsigned long flags; - u64 xive; - - spin_lock_irqsave(&ics->lock, flags); - out_be64(IODA_TBL_ADDR_REG(ics->regs), TBL_SELECT_XIVT | IODA_IRQ(irq)); - xive = in_be64(IODA_TBL_DATA_REG(ics->regs)); - spin_unlock_irqrestore(&ics->lock, flags); - - return xive; -} - -static void wsp_ics_set_xive(struct wsp_ics *ics, unsigned int irq, u64 xive) -{ - xive &= ~XIVE_ADDR_MASK; - xive |= (irq & XIVE_ADDR_MASK); - xive |= XIVE_WRITE_ENABLE; - - out_be64(XIVE_UPDATE_REG(ics->regs), xive); -} - -static u64 xive_set_server(u64 xive, unsigned int server) -{ - u64 mask = ~(XIVE_SERVER_MASK << XIVE_SERVER_SHIFT); - - xive &= mask; - xive |= (server & XIVE_SERVER_MASK) << XIVE_SERVER_SHIFT; - - return xive; -} - -static u64 xive_set_priority(u64 xive, unsigned int priority) -{ - u64 mask = ~(XIVE_PRIORITY_MASK << XIVE_PRIORITY_SHIFT); - - xive &= mask; - xive |= (priority & XIVE_PRIORITY_MASK) << XIVE_PRIORITY_SHIFT; - - return xive; -} - - -#ifdef CONFIG_SMP -/* Find logical CPUs within mask on a given chip and store result in ret */ -void cpus_on_chip(int chip_id, cpumask_t *mask, cpumask_t *ret) -{ - int cpu, chip; - struct device_node *cpu_dn, *dn; - const u32 *prop; - - cpumask_clear(ret); - for_each_cpu(cpu, mask) { - cpu_dn = of_get_cpu_node(cpu, NULL); - if (!cpu_dn) - continue; - - prop = of_get_property(cpu_dn, "at-node", NULL); - if (!prop) { - of_node_put(cpu_dn); - continue; - } - - dn = of_find_node_by_phandle(*prop); - of_node_put(cpu_dn); - - chip = wsp_get_chip_id(dn); - if (chip == chip_id) - cpumask_set_cpu(cpu, ret); - - of_node_put(dn); - } -} - -/* Store a suitable CPU to handle a hwirq in the ics->hwirq_cpu_map cache */ -static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, - const cpumask_t *affinity) -{ - cpumask_var_t avail, newmask; - int ret = -ENOMEM, cpu, cpu_rover = 0, target; - int index = hwirq - ics->hwirq_start; - unsigned int nodeid; - - BUG_ON(index < 0 || index >= ics->count); - - if (!ics->hwirq_cpu_map) - return -ENOMEM; - - if (!distribute_irqs) { - ics->hwirq_cpu_map[hwirq - ics->hwirq_start] = xics_default_server; - return 0; - } - - /* Allocate needed CPU masks */ - if (!alloc_cpumask_var(&avail, GFP_KERNEL)) - goto ret; - if (!alloc_cpumask_var(&newmask, GFP_KERNEL)) - goto freeavail; - - /* Find PBus attached to the source of this IRQ */ - nodeid = (hwirq >> WSP_ICS_CHIP_SHIFT) & 0x3; /* 12:14 */ - - /* Find CPUs that could handle this IRQ */ - if (affinity) - cpumask_and(avail, cpu_online_mask, affinity); - else - cpumask_copy(avail, cpu_online_mask); - - /* Narrow selection down to logical CPUs on the same chip */ - cpus_on_chip(nodeid, avail, newmask); - - /* Ensure we haven't narrowed it down to 0 */ - if (unlikely(cpumask_empty(newmask))) { - if (unlikely(cpumask_empty(avail))) { - ret = -1; - goto out; - } - cpumask_copy(newmask, avail); - } - - /* Choose a CPU out of those we narrowed it down to in round robin */ - target = hwirq % cpumask_weight(newmask); - for_each_cpu(cpu, newmask) { - if (cpu_rover++ >= target) { - ics->hwirq_cpu_map[index] = get_hard_smp_processor_id(cpu); - ret = 0; - goto out; - } - } - - /* Shouldn't happen */ - WARN_ON(1); - -out: - free_cpumask_var(newmask); -freeavail: - free_cpumask_var(avail); -ret: - if (ret < 0) { - ics->hwirq_cpu_map[index] = cpumask_first(cpu_online_mask); - pr_warning("Error, falling hwirq 0x%x routing back to CPU %i\n", - hwirq, ics->hwirq_cpu_map[index]); - } - return ret; -} - -static void alloc_irq_map(struct wsp_ics *ics) -{ - int i; - - ics->hwirq_cpu_map = kmalloc(sizeof(int) * ics->count, GFP_KERNEL); - if (!ics->hwirq_cpu_map) { - pr_warning("Allocate hwirq_cpu_map failed, " - "IRQ balancing disabled\n"); - return; - } - - for (i=0; i < ics->count; i++) - ics->hwirq_cpu_map[i] = xics_default_server; -} - -static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) -{ - int index = hwirq - ics->hwirq_start; - - BUG_ON(index < 0 || index >= ics->count); - - if (!ics->hwirq_cpu_map) - return xics_default_server; - - return ics->hwirq_cpu_map[index]; -} -#else /* !CONFIG_SMP */ -static int cache_hwirq_map(struct wsp_ics *ics, unsigned int hwirq, - const cpumask_t *affinity) -{ - return 0; -} - -static int get_irq_server(struct wsp_ics *ics, unsigned int hwirq) -{ - return xics_default_server; -} - -static void alloc_irq_map(struct wsp_ics *ics) { } -#endif - -static void wsp_chip_unmask_irq(struct irq_data *d) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics; - int server; - u64 xive; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return; - - ics = d->chip_data; - if (WARN_ON(!ics)) - return; - - server = get_irq_server(ics, hw_irq); - - xive = wsp_ics_get_xive(ics, hw_irq); - xive = xive_set_server(xive, server); - xive = xive_set_priority(xive, DEFAULT_PRIORITY); - wsp_ics_set_xive(ics, hw_irq, xive); -} - -static unsigned int wsp_chip_startup(struct irq_data *d) -{ - /* unmask it */ - wsp_chip_unmask_irq(d); - return 0; -} - -static void wsp_mask_real_irq(unsigned int hw_irq, struct wsp_ics *ics) -{ - u64 xive; - - if (hw_irq == XICS_IPI) - return; - - if (WARN_ON(!ics)) - return; - xive = wsp_ics_get_xive(ics, hw_irq); - xive = xive_set_server(xive, xics_default_server); - xive = xive_set_priority(xive, LOWEST_PRIORITY); - wsp_ics_set_xive(ics, hw_irq, xive); -} - -static void wsp_chip_mask_irq(struct irq_data *d) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics = d->chip_data; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return; - - wsp_mask_real_irq(hw_irq, ics); -} - -static int wsp_chip_set_affinity(struct irq_data *d, - const struct cpumask *cpumask, bool force) -{ - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct wsp_ics *ics; - int ret; - u64 xive; - - if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) - return -1; - - ics = d->chip_data; - if (WARN_ON(!ics)) - return -1; - xive = wsp_ics_get_xive(ics, hw_irq); - - /* - * For the moment only implement delivery to all cpus or one cpu. - * Get current irq_server for the given irq - */ - ret = cache_hwirq_map(ics, hw_irq, cpumask); - if (ret == -1) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - pr_warning("%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); - return -1; - } else if (ret == -ENOMEM) { - pr_warning("%s: Out of memory\n", __func__); - return -1; - } - - xive = xive_set_server(xive, get_irq_server(ics, hw_irq)); - wsp_ics_set_xive(ics, hw_irq, xive); - - return IRQ_SET_MASK_OK; -} - -static struct irq_chip wsp_irq_chip = { - .name = "WSP ICS", - .irq_startup = wsp_chip_startup, - .irq_mask = wsp_chip_mask_irq, - .irq_unmask = wsp_chip_unmask_irq, - .irq_set_affinity = wsp_chip_set_affinity -}; - -static int wsp_ics_host_match(struct ics *ics, struct device_node *dn) -{ - /* All ICSs in the system implement a global irq number space, - * so match against them all. */ - return of_device_is_compatible(dn, "ibm,ppc-xics"); -} - -static int wsp_ics_match_hwirq(struct wsp_ics *wsp_ics, unsigned int hwirq) -{ - if (hwirq >= wsp_ics->hwirq_start && - hwirq < wsp_ics->hwirq_start + wsp_ics->count) - return 1; - - return 0; -} - -static int wsp_ics_map(struct ics *ics, unsigned int virq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - unsigned int hw_irq = virq_to_hw(virq); - unsigned long flags; - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return -ENOENT; - - irq_set_chip_and_handler(virq, &wsp_irq_chip, handle_fasteoi_irq); - - irq_set_chip_data(virq, wsp_ics); - - spin_lock_irqsave(&wsp_ics->lock, flags); - bitmap_allocate_region(wsp_ics->bitmap, hw_irq - wsp_ics->hwirq_start, 0); - spin_unlock_irqrestore(&wsp_ics->lock, flags); - - return 0; -} - -static void wsp_ics_mask_unknown(struct ics *ics, unsigned long hw_irq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return; - - pr_err("%s: IRQ %lu (real) is invalid, disabling it.\n", __func__, hw_irq); - wsp_mask_real_irq(hw_irq, wsp_ics); -} - -static long wsp_ics_get_server(struct ics *ics, unsigned long hw_irq) -{ - struct wsp_ics *wsp_ics = to_wsp_ics(ics); - - if (!wsp_ics_match_hwirq(wsp_ics, hw_irq)) - return -ENOENT; - - return get_irq_server(wsp_ics, hw_irq); -} - -/* HW Number allocation API */ - -static struct wsp_ics *wsp_ics_find_dn_ics(struct device_node *dn) -{ - struct device_node *iparent; - int i; - - iparent = of_irq_find_parent(dn); - if (!iparent) { - pr_err("wsp_ics: Failed to find interrupt parent!\n"); - return NULL; - } - - for(i = 0; i < num_ics; i++) { - if(ics_list[i].dn == iparent) - break; - } - - if (i >= num_ics) { - pr_err("wsp_ics: Unable to find parent bitmap!\n"); - return NULL; - } - - return &ics_list[i]; -} - -int wsp_ics_alloc_irq(struct device_node *dn, int num) -{ - struct wsp_ics *ics; - int order, offset; - - ics = wsp_ics_find_dn_ics(dn); - if (!ics) - return -ENODEV; - - /* Fast, but overly strict if num isn't a power of two */ - order = get_count_order(num); - - spin_lock_irq(&ics->lock); - offset = bitmap_find_free_region(ics->bitmap, ics->count, order); - spin_unlock_irq(&ics->lock); - - if (offset < 0) - return offset; - - return offset + ics->hwirq_start; -} - -void wsp_ics_free_irq(struct device_node *dn, unsigned int irq) -{ - struct wsp_ics *ics; - - ics = wsp_ics_find_dn_ics(dn); - if (WARN_ON(!ics)) - return; - - spin_lock_irq(&ics->lock); - bitmap_release_region(ics->bitmap, irq, 0); - spin_unlock_irq(&ics->lock); -} - -/* Initialisation */ - -static int __init wsp_ics_bitmap_setup(struct wsp_ics *ics, - struct device_node *dn) -{ - int len, i, j, size; - u32 start, count; - const u32 *p; - - size = BITS_TO_LONGS(ics->count) * sizeof(long); - ics->bitmap = kzalloc(size, GFP_KERNEL); - if (!ics->bitmap) { - pr_err("wsp_ics: ENOMEM allocating IRQ bitmap!\n"); - return -ENOMEM; - } - - spin_lock_init(&ics->lock); - - p = of_get_property(dn, "available-ranges", &len); - if (!p || !len) { - /* FIXME this should be a WARN() once mambo is updated */ - pr_err("wsp_ics: No available-ranges defined for %s\n", - dn->full_name); - return 0; - } - - if (len % (2 * sizeof(u32)) != 0) { - /* FIXME this should be a WARN() once mambo is updated */ - pr_err("wsp_ics: Invalid available-ranges for %s\n", - dn->full_name); - return 0; - } - - bitmap_fill(ics->bitmap, ics->count); - - for (i = 0; i < len / sizeof(u32); i += 2) { - start = of_read_number(p + i, 1); - count = of_read_number(p + i + 1, 1); - - pr_devel("%s: start: %d count: %d\n", __func__, start, count); - - if ((start + count) > (ics->hwirq_start + ics->count) || - start < ics->hwirq_start) { - pr_err("wsp_ics: Invalid range! -> %d to %d\n", - start, start + count); - break; - } - - for (j = 0; j < count; j++) - bitmap_release_region(ics->bitmap, - (start + j) - ics->hwirq_start, 0); - } - - /* Ensure LSIs are not available for allocation */ - bitmap_allocate_region(ics->bitmap, ics->lsi_base, - get_count_order(ics->lsi_count)); - - return 0; -} - -static int __init wsp_ics_setup(struct wsp_ics *ics, struct device_node *dn) -{ - u32 lsi_buid, msi_buid, msi_base, msi_count; - void __iomem *regs; - const u32 *p; - int rc, len, i; - u64 caps, buid; - - p = of_get_property(dn, "interrupt-ranges", &len); - if (!p || len < (2 * sizeof(u32))) { - pr_err("wsp_ics: No/bad interrupt-ranges found on %s\n", - dn->full_name); - return -ENOENT; - } - - if (len > (2 * sizeof(u32))) { - pr_err("wsp_ics: Multiple ics ranges not supported.\n"); - return -EINVAL; - } - - regs = of_iomap(dn, 0); - if (!regs) { - pr_err("wsp_ics: of_iomap(%s) failed\n", dn->full_name); - return -ENXIO; - } - - ics->hwirq_start = of_read_number(p, 1); - ics->count = of_read_number(p + 1, 1); - ics->regs = regs; - - ics->chip_id = wsp_get_chip_id(dn); - if (WARN_ON(ics->chip_id < 0)) - ics->chip_id = 0; - - /* Get some informations about the critter */ - caps = in_be64(ICS_INT_CAPS_REG(ics->regs)); - buid = in_be64(INT_SRC_LAYER_BUID_REG(ics->regs)); - ics->lsi_count = caps >> 56; - msi_count = (caps >> 44) & 0x7ff; - - /* Note: LSI BUID is 9 bits, but really only 3 are BUID and the - * rest is mixed in the interrupt number. We store the whole - * thing though - */ - lsi_buid = (buid >> 48) & 0x1ff; - ics->lsi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | lsi_buid << 5; - msi_buid = (buid >> 37) & 0x7; - msi_base = (ics->chip_id << WSP_ICS_CHIP_SHIFT) | msi_buid << 11; - - pr_info("wsp_ics: Found %s\n", dn->full_name); - pr_info("wsp_ics: irq range : 0x%06llx..0x%06llx\n", - ics->hwirq_start, ics->hwirq_start + ics->count - 1); - pr_info("wsp_ics: %4d LSIs : 0x%06x..0x%06x\n", - ics->lsi_count, ics->lsi_base, - ics->lsi_base + ics->lsi_count - 1); - pr_info("wsp_ics: %4d MSIs : 0x%06x..0x%06x\n", - msi_count, msi_base, - msi_base + msi_count - 1); - - /* Let's check the HW config is sane */ - if (ics->lsi_base < ics->hwirq_start || - (ics->lsi_base + ics->lsi_count) > (ics->hwirq_start + ics->count)) - pr_warning("wsp_ics: WARNING ! LSIs out of interrupt-ranges !\n"); - if (msi_base < ics->hwirq_start || - (msi_base + msi_count) > (ics->hwirq_start + ics->count)) - pr_warning("wsp_ics: WARNING ! MSIs out of interrupt-ranges !\n"); - - /* We don't check for overlap between LSI and MSI, which will happen - * if we use the same BUID, I'm not sure yet how legit that is. - */ - - rc = wsp_ics_bitmap_setup(ics, dn); - if (rc) { - iounmap(regs); - return rc; - } - - ics->dn = of_node_get(dn); - alloc_irq_map(ics); - - for(i = 0; i < ics->count; i++) - wsp_mask_real_irq(ics->hwirq_start + i, ics); - - ics->ics.map = wsp_ics_map; - ics->ics.mask_unknown = wsp_ics_mask_unknown; - ics->ics.get_server = wsp_ics_get_server; - ics->ics.host_match = wsp_ics_host_match; - - xics_register_ics(&ics->ics); - - return 0; -} - -static void __init wsp_ics_set_default_server(void) -{ - struct device_node *np; - u32 hwid; - - /* Find the server number for the boot cpu. */ - np = of_get_cpu_node(boot_cpuid, NULL); - BUG_ON(!np); - - hwid = get_hard_smp_processor_id(boot_cpuid); - - pr_info("wsp_ics: default server is %#x, CPU %s\n", hwid, np->full_name); - xics_default_server = hwid; - - of_node_put(np); -} - -static int __init wsp_ics_init(void) -{ - struct device_node *dn; - struct wsp_ics *ics; - int rc, found; - - wsp_ics_set_default_server(); - - found = 0; - for_each_compatible_node(dn, NULL, "ibm,ppc-xics") - found++; - - if (found == 0) { - pr_err("wsp_ics: No ICS's found!\n"); - return -ENODEV; - } - - ics_list = kmalloc(sizeof(*ics) * found, GFP_KERNEL); - if (!ics_list) { - pr_err("wsp_ics: No memory for structs.\n"); - return -ENOMEM; - } - - num_ics = 0; - ics = ics_list; - for_each_compatible_node(dn, NULL, "ibm,wsp-xics") { - rc = wsp_ics_setup(ics, dn); - if (rc == 0) { - ics++; - num_ics++; - } - } - - if (found != num_ics) { - pr_err("wsp_ics: Failed setting up %d ICS's\n", - found - num_ics); - return -1; - } - - return 0; -} - -void __init wsp_init_irq(void) -{ - wsp_ics_init(); - xics_init(); - - /* We need to patch our irq chip's EOI to point to the right ICP */ - wsp_irq_chip.irq_eoi = icp_ops->eoi; -} - -#ifdef CONFIG_PCI_MSI -static void wsp_ics_msi_unmask_irq(struct irq_data *d) -{ - wsp_chip_unmask_irq(d); - unmask_msi_irq(d); -} - -static unsigned int wsp_ics_msi_startup(struct irq_data *d) -{ - wsp_ics_msi_unmask_irq(d); - return 0; -} - -static void wsp_ics_msi_mask_irq(struct irq_data *d) -{ - mask_msi_irq(d); - wsp_chip_mask_irq(d); -} - -/* - * we do it this way because we reassinge default EOI handling in - * irq_init() above - */ -static void wsp_ics_eoi(struct irq_data *data) -{ - wsp_irq_chip.irq_eoi(data); -} - -static struct irq_chip wsp_ics_msi = { - .name = "WSP ICS MSI", - .irq_startup = wsp_ics_msi_startup, - .irq_mask = wsp_ics_msi_mask_irq, - .irq_unmask = wsp_ics_msi_unmask_irq, - .irq_eoi = wsp_ics_eoi, - .irq_set_affinity = wsp_chip_set_affinity -}; - -void wsp_ics_set_msi_chip(unsigned int irq) -{ - irq_set_chip(irq, &wsp_ics_msi); -} - -void wsp_ics_set_std_chip(unsigned int irq) -{ - irq_set_chip(irq, &wsp_irq_chip); -} -#endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/wsp/ics.h b/arch/powerpc/platforms/wsp/ics.h deleted file mode 100644 index 07b644e0cf97..000000000000 --- a/arch/powerpc/platforms/wsp/ics.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2009 IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __ICS_H -#define __ICS_H - -#define XIVE_ADDR_MASK 0x7FFULL - -extern void wsp_init_irq(void); - -extern int wsp_ics_alloc_irq(struct device_node *dn, int num); -extern void wsp_ics_free_irq(struct device_node *dn, unsigned int irq); - -#ifdef CONFIG_PCI_MSI -extern void wsp_ics_set_msi_chip(unsigned int irq); -extern void wsp_ics_set_std_chip(unsigned int irq); -#endif /* CONFIG_PCI_MSI */ - -#endif /* __ICS_H */ diff --git a/arch/powerpc/platforms/wsp/msi.c b/arch/powerpc/platforms/wsp/msi.c deleted file mode 100644 index 380882f27add..000000000000 --- a/arch/powerpc/platforms/wsp/msi.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/msi.h> -#include <linux/irq.h> -#include <linux/interrupt.h> - -#include "msi.h" -#include "ics.h" -#include "wsp_pci.h" - -/* Magic addresses for 32 & 64-bit MSIs with hardcoded MVE 0 */ -#define MSI_ADDR_32 0xFFFF0000ul -#define MSI_ADDR_64 0x1000000000000000ul - -int wsp_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - struct pci_controller *phb; - struct msi_desc *entry; - struct msi_msg msg; - unsigned int virq; - int hwirq; - - phb = pci_bus_to_host(dev->bus); - if (!phb) - return -ENOENT; - - entry = list_first_entry(&dev->msi_list, struct msi_desc, list); - if (entry->msi_attrib.is_64) { - msg.address_lo = 0; - msg.address_hi = MSI_ADDR_64 >> 32; - } else { - msg.address_lo = MSI_ADDR_32; - msg.address_hi = 0; - } - - list_for_each_entry(entry, &dev->msi_list, list) { - hwirq = wsp_ics_alloc_irq(phb->dn, 1); - if (hwirq < 0) { - dev_warn(&dev->dev, "wsp_msi: hwirq alloc failed!\n"); - return hwirq; - } - - virq = irq_create_mapping(NULL, hwirq); - if (virq == NO_IRQ) { - dev_warn(&dev->dev, "wsp_msi: virq alloc failed!\n"); - return -1; - } - - dev_dbg(&dev->dev, "wsp_msi: allocated irq %#x/%#x\n", - hwirq, virq); - - wsp_ics_set_msi_chip(virq); - irq_set_msi_desc(virq, entry); - msg.data = hwirq & XIVE_ADDR_MASK; - write_msi_msg(virq, &msg); - } - - return 0; -} - -void wsp_teardown_msi_irqs(struct pci_dev *dev) -{ - struct pci_controller *phb; - struct msi_desc *entry; - int hwirq; - - phb = pci_bus_to_host(dev->bus); - - dev_dbg(&dev->dev, "wsp_msi: tearing down msi irqs\n"); - - list_for_each_entry(entry, &dev->msi_list, list) { - if (entry->irq == NO_IRQ) - continue; - - irq_set_msi_desc(entry->irq, NULL); - wsp_ics_set_std_chip(entry->irq); - - hwirq = virq_to_hw(entry->irq); - /* In this order to avoid racing with irq_create_mapping() */ - irq_dispose_mapping(entry->irq); - wsp_ics_free_irq(phb->dn, hwirq); - } -} - -void wsp_setup_phb_msi(struct pci_controller *phb) -{ - /* Create a single MVE at offset 0 that matches everything */ - out_be64(phb->cfg_data + PCIE_REG_IODA_ADDR, PCIE_REG_IODA_AD_TBL_MVT); - out_be64(phb->cfg_data + PCIE_REG_IODA_DATA0, 1ull << 63); - - ppc_md.setup_msi_irqs = wsp_setup_msi_irqs; - ppc_md.teardown_msi_irqs = wsp_teardown_msi_irqs; -} diff --git a/arch/powerpc/platforms/wsp/msi.h b/arch/powerpc/platforms/wsp/msi.h deleted file mode 100644 index 0ab27b71b24d..000000000000 --- a/arch/powerpc/platforms/wsp/msi.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright 2011 Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __WSP_MSI_H -#define __WSP_MSI_H - -#ifdef CONFIG_PCI_MSI -extern void wsp_setup_phb_msi(struct pci_controller *phb); -#else -static inline void wsp_setup_phb_msi(struct pci_controller *phb) { } -#endif - -#endif /* __WSP_MSI_H */ diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c deleted file mode 100644 index 3f6729807938..000000000000 --- a/arch/powerpc/platforms/wsp/opb_pic.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * IBM Onboard Peripheral Bus Interrupt Controller - * - * Copyright 2010 Jack Miller, IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/of.h> -#include <linux/slab.h> -#include <linux/time.h> -#include <linux/of_address.h> -#include <linux/of_irq.h> - -#include <asm/reg_a2.h> -#include <asm/irq.h> - -#define OPB_NR_IRQS 32 - -#define OPB_MLSASIER 0x04 /* MLS Accumulated Status IER */ -#define OPB_MLSIR 0x50 /* MLS Interrupt Register */ -#define OPB_MLSIER 0x54 /* MLS Interrupt Enable Register */ -#define OPB_MLSIPR 0x58 /* MLS Interrupt Polarity Register */ -#define OPB_MLSIIR 0x5c /* MLS Interrupt Inputs Register */ - -static int opb_index = 0; - -struct opb_pic { - struct irq_domain *host; - void *regs; - int index; - spinlock_t lock; -}; - -static u32 opb_in(struct opb_pic *opb, int offset) -{ - return in_be32(opb->regs + offset); -} - -static void opb_out(struct opb_pic *opb, int offset, u32 val) -{ - out_be32(opb->regs + offset, val); -} - -static void opb_unmask_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 ier, bitset; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier | bitset); - ier = opb_in(opb, OPB_MLSIER); - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_mask_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 ier, mask; - - opb = d->chip_data; - mask = ~(1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier & mask); - ier = opb_in(opb, OPB_MLSIER); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_ack_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 bitset; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - opb_out(opb, OPB_MLSIR, bitset); - opb_in(opb, OPB_MLSIR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static void opb_mask_ack_irq(struct irq_data *d) -{ - struct opb_pic *opb; - unsigned long flags; - u32 bitset; - u32 ier, ir; - - opb = d->chip_data; - bitset = (1 << (31 - irqd_to_hwirq(d))); - - spin_lock_irqsave(&opb->lock, flags); - - ier = opb_in(opb, OPB_MLSIER); - opb_out(opb, OPB_MLSIER, ier & ~bitset); - ier = opb_in(opb, OPB_MLSIER); // Flush posted writes - - opb_out(opb, OPB_MLSIR, bitset); - ir = opb_in(opb, OPB_MLSIR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); -} - -static int opb_set_irq_type(struct irq_data *d, unsigned int flow) -{ - struct opb_pic *opb; - unsigned long flags; - int invert, ipr, mask, bit; - - opb = d->chip_data; - - /* The only information we're interested in in the type is whether it's - * a high or low trigger. For high triggered interrupts, the polarity - * set for it in the MLS Interrupt Polarity Register is 0, for low - * interrupts it's 1 so that the proper input in the MLS Interrupt Input - * Register is interrupted as asserting the interrupt. */ - - switch (flow) { - case IRQ_TYPE_NONE: - opb_mask_irq(d); - return 0; - - case IRQ_TYPE_LEVEL_HIGH: - invert = 0; - break; - - case IRQ_TYPE_LEVEL_LOW: - invert = 1; - break; - - default: - return -EINVAL; - } - - bit = (1 << (31 - irqd_to_hwirq(d))); - mask = ~bit; - - spin_lock_irqsave(&opb->lock, flags); - - ipr = opb_in(opb, OPB_MLSIPR); - ipr = (ipr & mask) | (invert ? bit : 0); - opb_out(opb, OPB_MLSIPR, ipr); - ipr = opb_in(opb, OPB_MLSIPR); // Flush posted writes - - spin_unlock_irqrestore(&opb->lock, flags); - - /* Record the type in the interrupt descriptor */ - irqd_set_trigger_type(d, flow); - - return 0; -} - -static struct irq_chip opb_irq_chip = { - .name = "OPB", - .irq_mask = opb_mask_irq, - .irq_unmask = opb_unmask_irq, - .irq_mask_ack = opb_mask_ack_irq, - .irq_ack = opb_ack_irq, - .irq_set_type = opb_set_irq_type -}; - -static int opb_host_map(struct irq_domain *host, unsigned int virq, - irq_hw_number_t hwirq) -{ - struct opb_pic *opb; - - opb = host->host_data; - - /* Most of the important stuff is handled by the generic host code, like - * the lookup, so just attach some info to the virtual irq */ - - irq_set_chip_data(virq, opb); - irq_set_chip_and_handler(virq, &opb_irq_chip, handle_level_irq); - irq_set_irq_type(virq, IRQ_TYPE_NONE); - - return 0; -} - -static const struct irq_domain_ops opb_host_ops = { - .map = opb_host_map, - .xlate = irq_domain_xlate_twocell, -}; - -irqreturn_t opb_irq_handler(int irq, void *private) -{ - struct opb_pic *opb; - u32 ir, src, subvirq; - - opb = (struct opb_pic *) private; - - /* Read the OPB MLS Interrupt Register for - * asserted interrupts */ - ir = opb_in(opb, OPB_MLSIR); - if (!ir) - return IRQ_NONE; - - do { - /* Get 1 - 32 source, *NOT* bit */ - src = 32 - ffs(ir); - - /* Translate from the OPB's conception of interrupt number to - * Linux's virtual IRQ */ - - subvirq = irq_linear_revmap(opb->host, src); - - generic_handle_irq(subvirq); - } while ((ir = opb_in(opb, OPB_MLSIR))); - - return IRQ_HANDLED; -} - -struct opb_pic *opb_pic_init_one(struct device_node *dn) -{ - struct opb_pic *opb; - struct resource res; - - if (of_address_to_resource(dn, 0, &res)) { - printk(KERN_ERR "opb: Couldn't translate resource\n"); - return NULL; - } - - opb = kzalloc(sizeof(struct opb_pic), GFP_KERNEL); - if (!opb) { - printk(KERN_ERR "opb: Failed to allocate opb struct!\n"); - return NULL; - } - - /* Get access to the OPB MMIO registers */ - opb->regs = ioremap(res.start + 0x10000, 0x1000); - if (!opb->regs) { - printk(KERN_ERR "opb: Failed to allocate register space!\n"); - goto free_opb; - } - - /* Allocate an irq domain so that Linux knows that despite only - * having one interrupt to issue, we're the controller for multiple - * hardware IRQs, so later we can lookup their virtual IRQs. */ - - opb->host = irq_domain_add_linear(dn, OPB_NR_IRQS, &opb_host_ops, opb); - if (!opb->host) { - printk(KERN_ERR "opb: Failed to allocate IRQ host!\n"); - goto free_regs; - } - - opb->index = opb_index++; - spin_lock_init(&opb->lock); - - /* Disable all interrupts by default */ - opb_out(opb, OPB_MLSASIER, 0); - opb_out(opb, OPB_MLSIER, 0); - - /* ACK any interrupts left by FW */ - opb_out(opb, OPB_MLSIR, 0xFFFFFFFF); - - return opb; - -free_regs: - iounmap(opb->regs); -free_opb: - kfree(opb); - return NULL; -} - -void __init opb_pic_init(void) -{ - struct device_node *dn; - struct opb_pic *opb; - int virq; - int rc; - - /* Call init_one for each OPB device */ - for_each_compatible_node(dn, NULL, "ibm,opb") { - - /* Fill in an OPB struct */ - opb = opb_pic_init_one(dn); - if (!opb) { - printk(KERN_WARNING "opb: Failed to init node, skipped!\n"); - continue; - } - - /* Map / get opb's hardware virtual irq */ - virq = irq_of_parse_and_map(dn, 0); - if (virq <= 0) { - printk("opb: irq_op_parse_and_map failed!\n"); - continue; - } - - /* Attach opb interrupt handler to new virtual IRQ */ - rc = request_irq(virq, opb_irq_handler, IRQF_NO_THREAD, - "OPB LS Cascade", opb); - if (rc) { - printk("opb: request_irq failed: %d\n", rc); - continue; - } - - printk("OPB%d init with %d IRQs at %p\n", opb->index, - OPB_NR_IRQS, opb->regs); - } -} diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c deleted file mode 100644 index a87b414c766a..000000000000 --- a/arch/powerpc/platforms/wsp/psr2.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/delay.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/of.h> -#include <linux/smp.h> -#include <linux/time.h> -#include <linux/of_fdt.h> - -#include <asm/machdep.h> -#include <asm/udbg.h> - -#include "ics.h" -#include "wsp.h" - - -static void psr2_spin(void) -{ - hard_irq_disable(); - for (;;) - continue; -} - -static void psr2_restart(char *cmd) -{ - psr2_spin(); -} - -static int __init psr2_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "ibm,wsp-chroma")) { - /* chroma systems also claim they are psr2s */ - return 0; - } - - if (!of_flat_dt_is_compatible(root, "ibm,psr2")) - return 0; - - return 1; -} - -define_machine(psr2_md) { - .name = "PSR2 A2", - .probe = psr2_probe, - .setup_arch = wsp_setup_arch, - .restart = psr2_restart, - .power_off = psr2_spin, - .halt = psr2_spin, - .calibrate_decr = generic_calibrate_decr, - .init_IRQ = wsp_setup_irq, - .progress = udbg_progress, - .power_save = book3e_idle, -}; - -machine_arch_initcall(psr2_md, wsp_probe_devices); diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c deleted file mode 100644 index 8c79ce016cf1..000000000000 --- a/arch/powerpc/platforms/wsp/scom_smp.c +++ /dev/null @@ -1,435 +0,0 @@ -/* - * SCOM support for A2 platforms - * - * Copyright 2007-2011 Benjamin Herrenschmidt, David Gibson, - * Michael Ellerman, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpumask.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/spinlock.h> -#include <linux/types.h> - -#include <asm/cputhreads.h> -#include <asm/reg_a2.h> -#include <asm/scom.h> -#include <asm/udbg.h> -#include <asm/code-patching.h> - -#include "wsp.h" - -#define SCOM_RAMC 0x2a /* Ram Command */ -#define SCOM_RAMC_TGT1_EXT 0x80000000 -#define SCOM_RAMC_SRC1_EXT 0x40000000 -#define SCOM_RAMC_SRC2_EXT 0x20000000 -#define SCOM_RAMC_SRC3_EXT 0x10000000 -#define SCOM_RAMC_ENABLE 0x00080000 -#define SCOM_RAMC_THREADSEL 0x00060000 -#define SCOM_RAMC_EXECUTE 0x00010000 -#define SCOM_RAMC_MSR_OVERRIDE 0x00008000 -#define SCOM_RAMC_MSR_PR 0x00004000 -#define SCOM_RAMC_MSR_GS 0x00002000 -#define SCOM_RAMC_FORCE 0x00001000 -#define SCOM_RAMC_FLUSH 0x00000800 -#define SCOM_RAMC_INTERRUPT 0x00000004 -#define SCOM_RAMC_ERROR 0x00000002 -#define SCOM_RAMC_DONE 0x00000001 -#define SCOM_RAMI 0x29 /* Ram Instruction */ -#define SCOM_RAMIC 0x28 /* Ram Instruction and Command */ -#define SCOM_RAMIC_INSN 0xffffffff00000000 -#define SCOM_RAMD 0x2d /* Ram Data */ -#define SCOM_RAMDH 0x2e /* Ram Data High */ -#define SCOM_RAMDL 0x2f /* Ram Data Low */ -#define SCOM_PCCR0 0x33 /* PC Configuration Register 0 */ -#define SCOM_PCCR0_ENABLE_DEBUG 0x80000000 -#define SCOM_PCCR0_ENABLE_RAM 0x40000000 -#define SCOM_THRCTL 0x30 /* Thread Control and Status */ -#define SCOM_THRCTL_T0_STOP 0x80000000 -#define SCOM_THRCTL_T1_STOP 0x40000000 -#define SCOM_THRCTL_T2_STOP 0x20000000 -#define SCOM_THRCTL_T3_STOP 0x10000000 -#define SCOM_THRCTL_T0_STEP 0x08000000 -#define SCOM_THRCTL_T1_STEP 0x04000000 -#define SCOM_THRCTL_T2_STEP 0x02000000 -#define SCOM_THRCTL_T3_STEP 0x01000000 -#define SCOM_THRCTL_T0_RUN 0x00800000 -#define SCOM_THRCTL_T1_RUN 0x00400000 -#define SCOM_THRCTL_T2_RUN 0x00200000 -#define SCOM_THRCTL_T3_RUN 0x00100000 -#define SCOM_THRCTL_T0_PM 0x00080000 -#define SCOM_THRCTL_T1_PM 0x00040000 -#define SCOM_THRCTL_T2_PM 0x00020000 -#define SCOM_THRCTL_T3_PM 0x00010000 -#define SCOM_THRCTL_T0_UDE 0x00008000 -#define SCOM_THRCTL_T1_UDE 0x00004000 -#define SCOM_THRCTL_T2_UDE 0x00002000 -#define SCOM_THRCTL_T3_UDE 0x00001000 -#define SCOM_THRCTL_ASYNC_DIS 0x00000800 -#define SCOM_THRCTL_TB_DIS 0x00000400 -#define SCOM_THRCTL_DEC_DIS 0x00000200 -#define SCOM_THRCTL_AND 0x31 /* Thread Control and Status */ -#define SCOM_THRCTL_OR 0x32 /* Thread Control and Status */ - - -static DEFINE_PER_CPU(scom_map_t, scom_ptrs); - -static scom_map_t get_scom(int cpu, struct device_node *np, int *first_thread) -{ - scom_map_t scom = per_cpu(scom_ptrs, cpu); - int tcpu; - - if (scom_map_ok(scom)) { - *first_thread = 0; - return scom; - } - - *first_thread = 1; - - scom = scom_map_device(np, 0); - - for (tcpu = cpu_first_thread_sibling(cpu); - tcpu <= cpu_last_thread_sibling(cpu); tcpu++) - per_cpu(scom_ptrs, tcpu) = scom; - - /* Hack: for the boot core, this will actually get called on - * the second thread up, not the first so our test above will - * set first_thread incorrectly. */ - if (cpu_first_thread_sibling(cpu) == 0) - *first_thread = 0; - - return scom; -} - -static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask) -{ - u64 cmd, mask, val; - int n = 0; - - cmd = ((u64)insn << 32) | (((u64)extmask & 0xf) << 28) - | ((u64)thread << 17) | SCOM_RAMC_ENABLE | SCOM_RAMC_EXECUTE; - mask = SCOM_RAMC_DONE | SCOM_RAMC_INTERRUPT | SCOM_RAMC_ERROR; - - scom_write(scom, SCOM_RAMIC, cmd); - - for (;;) { - if (scom_read(scom, SCOM_RAMC, &val) != 0) { - pr_err("SCOM error on instruction 0x%08x, thread %d\n", - insn, thread); - return -1; - } - if (val & mask) - break; - pr_devel("Waiting on RAMC = 0x%llx\n", val); - if (++n == 3) { - pr_err("RAMC timeout on instruction 0x%08x, thread %d\n", - insn, thread); - return -1; - } - } - - if (val & SCOM_RAMC_INTERRUPT) { - pr_err("RAMC interrupt on instruction 0x%08x, thread %d\n", - insn, thread); - return -SCOM_RAMC_INTERRUPT; - } - - if (val & SCOM_RAMC_ERROR) { - pr_err("RAMC error on instruction 0x%08x, thread %d\n", - insn, thread); - return -SCOM_RAMC_ERROR; - } - - return 0; -} - -static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt, - u64 *out_gpr) -{ - int rc; - - /* or rN, rN, rN */ - u32 insn = 0x7c000378 | (gpr << 21) | (gpr << 16) | (gpr << 11); - rc = a2_scom_ram(scom, thread, insn, alt ? 0xf : 0x0); - if (rc) - return rc; - - return scom_read(scom, SCOM_RAMD, out_gpr); -} - -static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr) -{ - int rc, sprhi, sprlo; - u32 insn; - - sprhi = spr >> 5; - sprlo = spr & 0x1f; - insn = 0x7c2002a6 | (sprlo << 16) | (sprhi << 11); /* mfspr r1,spr */ - - if (spr == 0x0ff0) - insn = 0x7c2000a6; /* mfmsr r1 */ - - rc = a2_scom_ram(scom, thread, insn, 0xf); - if (rc) - return rc; - return a2_scom_getgpr(scom, thread, 1, 1, out_spr); -} - -static int a2_scom_setgpr(scom_map_t scom, int thread, int gpr, - int alt, u64 val) -{ - u32 lis = 0x3c000000 | (gpr << 21); - u32 li = 0x38000000 | (gpr << 21); - u32 oris = 0x64000000 | (gpr << 21) | (gpr << 16); - u32 ori = 0x60000000 | (gpr << 21) | (gpr << 16); - u32 rldicr32 = 0x780007c6 | (gpr << 21) | (gpr << 16); - u32 highest = val >> 48; - u32 higher = (val >> 32) & 0xffff; - u32 high = (val >> 16) & 0xffff; - u32 low = val & 0xffff; - int lext = alt ? 0x8 : 0x0; - int oext = alt ? 0xf : 0x0; - int rc = 0; - - if (highest) - rc |= a2_scom_ram(scom, thread, lis | highest, lext); - - if (higher) { - if (highest) - rc |= a2_scom_ram(scom, thread, oris | higher, oext); - else - rc |= a2_scom_ram(scom, thread, li | higher, lext); - } - - if (highest || higher) - rc |= a2_scom_ram(scom, thread, rldicr32, oext); - - if (high) { - if (highest || higher) - rc |= a2_scom_ram(scom, thread, oris | high, oext); - else - rc |= a2_scom_ram(scom, thread, lis | high, lext); - } - - if (highest || higher || high) - rc |= a2_scom_ram(scom, thread, ori | low, oext); - else - rc |= a2_scom_ram(scom, thread, li | low, lext); - - return rc; -} - -static int a2_scom_setspr(scom_map_t scom, int thread, int spr, u64 val) -{ - int sprhi = spr >> 5; - int sprlo = spr & 0x1f; - /* mtspr spr, r1 */ - u32 insn = 0x7c2003a6 | (sprlo << 16) | (sprhi << 11); - - if (spr == 0x0ff0) - insn = 0x7c200124; /* mtmsr r1 */ - - if (a2_scom_setgpr(scom, thread, 1, 1, val)) - return -1; - - return a2_scom_ram(scom, thread, insn, 0xf); -} - -static int a2_scom_initial_tlb(scom_map_t scom, int thread) -{ - extern u32 a2_tlbinit_code_start[], a2_tlbinit_code_end[]; - extern u32 a2_tlbinit_after_iprot_flush[]; - extern u32 a2_tlbinit_after_linear_map[]; - u32 assoc, entries, i; - u64 epn, tlbcfg; - u32 *p; - int rc; - - /* Invalidate all entries (including iprot) */ - - rc = a2_scom_getspr(scom, thread, SPRN_TLB0CFG, &tlbcfg); - if (rc) - goto scom_fail; - entries = tlbcfg & TLBnCFG_N_ENTRY; - assoc = (tlbcfg & TLBnCFG_ASSOC) >> 24; - epn = 0; - - /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ - a2_scom_setspr(scom, thread, SPRN_MMUCR2, 0x000a7531); - /* Set MMUCR3 to write all thids bit to the TLB */ - a2_scom_setspr(scom, thread, SPRN_MMUCR3, 0x0000000f); - - /* Set MAS1 for 1G page size, and MAS2 to our initial EPN */ - a2_scom_setspr(scom, thread, SPRN_MAS1, MAS1_TSIZE(BOOK3E_PAGESZ_1GB)); - a2_scom_setspr(scom, thread, SPRN_MAS2, epn); - for (i = 0; i < entries; i++) { - - a2_scom_setspr(scom, thread, SPRN_MAS0, MAS0_ESEL(i % assoc)); - - /* tlbwe */ - rc = a2_scom_ram(scom, thread, 0x7c0007a4, 0); - if (rc) - goto scom_fail; - - /* Next entry is new address? */ - if((i + 1) % assoc == 0) { - epn += (1 << 30); - a2_scom_setspr(scom, thread, SPRN_MAS2, epn); - } - } - - /* Setup args for linear mapping */ - rc = a2_scom_setgpr(scom, thread, 3, 0, MAS0_TLBSEL(0)); - if (rc) - goto scom_fail; - - /* Linear mapping */ - for (p = a2_tlbinit_code_start; p < a2_tlbinit_after_linear_map; p++) { - rc = a2_scom_ram(scom, thread, *p, 0); - if (rc) - goto scom_fail; - } - - /* - * For the boot thread, between the linear mapping and the debug - * mappings there is a loop to flush iprot mappings. Ramming doesn't do - * branches, but the secondary threads don't need to be nearly as smart - * (i.e. we don't need to worry about invalidating the mapping we're - * standing on). - */ - - /* Debug mappings. Expects r11 = MAS0 from linear map (set above) */ - for (p = a2_tlbinit_after_iprot_flush; p < a2_tlbinit_code_end; p++) { - rc = a2_scom_ram(scom, thread, *p, 0); - if (rc) - goto scom_fail; - } - -scom_fail: - if (rc) - pr_err("Setting up initial TLB failed, err %d\n", rc); - - if (rc == -SCOM_RAMC_INTERRUPT) { - /* Interrupt, dump some status */ - int rc[10]; - u64 iar, srr0, srr1, esr, mas0, mas1, mas2, mas7_3, mas8, ccr2; - rc[0] = a2_scom_getspr(scom, thread, SPRN_IAR, &iar); - rc[1] = a2_scom_getspr(scom, thread, SPRN_SRR0, &srr0); - rc[2] = a2_scom_getspr(scom, thread, SPRN_SRR1, &srr1); - rc[3] = a2_scom_getspr(scom, thread, SPRN_ESR, &esr); - rc[4] = a2_scom_getspr(scom, thread, SPRN_MAS0, &mas0); - rc[5] = a2_scom_getspr(scom, thread, SPRN_MAS1, &mas1); - rc[6] = a2_scom_getspr(scom, thread, SPRN_MAS2, &mas2); - rc[7] = a2_scom_getspr(scom, thread, SPRN_MAS7_MAS3, &mas7_3); - rc[8] = a2_scom_getspr(scom, thread, SPRN_MAS8, &mas8); - rc[9] = a2_scom_getspr(scom, thread, SPRN_A2_CCR2, &ccr2); - pr_err(" -> retreived IAR =0x%llx (err %d)\n", iar, rc[0]); - pr_err(" retreived SRR0=0x%llx (err %d)\n", srr0, rc[1]); - pr_err(" retreived SRR1=0x%llx (err %d)\n", srr1, rc[2]); - pr_err(" retreived ESR =0x%llx (err %d)\n", esr, rc[3]); - pr_err(" retreived MAS0=0x%llx (err %d)\n", mas0, rc[4]); - pr_err(" retreived MAS1=0x%llx (err %d)\n", mas1, rc[5]); - pr_err(" retreived MAS2=0x%llx (err %d)\n", mas2, rc[6]); - pr_err(" retreived MS73=0x%llx (err %d)\n", mas7_3, rc[7]); - pr_err(" retreived MAS8=0x%llx (err %d)\n", mas8, rc[8]); - pr_err(" retreived CCR2=0x%llx (err %d)\n", ccr2, rc[9]); - } - - return rc; -} - -int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np) -{ - u64 init_iar, init_msr, init_ccr2; - unsigned long start_here; - int rc, core_setup; - scom_map_t scom; - u64 pccr0; - - scom = get_scom(lcpu, np, &core_setup); - if (!scom) { - printk(KERN_ERR "Couldn't map SCOM for CPU%d\n", lcpu); - return -1; - } - - pr_devel("Bringing up CPU%d using SCOM...\n", lcpu); - - if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) { - printk(KERN_ERR "XSCOM failure readng PCCR0 on CPU%d\n", lcpu); - return -1; - } - scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG | - SCOM_PCCR0_ENABLE_RAM); - - /* Stop the thead with THRCTL. If we are setting up the TLB we stop all - * threads. We also disable asynchronous interrupts while RAMing. - */ - if (core_setup) - scom_write(scom, SCOM_THRCTL_OR, - SCOM_THRCTL_T0_STOP | - SCOM_THRCTL_T1_STOP | - SCOM_THRCTL_T2_STOP | - SCOM_THRCTL_T3_STOP | - SCOM_THRCTL_ASYNC_DIS); - else - scom_write(scom, SCOM_THRCTL_OR, SCOM_THRCTL_T0_STOP >> thr_idx); - - /* Flush its pipeline just in case */ - scom_write(scom, SCOM_RAMC, ((u64)thr_idx << 17) | - SCOM_RAMC_FLUSH | SCOM_RAMC_ENABLE); - - a2_scom_getspr(scom, thr_idx, SPRN_IAR, &init_iar); - a2_scom_getspr(scom, thr_idx, 0x0ff0, &init_msr); - a2_scom_getspr(scom, thr_idx, SPRN_A2_CCR2, &init_ccr2); - - /* Set MSR to MSR_CM (0x0ff0 is magic value for MSR_CM) */ - rc = a2_scom_setspr(scom, thr_idx, 0x0ff0, MSR_CM); - if (rc) { - pr_err("Failed to set MSR ! err %d\n", rc); - return rc; - } - - /* RAM in an sync/isync for the sake of it */ - a2_scom_ram(scom, thr_idx, 0x7c0004ac, 0); - a2_scom_ram(scom, thr_idx, 0x4c00012c, 0); - - if (core_setup) { - pr_devel("CPU%d is first thread in core, initializing TLB...\n", - lcpu); - rc = a2_scom_initial_tlb(scom, thr_idx); - if (rc) - goto fail; - } - - start_here = ppc_function_entry(core_setup ? generic_secondary_smp_init - : generic_secondary_thread_init); - pr_devel("CPU%d entry point at 0x%lx...\n", lcpu, start_here); - - rc |= a2_scom_setspr(scom, thr_idx, SPRN_IAR, start_here); - rc |= a2_scom_setgpr(scom, thr_idx, 3, 0, - get_hard_smp_processor_id(lcpu)); - /* - * Tell book3e_secondary_core_init not to set up the TLB, we've - * already done that. - */ - rc |= a2_scom_setgpr(scom, thr_idx, 4, 0, 1); - - rc |= a2_scom_setspr(scom, thr_idx, SPRN_TENS, 0x1 << thr_idx); - - scom_write(scom, SCOM_RAMC, 0); - scom_write(scom, SCOM_THRCTL_AND, ~(SCOM_THRCTL_T0_STOP >> thr_idx)); - scom_write(scom, SCOM_PCCR0, pccr0); -fail: - pr_devel(" SCOM initialization %s\n", rc ? "failed" : "succeeded"); - if (rc) { - pr_err("Old IAR=0x%08llx MSR=0x%08llx CCR2=0x%08llx\n", - init_iar, init_msr, init_ccr2); - } - - return rc; -} diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c deleted file mode 100644 index 6538b4de34fc..000000000000 --- a/arch/powerpc/platforms/wsp/scom_wsp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * SCOM backend for WSP - * - * Copyright 2010 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/cpumask.h> -#include <linux/io.h> -#include <linux/of.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <linux/of_address.h> - -#include <asm/cputhreads.h> -#include <asm/reg_a2.h> -#include <asm/scom.h> -#include <asm/udbg.h> - -#include "wsp.h" - - -static scom_map_t wsp_scom_map(struct device_node *dev, u64 reg, u64 count) -{ - struct resource r; - u64 xscom_addr; - - if (!of_get_property(dev, "scom-controller", NULL)) { - pr_err("%s: device %s is not a SCOM controller\n", - __func__, dev->full_name); - return SCOM_MAP_INVALID; - } - - if (of_address_to_resource(dev, 0, &r)) { - pr_debug("Failed to find SCOM controller address\n"); - return 0; - } - - /* Transform the SCOM address into an XSCOM offset */ - xscom_addr = ((reg & 0x7f000000) >> 1) | ((reg & 0xfffff) << 3); - - return (scom_map_t)ioremap(r.start + xscom_addr, count << 3); -} - -static void wsp_scom_unmap(scom_map_t map) -{ - iounmap((void *)map); -} - -static int wsp_scom_read(scom_map_t map, u64 reg, u64 *value) -{ - u64 __iomem *addr = (u64 __iomem *)map; - - *value = in_be64(addr + reg); - - return 0; -} - -static int wsp_scom_write(scom_map_t map, u64 reg, u64 value) -{ - u64 __iomem *addr = (u64 __iomem *)map; - - out_be64(addr + reg, value); - - return 0; -} - -static const struct scom_controller wsp_scom_controller = { - .map = wsp_scom_map, - .unmap = wsp_scom_unmap, - .read = wsp_scom_read, - .write = wsp_scom_write -}; - -void scom_init_wsp(void) -{ - scom_init(&wsp_scom_controller); -} diff --git a/arch/powerpc/platforms/wsp/setup.c b/arch/powerpc/platforms/wsp/setup.c deleted file mode 100644 index 11ac2f05e01c..000000000000 --- a/arch/powerpc/platforms/wsp/setup.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2010 Michael Ellerman, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/of_platform.h> - -#include "wsp.h" - -/* - * Find chip-id by walking up device tree looking for ibm,wsp-chip-id property. - * Won't work for nodes that are not a descendant of a wsp node. - */ -int wsp_get_chip_id(struct device_node *dn) -{ - const u32 *p; - int rc; - - /* Start looking at the specified node, not its parent */ - dn = of_node_get(dn); - while (dn && !(p = of_get_property(dn, "ibm,wsp-chip-id", NULL))) - dn = of_get_next_parent(dn); - - if (!dn) - return -1; - - rc = *p; - of_node_put(dn); - - return rc; -} diff --git a/arch/powerpc/platforms/wsp/smp.c b/arch/powerpc/platforms/wsp/smp.c deleted file mode 100644 index 332a18b81403..000000000000 --- a/arch/powerpc/platforms/wsp/smp.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * SMP Support for A2 platforms - * - * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <linux/cpumask.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/smp.h> - -#include <asm/dbell.h> -#include <asm/machdep.h> -#include <asm/xics.h> - -#include "ics.h" -#include "wsp.h" - -static void smp_a2_setup_cpu(int cpu) -{ - doorbell_setup_this_cpu(); - - if (cpu != boot_cpuid) - xics_setup_cpu(); -} - -int smp_a2_kick_cpu(int nr) -{ - const char *enable_method; - struct device_node *np; - int thr_idx; - - if (nr < 0 || nr >= NR_CPUS) - return -ENOENT; - - np = of_get_cpu_node(nr, &thr_idx); - if (!np) - return -ENODEV; - - enable_method = of_get_property(np, "enable-method", NULL); - pr_devel("CPU%d has enable-method: \"%s\"\n", nr, enable_method); - - if (!enable_method) { - printk(KERN_ERR "CPU%d has no enable-method\n", nr); - return -ENOENT; - } else if (strcmp(enable_method, "ibm,a2-scom") == 0) { - if (a2_scom_startup_cpu(nr, thr_idx, np)) - return -1; - } else { - printk(KERN_ERR "CPU%d: Don't understand enable-method \"%s\"\n", - nr, enable_method); - return -EINVAL; - } - - /* - * The processor is currently spinning, waiting for the - * cpu_start field to become non-zero After we set cpu_start, - * the processor will continue on to secondary_start - */ - paca[nr].cpu_start = 1; - - return 0; -} - -static int __init smp_a2_probe(void) -{ - return num_possible_cpus(); -} - -static struct smp_ops_t a2_smp_ops = { - .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ - .cause_ipi = doorbell_cause_ipi, - .probe = smp_a2_probe, - .kick_cpu = smp_a2_kick_cpu, - .setup_cpu = smp_a2_setup_cpu, -}; - -void __init a2_setup_smp(void) -{ - smp_ops = &a2_smp_ops; -} diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c deleted file mode 100644 index 58cd1f00e1ef..000000000000 --- a/arch/powerpc/platforms/wsp/wsp.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2008-2011, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/smp.h> -#include <linux/delay.h> -#include <linux/time.h> -#include <linux/of_address.h> - -#include <asm/scom.h> - -#include "wsp.h" -#include "ics.h" - -#define WSP_SOC_COMPATIBLE "ibm,wsp-soc" -#define PBIC_COMPATIBLE "ibm,wsp-pbic" -#define COPRO_COMPATIBLE "ibm,wsp-coprocessor" - -static int __init wsp_probe_buses(void) -{ - static __initdata struct of_device_id bus_ids[] = { - /* - * every node in between needs to be here or you won't - * find it - */ - { .compatible = WSP_SOC_COMPATIBLE, }, - { .compatible = PBIC_COMPATIBLE, }, - { .compatible = COPRO_COMPATIBLE, }, - {}, - }; - of_platform_bus_probe(NULL, bus_ids, NULL); - - return 0; -} - -void __init wsp_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - scom_init_wsp(); - - /* Setup SMP callback */ -#ifdef CONFIG_SMP - a2_setup_smp(); -#endif -#ifdef CONFIG_PCI - wsp_setup_pci(); -#endif -} - -void __init wsp_setup_irq(void) -{ - wsp_init_irq(); - opb_pic_init(); -} - - -int __init wsp_probe_devices(void) -{ - struct device_node *np; - - /* Our RTC is a ds1500. It seems to be programatically compatible - * with the ds1511 for which we have a driver so let's use that - */ - np = of_find_compatible_node(NULL, NULL, "dallas,ds1500"); - if (np != NULL) { - struct resource res; - if (of_address_to_resource(np, 0, &res) == 0) - platform_device_register_simple("ds1511", 0, &res, 1); - } - - wsp_probe_buses(); - - return 0; -} - -void wsp_halt(void) -{ - u64 val; - scom_map_t m; - struct device_node *dn; - struct device_node *mine; - struct device_node *me; - int rc; - - me = of_get_cpu_node(smp_processor_id(), NULL); - mine = scom_find_parent(me); - - /* This will halt all the A2s but not power off the chip */ - for_each_node_with_property(dn, "scom-controller") { - if (dn == mine) - continue; - m = scom_map(dn, 0, 1); - - /* read-modify-write it so the HW probe does not get - * confused */ - rc = scom_read(m, 0, &val); - if (rc == 0) - scom_write(m, 0, val | 1); - scom_unmap(m); - } - m = scom_map(mine, 0, 1); - rc = scom_read(m, 0, &val); - if (rc == 0) - scom_write(m, 0, val | 1); - /* should never return */ - scom_unmap(m); -} diff --git a/arch/powerpc/platforms/wsp/wsp.h b/arch/powerpc/platforms/wsp/wsp.h deleted file mode 100644 index a563a8aaf812..000000000000 --- a/arch/powerpc/platforms/wsp/wsp.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __WSP_H -#define __WSP_H - -#include <asm/wsp.h> - -/* Devtree compatible strings for major devices */ -#define PCIE_COMPATIBLE "ibm,wsp-pciex" - -extern void wsp_setup_arch(void); -extern void wsp_setup_irq(void); -extern int wsp_probe_devices(void); -extern void wsp_halt(void); - -extern void wsp_setup_pci(void); -extern void scom_init_wsp(void); - -extern void a2_setup_smp(void); -extern int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, - struct device_node *np); -extern int smp_a2_kick_cpu(int nr); - -extern void opb_pic_init(void); - -/* chroma specific managment */ -extern void wsp_h8_restart(char *cmd); -extern void wsp_h8_power_off(void); -extern void __init wsp_setup_h8(void); - -#endif /* __WSP_H */ diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c deleted file mode 100644 index 9a15e5b39bb8..000000000000 --- a/arch/powerpc/platforms/wsp/wsp_pci.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - * Copyright 2010 Ben Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define DEBUG - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/debugfs.h> - -#include <asm/sections.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/pci-bridge.h> -#include <asm/machdep.h> -#include <asm/ppc-pci.h> -#include <asm/iommu.h> -#include <asm/io-workarounds.h> -#include <asm/debug.h> - -#include "wsp.h" -#include "wsp_pci.h" -#include "msi.h" - - -/* Max number of TVTs for one table. Only 32-bit tables can use - * multiple TVTs and so the max currently supported is thus 8 - * since only 2G of DMA space is supported - */ -#define MAX_TABLE_TVT_COUNT 8 - -struct wsp_dma_table { - struct list_head link; - struct iommu_table table; - struct wsp_phb *phb; - struct page *tces[MAX_TABLE_TVT_COUNT]; -}; - -/* We support DMA regions from 0...2G in 32bit space (no support for - * 64-bit DMA just yet). Each device gets a separate TCE table (TVT - * entry) with validation enabled (though not supported by SimiCS - * just yet). - * - * To simplify things, we divide this 2G space into N regions based - * on the constant below which could be turned into a tunable eventually - * - * We then assign dynamically those regions to devices as they show up. - * - * We use a bitmap as an allocator for these. - * - * Tables are allocated/created dynamically as devices are discovered, - * multiple TVT entries are used if needed - * - * When 64-bit DMA support is added we should simply use a separate set - * of larger regions (the HW supports 64 TVT entries). We can - * additionally create a bypass region in 64-bit space for performances - * though that would have a cost in term of security. - * - * If you set NUM_DMA32_REGIONS to 1, then a single table is shared - * for all devices and bus/dev/fn validation is disabled - * - * Note that a DMA32 region cannot be smaller than 256M so the max - * supported here for now is 8. We don't yet support sharing regions - * between multiple devices so the max number of devices supported - * is MAX_TABLE_TVT_COUNT. - */ -#define NUM_DMA32_REGIONS 1 - -struct wsp_phb { - struct pci_controller *hose; - - /* Lock controlling access to the list of dma tables. - * It does -not- protect against dma_* operations on - * those tables, those should be stopped before an entry - * is removed from the list. - * - * The lock is also used for error handling operations - */ - spinlock_t lock; - struct list_head dma_tables; - unsigned long dma32_map; - unsigned long dma32_base; - unsigned int dma32_num_regions; - unsigned long dma32_region_size; - - /* Debugfs stuff */ - struct dentry *ddir; - - struct list_head all; -}; -static LIST_HEAD(wsp_phbs); - -//#define cfg_debug(fmt...) pr_debug(fmt) -#define cfg_debug(fmt...) - - -static int wsp_pcie_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - int suboff; - u64 addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = PCIE_REG_CA_ENABLE | - ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | - ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | - ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; - suboff = offset & 3; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - - switch (len) { - case 1: - addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) - >> (suboff << 3)) & 0xff; - cfg_debug("read 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - case 2: - addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) - >> (suboff << 3)) & 0xffff; - cfg_debug("read 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - default: - addr |= 0xful << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - *val = in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA); - cfg_debug("read 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, *val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int wsp_pcie_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - int suboff; - u64 addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = PCIE_REG_CA_ENABLE | - ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | - ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | - ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; - suboff = offset & 3; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; - val <<= suboff << 3; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - case 2: - addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; - val <<= suboff << 3; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - default: - addr |= 0xful << PCIE_REG_CA_BE_SHIFT; - out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); - out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); - cfg_debug("write 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", - bus->number, devfn >> 3, devfn & 7, - offset, suboff, addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops wsp_pcie_pci_ops = -{ - .read = wsp_pcie_read_config, - .write = wsp_pcie_write_config, -}; - -#define TCE_SHIFT 12 -#define TCE_PAGE_SIZE (1 << TCE_SHIFT) -#define TCE_PCI_WRITE 0x2 /* write from PCI allowed */ -#define TCE_PCI_READ 0x1 /* read from PCI allowed */ -#define TCE_RPN_MASK 0x3fffffffffful /* 42-bit RPN (4K pages) */ -#define TCE_RPN_SHIFT 12 - -//#define dma_debug(fmt...) pr_debug(fmt) -#define dma_debug(fmt...) - -static int tce_build_wsp(struct iommu_table *tbl, long index, long npages, - unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) -{ - struct wsp_dma_table *ptbl = container_of(tbl, - struct wsp_dma_table, - table); - u64 proto_tce; - u64 *tcep; - u64 rpn; - - proto_tce = TCE_PCI_READ; -#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - proto_tce |= TCE_PCI_WRITE; -#else - if (direction != DMA_TO_DEVICE) - proto_tce |= TCE_PCI_WRITE; -#endif - - /* XXX Make this faster by factoring out the page address for - * within a TCE table - */ - while (npages--) { - /* We don't use it->base as the table can be scattered */ - tcep = (u64 *)page_address(ptbl->tces[index >> 16]); - tcep += (index & 0xffff); - - /* can't move this out since we might cross LMB boundary */ - rpn = __pa(uaddr) >> TCE_SHIFT; - *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - - dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n", - tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT_4K); - - uaddr += TCE_PAGE_SIZE; - index++; - } - return 0; -} - -static void tce_free_wsp(struct iommu_table *tbl, long index, long npages) -{ - struct wsp_dma_table *ptbl = container_of(tbl, - struct wsp_dma_table, - table); -#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - struct pci_controller *hose = ptbl->phb->hose; -#endif - u64 *tcep; - - /* XXX Make this faster by factoring out the page address for - * within a TCE table. Also use line-kill option to kill multiple - * TCEs at once - */ - while (npages--) { - /* We don't use it->base as the table can be scattered */ - tcep = (u64 *)page_address(ptbl->tces[index >> 16]); - tcep += (index & 0xffff); - dma_debug("[DMA] TCE %p cleared\n", tcep); - *tcep = 0; -#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - /* Don't write there since it would pollute other MMIO accesses */ - out_be64(hose->cfg_data + PCIE_REG_TCE_KILL, - PCIE_REG_TCEKILL_SINGLE | PCIE_REG_TCEKILL_PS_4K | - (__pa(tcep) & PCIE_REG_TCEKILL_ADDR_MASK)); -#endif - index++; - } -} - -static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb, - unsigned int region, - struct pci_dev *validate) -{ - struct pci_controller *hose = phb->hose; - unsigned long size = phb->dma32_region_size; - unsigned long addr = phb->dma32_region_size * region + phb->dma32_base; - struct wsp_dma_table *tbl; - int tvts_per_table, i, tvt, nid; - unsigned long flags; - - nid = of_node_to_nid(phb->hose->dn); - - /* Calculate how many TVTs are needed */ - tvts_per_table = size / 0x10000000; - if (tvts_per_table == 0) - tvts_per_table = 1; - - /* Calculate the base TVT index. We know all tables have the same - * size so we just do a simple multiply here - */ - tvt = region * tvts_per_table; - - pr_debug(" Region : %d\n", region); - pr_debug(" DMA range : 0x%08lx..0x%08lx\n", addr, addr + size - 1); - pr_debug(" Number of TVTs : %d\n", tvts_per_table); - pr_debug(" Base TVT : %d\n", tvt); - pr_debug(" Node : %d\n", nid); - - tbl = kzalloc_node(sizeof(struct wsp_dma_table), GFP_KERNEL, nid); - if (!tbl) - return ERR_PTR(-ENOMEM); - tbl->phb = phb; - - /* Create as many TVTs as needed, each represents 256M at most */ - for (i = 0; i < tvts_per_table; i++) { - u64 tvt_data1, tvt_data0; - - /* Allocate table. We use a 4K TCE size for now always so - * one table is always 8 * (258M / 4K) == 512K - */ - tbl->tces[i] = alloc_pages_node(nid, GFP_KERNEL, get_order(0x80000)); - if (tbl->tces[i] == NULL) - goto fail; - memset(page_address(tbl->tces[i]), 0, 0x80000); - - pr_debug(" TCE table %d at : %p\n", i, page_address(tbl->tces[i])); - - /* Table size. We currently set it to be the whole 256M region */ - tvt_data0 = 2ull << IODA_TVT0_TCE_TABLE_SIZE_SHIFT; - /* IO page size set to 4K */ - tvt_data1 = 1ull << IODA_TVT1_IO_PAGE_SIZE_SHIFT; - /* Shift in the address */ - tvt_data0 |= __pa(page_address(tbl->tces[i])) << IODA_TVT0_TTA_SHIFT; - - /* Validation stuff. We only validate fully bus/dev/fn for now - * one day maybe we can group devices but that isn't the case - * at the moment - */ - if (validate) { - tvt_data0 |= IODA_TVT0_BUSNUM_VALID_MASK; - tvt_data0 |= validate->bus->number; - tvt_data1 |= IODA_TVT1_DEVNUM_VALID; - tvt_data1 |= ((u64)PCI_SLOT(validate->devfn)) - << IODA_TVT1_DEVNUM_VALUE_SHIFT; - tvt_data1 |= IODA_TVT1_FUNCNUM_VALID; - tvt_data1 |= ((u64)PCI_FUNC(validate->devfn)) - << IODA_TVT1_FUNCNUM_VALUE_SHIFT; - } - - /* XX PE number is always 0 for now */ - - /* Program the values using the PHB lock */ - spin_lock_irqsave(&phb->lock, flags); - out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR, - (tvt + i) | PCIE_REG_IODA_AD_TBL_TVT); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, tvt_data1); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, tvt_data0); - spin_unlock_irqrestore(&phb->lock, flags); - } - - /* Init bits and pieces */ - tbl->table.it_blocksize = 16; - tbl->table.it_page_shift = IOMMU_PAGE_SHIFT_4K; - tbl->table.it_offset = addr >> tbl->table.it_page_shift; - tbl->table.it_size = size >> tbl->table.it_page_shift; - - /* - * It's already blank but we clear it anyway. - * Consider an aditiona interface that makes cleaing optional - */ - iommu_init_table(&tbl->table, nid); - - list_add(&tbl->link, &phb->dma_tables); - return tbl; - - fail: - pr_debug(" Failed to allocate a 256M TCE table !\n"); - for (i = 0; i < tvts_per_table; i++) - if (tbl->tces[i]) - __free_pages(tbl->tces[i], get_order(0x80000)); - kfree(tbl); - return ERR_PTR(-ENOMEM); -} - -static void wsp_pci_dma_dev_setup(struct pci_dev *pdev) -{ - struct dev_archdata *archdata = &pdev->dev.archdata; - struct pci_controller *hose = pci_bus_to_host(pdev->bus); - struct wsp_phb *phb = hose->private_data; - struct wsp_dma_table *table = NULL; - unsigned long flags; - int i; - - /* Don't assign an iommu table to a bridge */ - if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - return; - - pr_debug("%s: Setting up DMA...\n", pci_name(pdev)); - - spin_lock_irqsave(&phb->lock, flags); - - /* If only one region, check if it already exist */ - if (phb->dma32_num_regions == 1) { - spin_unlock_irqrestore(&phb->lock, flags); - if (list_empty(&phb->dma_tables)) - table = wsp_pci_create_dma32_table(phb, 0, NULL); - else - table = list_first_entry(&phb->dma_tables, - struct wsp_dma_table, - link); - } else { - /* else find a free region */ - for (i = 0; i < phb->dma32_num_regions && !table; i++) { - if (__test_and_set_bit(i, &phb->dma32_map)) - continue; - spin_unlock_irqrestore(&phb->lock, flags); - table = wsp_pci_create_dma32_table(phb, i, pdev); - } - } - - /* Check if we got an error */ - if (IS_ERR(table)) { - pr_err("%s: Failed to create DMA table, err %ld !\n", - pci_name(pdev), PTR_ERR(table)); - return; - } - - /* Or a valid table */ - if (table) { - pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n", - pci_name(pdev), - table->table.it_offset << IOMMU_PAGE_SHIFT_4K, - (table->table.it_offset << IOMMU_PAGE_SHIFT_4K) - + phb->dma32_region_size - 1); - archdata->dma_data.iommu_table_base = &table->table; - return; - } - - /* Or no room */ - spin_unlock_irqrestore(&phb->lock, flags); - pr_err("%s: Out of DMA space !\n", pci_name(pdev)); -} - -static void __init wsp_pcie_configure_hw(struct pci_controller *hose) -{ - u64 val; - int i; - -#define DUMP_REG(x) \ - pr_debug("%-30s : 0x%016llx\n", #x, in_be64(hose->cfg_data + x)) - - /* - * Some WSP variants has a bogus class code by default in the PCI-E - * root complex's built-in P2P bridge - */ - val = in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1); - pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", val); - out_be64(hose->cfg_data + PCIE_REG_SYS_CFG1, - (val & ~PCIE_REG_SYS_CFG1_CLASS_CODE) | (PCI_CLASS_BRIDGE_PCI << 8)); - pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1)); - -#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS - /* XXX Disable TCE caching, it doesn't work on DD1 */ - out_be64(hose->cfg_data + 0xe50, - in_be64(hose->cfg_data + 0xe50) | (3ull << 62)); - printk("PCI-E DEBUG CONTROL 5 = 0x%llx\n", in_be64(hose->cfg_data + 0xe50)); -#endif - - /* Configure M32A and IO. IO is hard wired to be 1M for now */ - out_be64(hose->cfg_data + PCIE_REG_IO_BASE_ADDR, hose->io_base_phys); - out_be64(hose->cfg_data + PCIE_REG_IO_BASE_MASK, - (~(hose->io_resource.end - hose->io_resource.start)) & - 0x3fffffff000ul); - out_be64(hose->cfg_data + PCIE_REG_IO_START_ADDR, 0 | 1); - - out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_ADDR, - hose->mem_resources[0].start); - printk("Want to write to M32A_BASE_MASK : 0x%llx\n", - (~(hose->mem_resources[0].end - - hose->mem_resources[0].start)) & 0x3ffffff0000ul); - out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_MASK, - (~(hose->mem_resources[0].end - - hose->mem_resources[0].start)) & 0x3ffffff0000ul); - out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR, - (hose->mem_resources[0].start - hose->mem_offset[0]) | 1); - - /* Clear all TVT entries - * - * XX Might get TVT count from device-tree - */ - for (i = 0; i < IODA_TVT_COUNT; i++) { - out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR, - PCIE_REG_IODA_AD_TBL_TVT | i); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, 0); - out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, 0); - } - - /* Kill the TCE cache */ - out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, - in_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG) | - PCIE_REG_PHBC_64B_TCE_EN); - - /* Enable 32 & 64-bit MSIs, IO space and M32A */ - val = PCIE_REG_PHBC_32BIT_MSI_EN | - PCIE_REG_PHBC_IO_EN | - PCIE_REG_PHBC_64BIT_MSI_EN | - PCIE_REG_PHBC_M32A_EN; - if (iommu_is_off) - val |= PCIE_REG_PHBC_DMA_XLATE_BYPASS; - pr_debug("Will write config: 0x%llx\n", val); - out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, val); - - /* Enable error reporting */ - out_be64(hose->cfg_data + 0xe00, - in_be64(hose->cfg_data + 0xe00) | 0x0008000000000000ull); - - /* Mask an error that's generated when doing config space probe - * - * XXX Maybe we should only mask it around config space cycles... that or - * ignore it when we know we had a config space cycle recently ? - */ - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS_MASK, 0x8000000000000000ull); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS_MASK, 0x8000000000000000ull); - - /* Enable UTL errors, for now, all of them got to UTL irq 1 - * - * We similarily mask one UTL error caused apparently during normal - * probing. We also mask the link up error - */ - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_ERR_SEV, 0); - out_be64(hose->cfg_data + PCIE_UTL_RC_ERR_SEVERITY, 0); - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_ERROR_SEV, 0); - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_IRQ_EN, 0xffffffff00000000ull); - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_IRQ_EN, 0xff5fffff00000000ull); - out_be64(hose->cfg_data + PCIE_UTL_EP_ERR_IRQ_EN, 0xffffffff00000000ull); - - DUMP_REG(PCIE_REG_IO_BASE_ADDR); - DUMP_REG(PCIE_REG_IO_BASE_MASK); - DUMP_REG(PCIE_REG_IO_START_ADDR); - DUMP_REG(PCIE_REG_M32A_BASE_ADDR); - DUMP_REG(PCIE_REG_M32A_BASE_MASK); - DUMP_REG(PCIE_REG_M32A_START_ADDR); - DUMP_REG(PCIE_REG_M32B_BASE_ADDR); - DUMP_REG(PCIE_REG_M32B_BASE_MASK); - DUMP_REG(PCIE_REG_M32B_START_ADDR); - DUMP_REG(PCIE_REG_M64_BASE_ADDR); - DUMP_REG(PCIE_REG_M64_BASE_MASK); - DUMP_REG(PCIE_REG_M64_START_ADDR); - DUMP_REG(PCIE_REG_PHB_CONFIG); -} - -static void wsp_pci_wait_io_idle(struct wsp_phb *phb, unsigned long port) -{ - u64 val; - int i; - - for (i = 0; i < 10000; i++) { - val = in_be64(phb->hose->cfg_data + 0xe08); - if ((val & 0x1900000000000000ull) == 0x0100000000000000ull) - return; - udelay(1); - } - pr_warning("PCI IO timeout on domain %d port 0x%lx\n", - phb->hose->global_number, port); -} - -#define DEF_PCI_AC_RET_pio(name, ret, at, al, aa) \ -static ret wsp_pci_##name at \ -{ \ - struct iowa_bus *bus; \ - struct wsp_phb *phb; \ - unsigned long flags; \ - ret rval; \ - bus = iowa_pio_find_bus(aa); \ - WARN_ON(!bus); \ - phb = bus->private; \ - spin_lock_irqsave(&phb->lock, flags); \ - wsp_pci_wait_io_idle(phb, aa); \ - rval = __do_##name al; \ - spin_unlock_irqrestore(&phb->lock, flags); \ - return rval; \ -} - -#define DEF_PCI_AC_NORET_pio(name, at, al, aa) \ -static void wsp_pci_##name at \ -{ \ - struct iowa_bus *bus; \ - struct wsp_phb *phb; \ - unsigned long flags; \ - bus = iowa_pio_find_bus(aa); \ - WARN_ON(!bus); \ - phb = bus->private; \ - spin_lock_irqsave(&phb->lock, flags); \ - wsp_pci_wait_io_idle(phb, aa); \ - __do_##name al; \ - spin_unlock_irqrestore(&phb->lock, flags); \ -} - -#define DEF_PCI_AC_RET_mem(name, ret, at, al, aa) -#define DEF_PCI_AC_NORET_mem(name, at, al, aa) - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ - DEF_PCI_AC_RET_##space(name, ret, at, al, aa) - -#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ - DEF_PCI_AC_NORET_##space(name, at, al, aa) \ - - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -static struct ppc_pci_io wsp_pci_iops = { - .inb = wsp_pci_inb, - .inw = wsp_pci_inw, - .inl = wsp_pci_inl, - .outb = wsp_pci_outb, - .outw = wsp_pci_outw, - .outl = wsp_pci_outl, - .insb = wsp_pci_insb, - .insw = wsp_pci_insw, - .insl = wsp_pci_insl, - .outsb = wsp_pci_outsb, - .outsw = wsp_pci_outsw, - .outsl = wsp_pci_outsl, -}; - -static int __init wsp_setup_one_phb(struct device_node *np) -{ - struct pci_controller *hose; - struct wsp_phb *phb; - - pr_info("PCI: Setting up PCIe host bridge 0x%s\n", np->full_name); - - phb = zalloc_maybe_bootmem(sizeof(struct wsp_phb), GFP_KERNEL); - if (!phb) - return -ENOMEM; - hose = pcibios_alloc_controller(np); - if (!hose) { - /* Can't really free the phb */ - return -ENOMEM; - } - hose->private_data = phb; - phb->hose = hose; - - INIT_LIST_HEAD(&phb->dma_tables); - spin_lock_init(&phb->lock); - - /* XXX Use bus-range property ? */ - hose->first_busno = 0; - hose->last_busno = 0xff; - - /* We use cfg_data as the address for the whole bridge MMIO space - */ - hose->cfg_data = of_iomap(hose->dn, 0); - - pr_debug("PCIe registers mapped at 0x%p\n", hose->cfg_data); - - /* Get the ranges of the device-tree */ - pci_process_bridge_OF_ranges(hose, np, 0); - - /* XXX Force re-assigning of everything for now */ - pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC | - PCI_ENABLE_PROC_DOMAINS); - - /* Calculate how the TCE space is divided */ - phb->dma32_base = 0; - phb->dma32_num_regions = NUM_DMA32_REGIONS; - if (phb->dma32_num_regions > MAX_TABLE_TVT_COUNT) { - pr_warning("IOMMU: Clamped to %d DMA32 regions\n", - MAX_TABLE_TVT_COUNT); - phb->dma32_num_regions = MAX_TABLE_TVT_COUNT; - } - phb->dma32_region_size = 0x80000000 / phb->dma32_num_regions; - - BUG_ON(!is_power_of_2(phb->dma32_region_size)); - - /* Setup config ops */ - hose->ops = &wsp_pcie_pci_ops; - - /* Configure the HW */ - wsp_pcie_configure_hw(hose); - - /* Instanciate IO workarounds */ - iowa_register_bus(hose, &wsp_pci_iops, NULL, phb); -#ifdef CONFIG_PCI_MSI - wsp_setup_phb_msi(hose); -#endif - - /* Add to global list */ - list_add(&phb->all, &wsp_phbs); - - return 0; -} - -void __init wsp_setup_pci(void) -{ - struct device_node *np; - int rc; - - /* Find host bridges */ - for_each_compatible_node(np, "pciex", PCIE_COMPATIBLE) { - rc = wsp_setup_one_phb(np); - if (rc) - pr_err("Failed to setup PCIe bridge %s, rc=%d\n", - np->full_name, rc); - } - - /* Establish device-tree linkage */ - pci_devs_phb_init(); - - /* Set DMA ops to use TCEs */ - if (iommu_is_off) { - pr_info("PCI-E: Disabled TCEs, using direct DMA\n"); - set_pci_dma_ops(&dma_direct_ops); - } else { - ppc_md.pci_dma_dev_setup = wsp_pci_dma_dev_setup; - ppc_md.tce_build = tce_build_wsp; - ppc_md.tce_free = tce_free_wsp; - set_pci_dma_ops(&dma_iommu_ops); - } -} - -#define err_debug(fmt...) pr_debug(fmt) -//#define err_debug(fmt...) - -static int __init wsp_pci_get_err_irq_no_dt(struct device_node *np) -{ - const u32 *prop; - int hw_irq; - - /* Ok, no interrupts property, let's try to find our child P2P */ - np = of_get_next_child(np, NULL); - if (np == NULL) - return 0; - - /* Grab it's interrupt map */ - prop = of_get_property(np, "interrupt-map", NULL); - if (prop == NULL) - return 0; - - /* Grab one of the interrupts in there, keep the low 4 bits */ - hw_irq = prop[5] & 0xf; - - /* 0..4 for PHB 0 and 5..9 for PHB 1 */ - if (hw_irq < 5) - hw_irq = 4; - else - hw_irq = 9; - hw_irq |= prop[5] & ~0xf; - - err_debug("PCI: Using 0x%x as error IRQ for %s\n", - hw_irq, np->parent->full_name); - return irq_create_mapping(NULL, hw_irq); -} - -static const struct { - u32 offset; - const char *name; -} wsp_pci_regs[] = { -#define DREG(x) { PCIE_REG_##x, #x } -#define DUTL(x) { PCIE_UTL_##x, "UTL_" #x } - /* Architected registers except CONFIG_ and IODA - * to avoid side effects - */ - DREG(DMA_CHAN_STATUS), - DREG(CPU_LOADSTORE_STATUS), - DREG(LOCK0), - DREG(LOCK1), - DREG(PHB_CONFIG), - DREG(IO_BASE_ADDR), - DREG(IO_BASE_MASK), - DREG(IO_START_ADDR), - DREG(M32A_BASE_ADDR), - DREG(M32A_BASE_MASK), - DREG(M32A_START_ADDR), - DREG(M32B_BASE_ADDR), - DREG(M32B_BASE_MASK), - DREG(M32B_START_ADDR), - DREG(M64_BASE_ADDR), - DREG(M64_BASE_MASK), - DREG(M64_START_ADDR), - DREG(TCE_KILL), - DREG(LOCK2), - DREG(PHB_GEN_CAP), - DREG(PHB_TCE_CAP), - DREG(PHB_IRQ_CAP), - DREG(PHB_EEH_CAP), - DREG(PAPR_ERR_INJ_CONTROL), - DREG(PAPR_ERR_INJ_ADDR), - DREG(PAPR_ERR_INJ_MASK), - - /* UTL core regs */ - DUTL(SYS_BUS_CONTROL), - DUTL(STATUS), - DUTL(SYS_BUS_AGENT_STATUS), - DUTL(SYS_BUS_AGENT_ERR_SEV), - DUTL(SYS_BUS_AGENT_IRQ_EN), - DUTL(SYS_BUS_BURST_SZ_CONF), - DUTL(REVISION_ID), - DUTL(OUT_POST_HDR_BUF_ALLOC), - DUTL(OUT_POST_DAT_BUF_ALLOC), - DUTL(IN_POST_HDR_BUF_ALLOC), - DUTL(IN_POST_DAT_BUF_ALLOC), - DUTL(OUT_NP_BUF_ALLOC), - DUTL(IN_NP_BUF_ALLOC), - DUTL(PCIE_TAGS_ALLOC), - DUTL(GBIF_READ_TAGS_ALLOC), - - DUTL(PCIE_PORT_CONTROL), - DUTL(PCIE_PORT_STATUS), - DUTL(PCIE_PORT_ERROR_SEV), - DUTL(PCIE_PORT_IRQ_EN), - DUTL(RC_STATUS), - DUTL(RC_ERR_SEVERITY), - DUTL(RC_IRQ_EN), - DUTL(EP_STATUS), - DUTL(EP_ERR_SEVERITY), - DUTL(EP_ERR_IRQ_EN), - DUTL(PCI_PM_CTRL1), - DUTL(PCI_PM_CTRL2), - - /* PCIe stack regs */ - DREG(SYSTEM_CONFIG1), - DREG(SYSTEM_CONFIG2), - DREG(EP_SYSTEM_CONFIG), - DREG(EP_FLR), - DREG(EP_BAR_CONFIG), - DREG(LINK_CONFIG), - DREG(PM_CONFIG), - DREG(DLP_CONTROL), - DREG(DLP_STATUS), - DREG(ERR_REPORT_CONTROL), - DREG(SLOT_CONTROL1), - DREG(SLOT_CONTROL2), - DREG(UTL_CONFIG), - DREG(BUFFERS_CONFIG), - DREG(ERROR_INJECT), - DREG(SRIOV_CONFIG), - DREG(PF0_SRIOV_STATUS), - DREG(PF1_SRIOV_STATUS), - DREG(PORT_NUMBER), - DREG(POR_SYSTEM_CONFIG), - - /* Internal logic regs */ - DREG(PHB_VERSION), - DREG(RESET), - DREG(PHB_CONTROL), - DREG(PHB_TIMEOUT_CONTROL1), - DREG(PHB_QUIESCE_DMA), - DREG(PHB_DMA_READ_TAG_ACTV), - DREG(PHB_TCE_READ_TAG_ACTV), - - /* FIR registers */ - DREG(LEM_FIR_ACCUM), - DREG(LEM_FIR_AND_MASK), - DREG(LEM_FIR_OR_MASK), - DREG(LEM_ACTION0), - DREG(LEM_ACTION1), - DREG(LEM_ERROR_MASK), - DREG(LEM_ERROR_AND_MASK), - DREG(LEM_ERROR_OR_MASK), - - /* Error traps registers */ - DREG(PHB_ERR_STATUS), - DREG(PHB_ERR_STATUS), - DREG(PHB_ERR1_STATUS), - DREG(PHB_ERR_INJECT), - DREG(PHB_ERR_LEM_ENABLE), - DREG(PHB_ERR_IRQ_ENABLE), - DREG(PHB_ERR_FREEZE_ENABLE), - DREG(PHB_ERR_SIDE_ENABLE), - DREG(PHB_ERR_LOG_0), - DREG(PHB_ERR_LOG_1), - DREG(PHB_ERR_STATUS_MASK), - DREG(PHB_ERR1_STATUS_MASK), - DREG(MMIO_ERR_STATUS), - DREG(MMIO_ERR1_STATUS), - DREG(MMIO_ERR_INJECT), - DREG(MMIO_ERR_LEM_ENABLE), - DREG(MMIO_ERR_IRQ_ENABLE), - DREG(MMIO_ERR_FREEZE_ENABLE), - DREG(MMIO_ERR_SIDE_ENABLE), - DREG(MMIO_ERR_LOG_0), - DREG(MMIO_ERR_LOG_1), - DREG(MMIO_ERR_STATUS_MASK), - DREG(MMIO_ERR1_STATUS_MASK), - DREG(DMA_ERR_STATUS), - DREG(DMA_ERR1_STATUS), - DREG(DMA_ERR_INJECT), - DREG(DMA_ERR_LEM_ENABLE), - DREG(DMA_ERR_IRQ_ENABLE), - DREG(DMA_ERR_FREEZE_ENABLE), - DREG(DMA_ERR_SIDE_ENABLE), - DREG(DMA_ERR_LOG_0), - DREG(DMA_ERR_LOG_1), - DREG(DMA_ERR_STATUS_MASK), - DREG(DMA_ERR1_STATUS_MASK), - - /* Debug and Trace registers */ - DREG(PHB_DEBUG_CONTROL0), - DREG(PHB_DEBUG_STATUS0), - DREG(PHB_DEBUG_CONTROL1), - DREG(PHB_DEBUG_STATUS1), - DREG(PHB_DEBUG_CONTROL2), - DREG(PHB_DEBUG_STATUS2), - DREG(PHB_DEBUG_CONTROL3), - DREG(PHB_DEBUG_STATUS3), - DREG(PHB_DEBUG_CONTROL4), - DREG(PHB_DEBUG_STATUS4), - DREG(PHB_DEBUG_CONTROL5), - DREG(PHB_DEBUG_STATUS5), - - /* Don't seem to exist ... - DREG(PHB_DEBUG_CONTROL6), - DREG(PHB_DEBUG_STATUS6), - */ -}; - -static int wsp_pci_regs_show(struct seq_file *m, void *private) -{ - struct wsp_phb *phb = m->private; - struct pci_controller *hose = phb->hose; - int i; - - for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) { - /* Skip write-only regs */ - if (wsp_pci_regs[i].offset == 0xc08 || - wsp_pci_regs[i].offset == 0xc10 || - wsp_pci_regs[i].offset == 0xc38 || - wsp_pci_regs[i].offset == 0xc40) - continue; - seq_printf(m, "0x%03x: 0x%016llx %s\n", - wsp_pci_regs[i].offset, - in_be64(hose->cfg_data + wsp_pci_regs[i].offset), - wsp_pci_regs[i].name); - } - return 0; -} - -static int wsp_pci_regs_open(struct inode *inode, struct file *file) -{ - return single_open(file, wsp_pci_regs_show, inode->i_private); -} - -static const struct file_operations wsp_pci_regs_fops = { - .open = wsp_pci_regs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int wsp_pci_reg_set(void *data, u64 val) -{ - out_be64((void __iomem *)data, val); - return 0; -} - -static int wsp_pci_reg_get(void *data, u64 *val) -{ - *val = in_be64((void __iomem *)data); - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(wsp_pci_reg_fops, wsp_pci_reg_get, wsp_pci_reg_set, "0x%llx\n"); - -static irqreturn_t wsp_pci_err_irq(int irq, void *dev_id) -{ - struct wsp_phb *phb = dev_id; - struct pci_controller *hose = phb->hose; - irqreturn_t handled = IRQ_NONE; - struct wsp_pcie_err_log_data ed; - - pr_err("PCI: Error interrupt on %s (PHB %d)\n", - hose->dn->full_name, hose->global_number); - again: - memset(&ed, 0, sizeof(ed)); - - /* Read and clear UTL errors */ - ed.utl_sys_err = in_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS); - if (ed.utl_sys_err) - out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS, ed.utl_sys_err); - ed.utl_port_err = in_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS); - if (ed.utl_port_err) - out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS, ed.utl_port_err); - ed.utl_rc_err = in_be64(hose->cfg_data + PCIE_UTL_RC_STATUS); - if (ed.utl_rc_err) - out_be64(hose->cfg_data + PCIE_UTL_RC_STATUS, ed.utl_rc_err); - - /* Read and clear main trap errors */ - ed.phb_err = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS); - if (ed.phb_err) { - ed.phb_err1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS); - ed.phb_log0 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_0); - ed.phb_log1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS, 0); - } - ed.mmio_err = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS); - if (ed.mmio_err) { - ed.mmio_err1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS); - ed.mmio_log0 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_0); - ed.mmio_log1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS, 0); - } - ed.dma_err = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS); - if (ed.dma_err) { - ed.dma_err1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS); - ed.dma_log0 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_0); - ed.dma_log1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_1); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS, 0); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS, 0); - } - - /* Now print things out */ - if (ed.phb_err) { - pr_err(" PHB Error Status : 0x%016llx\n", ed.phb_err); - pr_err(" PHB First Error Status: 0x%016llx\n", ed.phb_err1); - pr_err(" PHB Error Log 0 : 0x%016llx\n", ed.phb_log0); - pr_err(" PHB Error Log 1 : 0x%016llx\n", ed.phb_log1); - } - if (ed.mmio_err) { - pr_err(" MMIO Error Status : 0x%016llx\n", ed.mmio_err); - pr_err(" MMIO First Error Status: 0x%016llx\n", ed.mmio_err1); - pr_err(" MMIO Error Log 0 : 0x%016llx\n", ed.mmio_log0); - pr_err(" MMIO Error Log 1 : 0x%016llx\n", ed.mmio_log1); - } - if (ed.dma_err) { - pr_err(" DMA Error Status : 0x%016llx\n", ed.dma_err); - pr_err(" DMA First Error Status: 0x%016llx\n", ed.dma_err1); - pr_err(" DMA Error Log 0 : 0x%016llx\n", ed.dma_log0); - pr_err(" DMA Error Log 1 : 0x%016llx\n", ed.dma_log1); - } - if (ed.utl_sys_err) - pr_err(" UTL Sys Error Status : 0x%016llx\n", ed.utl_sys_err); - if (ed.utl_port_err) - pr_err(" UTL Port Error Status : 0x%016llx\n", ed.utl_port_err); - if (ed.utl_rc_err) - pr_err(" UTL RC Error Status : 0x%016llx\n", ed.utl_rc_err); - - /* Interrupts are caused by the error traps. If we had any error there - * we loop again in case the UTL buffered some new stuff between - * going there and going to the traps - */ - if (ed.dma_err || ed.mmio_err || ed.phb_err) { - handled = IRQ_HANDLED; - goto again; - } - return handled; -} - -static void __init wsp_setup_pci_err_reporting(struct wsp_phb *phb) -{ - struct pci_controller *hose = phb->hose; - int err_irq, i, rc; - char fname[16]; - - /* Create a debugfs file for that PHB */ - sprintf(fname, "phb%d", phb->hose->global_number); - phb->ddir = debugfs_create_dir(fname, powerpc_debugfs_root); - - /* Some useful debug output */ - if (phb->ddir) { - struct dentry *d = debugfs_create_dir("regs", phb->ddir); - char tmp[64]; - - for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) { - sprintf(tmp, "%03x_%s", wsp_pci_regs[i].offset, - wsp_pci_regs[i].name); - debugfs_create_file(tmp, 0600, d, - hose->cfg_data + wsp_pci_regs[i].offset, - &wsp_pci_reg_fops); - } - debugfs_create_file("all_regs", 0600, phb->ddir, phb, &wsp_pci_regs_fops); - } - - /* Find the IRQ number for that PHB */ - err_irq = irq_of_parse_and_map(hose->dn, 0); - if (err_irq == 0) - /* XXX Error IRQ lacking from device-tree */ - err_irq = wsp_pci_get_err_irq_no_dt(hose->dn); - if (err_irq == 0) { - pr_err("PCI: Failed to fetch error interrupt for %s\n", - hose->dn->full_name); - return; - } - /* Request it */ - rc = request_irq(err_irq, wsp_pci_err_irq, 0, "wsp_pci error", phb); - if (rc) { - pr_err("PCI: Failed to request interrupt for %s\n", - hose->dn->full_name); - } - /* Enable interrupts for all errors for now */ - out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_IRQ_ENABLE, 0xffffffffffffffffull); - out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_IRQ_ENABLE, 0xffffffffffffffffull); - out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_IRQ_ENABLE, 0xffffffffffffffffull); -} - -/* - * This is called later to hookup with the error interrupt - */ -static int __init wsp_setup_pci_late(void) -{ - struct wsp_phb *phb; - - list_for_each_entry(phb, &wsp_phbs, all) - wsp_setup_pci_err_reporting(phb); - - return 0; -} -arch_initcall(wsp_setup_pci_late); diff --git a/arch/powerpc/platforms/wsp/wsp_pci.h b/arch/powerpc/platforms/wsp/wsp_pci.h deleted file mode 100644 index 52e9bd95250d..000000000000 --- a/arch/powerpc/platforms/wsp/wsp_pci.h +++ /dev/null @@ -1,268 +0,0 @@ -/* - * Copyright 2010 Ben Herrenschmidt, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __WSP_PCI_H -#define __WSP_PCI_H - -/* Architected registers */ -#define PCIE_REG_DMA_CHAN_STATUS 0x110 -#define PCIE_REG_CPU_LOADSTORE_STATUS 0x120 - -#define PCIE_REG_CONFIG_DATA 0x130 -#define PCIE_REG_LOCK0 0x138 -#define PCIE_REG_CONFIG_ADDRESS 0x140 -#define PCIE_REG_CA_ENABLE 0x8000000000000000ull -#define PCIE_REG_CA_BUS_MASK 0x0ff0000000000000ull -#define PCIE_REG_CA_BUS_SHIFT (20+32) -#define PCIE_REG_CA_DEV_MASK 0x000f800000000000ull -#define PCIE_REG_CA_DEV_SHIFT (15+32) -#define PCIE_REG_CA_FUNC_MASK 0x0000700000000000ull -#define PCIE_REG_CA_FUNC_SHIFT (12+32) -#define PCIE_REG_CA_REG_MASK 0x00000fff00000000ull -#define PCIE_REG_CA_REG_SHIFT ( 0+32) -#define PCIE_REG_CA_BE_MASK 0x00000000f0000000ull -#define PCIE_REG_CA_BE_SHIFT ( 28) -#define PCIE_REG_LOCK1 0x148 - -#define PCIE_REG_PHB_CONFIG 0x160 -#define PCIE_REG_PHBC_64B_TCE_EN 0x2000000000000000ull -#define PCIE_REG_PHBC_MMIO_DMA_FREEZE_EN 0x1000000000000000ull -#define PCIE_REG_PHBC_32BIT_MSI_EN 0x0080000000000000ull -#define PCIE_REG_PHBC_M64_EN 0x0040000000000000ull -#define PCIE_REG_PHBC_IO_EN 0x0008000000000000ull -#define PCIE_REG_PHBC_64BIT_MSI_EN 0x0002000000000000ull -#define PCIE_REG_PHBC_M32A_EN 0x0000800000000000ull -#define PCIE_REG_PHBC_M32B_EN 0x0000400000000000ull -#define PCIE_REG_PHBC_MSI_PE_VALIDATE 0x0000200000000000ull -#define PCIE_REG_PHBC_DMA_XLATE_BYPASS 0x0000100000000000ull - -#define PCIE_REG_IO_BASE_ADDR 0x170 -#define PCIE_REG_IO_BASE_MASK 0x178 -#define PCIE_REG_IO_START_ADDR 0x180 - -#define PCIE_REG_M32A_BASE_ADDR 0x190 -#define PCIE_REG_M32A_BASE_MASK 0x198 -#define PCIE_REG_M32A_START_ADDR 0x1a0 - -#define PCIE_REG_M32B_BASE_ADDR 0x1b0 -#define PCIE_REG_M32B_BASE_MASK 0x1b8 -#define PCIE_REG_M32B_START_ADDR 0x1c0 - -#define PCIE_REG_M64_BASE_ADDR 0x1e0 -#define PCIE_REG_M64_BASE_MASK 0x1e8 -#define PCIE_REG_M64_START_ADDR 0x1f0 - -#define PCIE_REG_TCE_KILL 0x210 -#define PCIE_REG_TCEKILL_SINGLE 0x8000000000000000ull -#define PCIE_REG_TCEKILL_ADDR_MASK 0x000003fffffffff8ull -#define PCIE_REG_TCEKILL_PS_4K 0 -#define PCIE_REG_TCEKILL_PS_64K 1 -#define PCIE_REG_TCEKILL_PS_16M 2 -#define PCIE_REG_TCEKILL_PS_16G 3 - -#define PCIE_REG_IODA_ADDR 0x220 -#define PCIE_REG_IODA_AD_AUTOINC 0x8000000000000000ull -#define PCIE_REG_IODA_AD_TBL_MVT 0x0005000000000000ull -#define PCIE_REG_IODA_AD_TBL_PELT 0x0006000000000000ull -#define PCIE_REG_IODA_AD_TBL_PESTA 0x0007000000000000ull -#define PCIE_REG_IODA_AD_TBL_PESTB 0x0008000000000000ull -#define PCIE_REG_IODA_AD_TBL_TVT 0x0009000000000000ull -#define PCIE_REG_IODA_AD_TBL_TCE 0x000a000000000000ull -#define PCIE_REG_IODA_DATA0 0x228 -#define PCIE_REG_IODA_DATA1 0x230 - -#define PCIE_REG_LOCK2 0x240 - -#define PCIE_REG_PHB_GEN_CAP 0x250 -#define PCIE_REG_PHB_TCE_CAP 0x258 -#define PCIE_REG_PHB_IRQ_CAP 0x260 -#define PCIE_REG_PHB_EEH_CAP 0x268 - -#define PCIE_REG_PAPR_ERR_INJ_CONTROL 0x2b0 -#define PCIE_REG_PAPR_ERR_INJ_ADDR 0x2b8 -#define PCIE_REG_PAPR_ERR_INJ_MASK 0x2c0 - - -#define PCIE_REG_SYS_CFG1 0x600 -#define PCIE_REG_SYS_CFG1_CLASS_CODE 0x0000000000ffffffull - -#define IODA_TVT0_TTA_MASK 0x000fffffffff0000ull -#define IODA_TVT0_TTA_SHIFT 4 -#define IODA_TVT0_BUSNUM_VALID_MASK 0x000000000000e000ull -#define IODA_TVT0_TCE_TABLE_SIZE_MASK 0x0000000000001f00ull -#define IODA_TVT0_TCE_TABLE_SIZE_SHIFT 8 -#define IODA_TVT0_BUSNUM_VALUE_MASK 0x00000000000000ffull -#define IODA_TVT0_BUSNUM_VALID_SHIFT 0 -#define IODA_TVT1_DEVNUM_VALID 0x2000000000000000ull -#define IODA_TVT1_DEVNUM_VALUE_MASK 0x1f00000000000000ull -#define IODA_TVT1_DEVNUM_VALUE_SHIFT 56 -#define IODA_TVT1_FUNCNUM_VALID 0x0008000000000000ull -#define IODA_TVT1_FUNCNUM_VALUE_MASK 0x0007000000000000ull -#define IODA_TVT1_FUNCNUM_VALUE_SHIFT 48 -#define IODA_TVT1_IO_PAGE_SIZE_MASK 0x00001f0000000000ull -#define IODA_TVT1_IO_PAGE_SIZE_SHIFT 40 -#define IODA_TVT1_PE_NUMBER_MASK 0x000000000000003full -#define IODA_TVT1_PE_NUMBER_SHIFT 0 - -#define IODA_TVT_COUNT 64 - -/* UTL Core registers */ -#define PCIE_UTL_SYS_BUS_CONTROL 0x400 -#define PCIE_UTL_STATUS 0x408 -#define PCIE_UTL_SYS_BUS_AGENT_STATUS 0x410 -#define PCIE_UTL_SYS_BUS_AGENT_ERR_SEV 0x418 -#define PCIE_UTL_SYS_BUS_AGENT_IRQ_EN 0x420 -#define PCIE_UTL_SYS_BUS_BURST_SZ_CONF 0x440 -#define PCIE_UTL_REVISION_ID 0x448 - -#define PCIE_UTL_OUT_POST_HDR_BUF_ALLOC 0x4c0 -#define PCIE_UTL_OUT_POST_DAT_BUF_ALLOC 0x4d0 -#define PCIE_UTL_IN_POST_HDR_BUF_ALLOC 0x4e0 -#define PCIE_UTL_IN_POST_DAT_BUF_ALLOC 0x4f0 -#define PCIE_UTL_OUT_NP_BUF_ALLOC 0x500 -#define PCIE_UTL_IN_NP_BUF_ALLOC 0x510 -#define PCIE_UTL_PCIE_TAGS_ALLOC 0x520 -#define PCIE_UTL_GBIF_READ_TAGS_ALLOC 0x530 - -#define PCIE_UTL_PCIE_PORT_CONTROL 0x540 -#define PCIE_UTL_PCIE_PORT_STATUS 0x548 -#define PCIE_UTL_PCIE_PORT_ERROR_SEV 0x550 -#define PCIE_UTL_PCIE_PORT_IRQ_EN 0x558 -#define PCIE_UTL_RC_STATUS 0x560 -#define PCIE_UTL_RC_ERR_SEVERITY 0x568 -#define PCIE_UTL_RC_IRQ_EN 0x570 -#define PCIE_UTL_EP_STATUS 0x578 -#define PCIE_UTL_EP_ERR_SEVERITY 0x580 -#define PCIE_UTL_EP_ERR_IRQ_EN 0x588 - -#define PCIE_UTL_PCI_PM_CTRL1 0x590 -#define PCIE_UTL_PCI_PM_CTRL2 0x598 - -/* PCIe stack registers */ -#define PCIE_REG_SYSTEM_CONFIG1 0x600 -#define PCIE_REG_SYSTEM_CONFIG2 0x608 -#define PCIE_REG_EP_SYSTEM_CONFIG 0x618 -#define PCIE_REG_EP_FLR 0x620 -#define PCIE_REG_EP_BAR_CONFIG 0x628 -#define PCIE_REG_LINK_CONFIG 0x630 -#define PCIE_REG_PM_CONFIG 0x640 -#define PCIE_REG_DLP_CONTROL 0x650 -#define PCIE_REG_DLP_STATUS 0x658 -#define PCIE_REG_ERR_REPORT_CONTROL 0x660 -#define PCIE_REG_SLOT_CONTROL1 0x670 -#define PCIE_REG_SLOT_CONTROL2 0x678 -#define PCIE_REG_UTL_CONFIG 0x680 -#define PCIE_REG_BUFFERS_CONFIG 0x690 -#define PCIE_REG_ERROR_INJECT 0x698 -#define PCIE_REG_SRIOV_CONFIG 0x6a0 -#define PCIE_REG_PF0_SRIOV_STATUS 0x6a8 -#define PCIE_REG_PF1_SRIOV_STATUS 0x6b0 -#define PCIE_REG_PORT_NUMBER 0x700 -#define PCIE_REG_POR_SYSTEM_CONFIG 0x708 - -/* PHB internal logic registers */ -#define PCIE_REG_PHB_VERSION 0x800 -#define PCIE_REG_RESET 0x808 -#define PCIE_REG_PHB_CONTROL 0x810 -#define PCIE_REG_PHB_TIMEOUT_CONTROL1 0x878 -#define PCIE_REG_PHB_QUIESCE_DMA 0x888 -#define PCIE_REG_PHB_DMA_READ_TAG_ACTV 0x900 -#define PCIE_REG_PHB_TCE_READ_TAG_ACTV 0x908 - -/* FIR registers */ -#define PCIE_REG_LEM_FIR_ACCUM 0xc00 -#define PCIE_REG_LEM_FIR_AND_MASK 0xc08 -#define PCIE_REG_LEM_FIR_OR_MASK 0xc10 -#define PCIE_REG_LEM_ACTION0 0xc18 -#define PCIE_REG_LEM_ACTION1 0xc20 -#define PCIE_REG_LEM_ERROR_MASK 0xc30 -#define PCIE_REG_LEM_ERROR_AND_MASK 0xc38 -#define PCIE_REG_LEM_ERROR_OR_MASK 0xc40 - -/* PHB Error registers */ -#define PCIE_REG_PHB_ERR_STATUS 0xc80 -#define PCIE_REG_PHB_ERR1_STATUS 0xc88 -#define PCIE_REG_PHB_ERR_INJECT 0xc90 -#define PCIE_REG_PHB_ERR_LEM_ENABLE 0xc98 -#define PCIE_REG_PHB_ERR_IRQ_ENABLE 0xca0 -#define PCIE_REG_PHB_ERR_FREEZE_ENABLE 0xca8 -#define PCIE_REG_PHB_ERR_SIDE_ENABLE 0xcb8 -#define PCIE_REG_PHB_ERR_LOG_0 0xcc0 -#define PCIE_REG_PHB_ERR_LOG_1 0xcc8 -#define PCIE_REG_PHB_ERR_STATUS_MASK 0xcd0 -#define PCIE_REG_PHB_ERR1_STATUS_MASK 0xcd8 - -#define PCIE_REG_MMIO_ERR_STATUS 0xd00 -#define PCIE_REG_MMIO_ERR1_STATUS 0xd08 -#define PCIE_REG_MMIO_ERR_INJECT 0xd10 -#define PCIE_REG_MMIO_ERR_LEM_ENABLE 0xd18 -#define PCIE_REG_MMIO_ERR_IRQ_ENABLE 0xd20 -#define PCIE_REG_MMIO_ERR_FREEZE_ENABLE 0xd28 -#define PCIE_REG_MMIO_ERR_SIDE_ENABLE 0xd38 -#define PCIE_REG_MMIO_ERR_LOG_0 0xd40 -#define PCIE_REG_MMIO_ERR_LOG_1 0xd48 -#define PCIE_REG_MMIO_ERR_STATUS_MASK 0xd50 -#define PCIE_REG_MMIO_ERR1_STATUS_MASK 0xd58 - -#define PCIE_REG_DMA_ERR_STATUS 0xd80 -#define PCIE_REG_DMA_ERR1_STATUS 0xd88 -#define PCIE_REG_DMA_ERR_INJECT 0xd90 -#define PCIE_REG_DMA_ERR_LEM_ENABLE 0xd98 -#define PCIE_REG_DMA_ERR_IRQ_ENABLE 0xda0 -#define PCIE_REG_DMA_ERR_FREEZE_ENABLE 0xda8 -#define PCIE_REG_DMA_ERR_SIDE_ENABLE 0xdb8 -#define PCIE_REG_DMA_ERR_LOG_0 0xdc0 -#define PCIE_REG_DMA_ERR_LOG_1 0xdc8 -#define PCIE_REG_DMA_ERR_STATUS_MASK 0xdd0 -#define PCIE_REG_DMA_ERR1_STATUS_MASK 0xdd8 - -/* Shortcuts for access to the above using the PHB definitions - * with an offset - */ -#define PCIE_REG_ERR_PHB_OFFSET 0x0 -#define PCIE_REG_ERR_MMIO_OFFSET 0x80 -#define PCIE_REG_ERR_DMA_OFFSET 0x100 - -/* Debug and Trace registers */ -#define PCIE_REG_PHB_DEBUG_CONTROL0 0xe00 -#define PCIE_REG_PHB_DEBUG_STATUS0 0xe08 -#define PCIE_REG_PHB_DEBUG_CONTROL1 0xe10 -#define PCIE_REG_PHB_DEBUG_STATUS1 0xe18 -#define PCIE_REG_PHB_DEBUG_CONTROL2 0xe20 -#define PCIE_REG_PHB_DEBUG_STATUS2 0xe28 -#define PCIE_REG_PHB_DEBUG_CONTROL3 0xe30 -#define PCIE_REG_PHB_DEBUG_STATUS3 0xe38 -#define PCIE_REG_PHB_DEBUG_CONTROL4 0xe40 -#define PCIE_REG_PHB_DEBUG_STATUS4 0xe48 -#define PCIE_REG_PHB_DEBUG_CONTROL5 0xe50 -#define PCIE_REG_PHB_DEBUG_STATUS5 0xe58 -#define PCIE_REG_PHB_DEBUG_CONTROL6 0xe60 -#define PCIE_REG_PHB_DEBUG_STATUS6 0xe68 - -/* Definition for PCIe errors */ -struct wsp_pcie_err_log_data { - __u64 phb_err; - __u64 phb_err1; - __u64 phb_log0; - __u64 phb_log1; - __u64 mmio_err; - __u64 mmio_err1; - __u64 mmio_log0; - __u64 mmio_log1; - __u64 dma_err; - __u64 dma_err1; - __u64 dma_log0; - __u64 dma_log1; - __u64 utl_sys_err; - __u64 utl_port_err; - __u64 utl_rc_err; - __u64 unused; -}; - -#endif /* __WSP_PCI_H */ diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index 9dee47071af8..de8d9483bbe8 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -26,6 +26,7 @@ #include <asm/errno.h> #include <asm/xics.h> #include <asm/kvm_ppc.h> +#include <asm/dbell.h> struct icp_ipl { union { @@ -145,7 +146,13 @@ static unsigned int icp_native_get_irq(void) static void icp_native_cause_ipi(int cpu, unsigned long data) { kvmppc_set_host_ipi(cpu, 1); - icp_native_set_qirr(cpu, IPI_PRIORITY); +#ifdef CONFIG_PPC_DOORBELL + if (cpu_has_feature(CPU_FTR_DBELL) && + (cpumask_test_cpu(cpu, cpu_sibling_mask(smp_processor_id())))) + doorbell_cause_ipi(cpu, data); + else +#endif + icp_native_set_qirr(cpu, IPI_PRIORITY); } void xics_wake_cpu(int cpu) diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index bce3dcfe5058..c98748617896 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -122,7 +122,7 @@ void xmon_printf(const char *format, ...) if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk(xmon_outbuf); + printk("%s", xmon_outbuf); } } diff --git a/arch/um/Makefile b/arch/um/Makefile index 36e658a4291c..e4b1a9639c4d 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -111,8 +111,7 @@ endef KBUILD_KCONFIG := $(HOST_DIR)/um/Kconfig archheaders: - $(Q)$(MAKE) -C '$(srctree)' KBUILD_SRC= \ - ARCH=$(HEADER_ARCH) O='$(objtree)' archheaders + $(Q)$(MAKE) KBUILD_SRC= ARCH=$(HEADER_ARCH) archheaders archprepare: include/generated/user_constants.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b660088c220d..fcefdda5136d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,6 +121,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_QUEUE_RWLOCK select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 4582e8e1cd1a..7730c1c5c83a 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -57,6 +57,12 @@ .long (from) - . ; \ .long (to) - . + 0x7ffffff0 ; \ .popsection + +# define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ + _ASM_PTR (entry); \ + .popsection #else # define _ASM_EXTABLE(from,to) \ " .pushsection \"__ex_table\",\"a\"\n" \ @@ -71,6 +77,7 @@ " .long (" #from ") - .\n" \ " .long (" #to ") - . + 0x7ffffff0\n" \ " .popsection\n" +/* For C file, we already have NOKPROBE_SYMBOL macro */ #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9454c167629f..53cdfb2857ab 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -116,4 +116,6 @@ struct kprobe_ctlblk { extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +extern int kprobe_int3_handler(struct pt_regs *regs); +extern int kprobe_debug_handler(struct pt_regs *regs); #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h new file mode 100644 index 000000000000..70f46f07f94e --- /dev/null +++ b/arch/x86/include/asm/qrwlock.h @@ -0,0 +1,17 @@ +#ifndef _ASM_X86_QRWLOCK_H +#define _ASM_X86_QRWLOCK_H + +#include <asm-generic/qrwlock_types.h> + +#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) +#define queue_write_unlock queue_write_unlock +static inline void queue_write_unlock(struct qrwlock *lock) +{ + barrier(); + ACCESS_ONCE(*(u8 *)&lock->cnts) = 0; +} +#endif + +#include <asm-generic/qrwlock.h> + +#endif /* _ASM_X86_QRWLOCK_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 0f62f5482d91..54f1c8068c02 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -187,6 +187,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) cpu_relax(); } +#ifndef CONFIG_QUEUE_RWLOCK /* * Read-write spinlocks, allowing multiple readers * but only one writer. @@ -269,6 +270,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); } +#else +#include <asm/qrwlock.h> +#endif /* CONFIG_QUEUE_RWLOCK */ #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index 4f1bea19945b..73c4c007200f 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -34,6 +34,10 @@ typedef struct arch_spinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } +#ifdef CONFIG_QUEUE_RWLOCK +#include <asm-generic/qrwlock_types.h> +#else #include <asm/rwlock.h> +#endif #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 8ba18842c48e..bc8352e7010a 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -68,7 +68,7 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long); dotraplinkage void do_stack_segment(struct pt_regs *, long); #ifdef CONFIG_X86_64 dotraplinkage void do_double_fault(struct pt_regs *, long); -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); +asmlinkage struct pt_regs *sync_regs(struct pt_regs *); #endif dotraplinkage void do_general_protection(struct pt_regs *, long); dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); @@ -103,7 +103,6 @@ static inline int get_si_code(unsigned long condition) extern int panic_on_unrecovered_nmi; -void math_error(struct pt_regs *, int, int); void math_emulate(struct math_emu_info *); #ifndef CONFIG_X86_32 asmlinkage void smp_thermal_interrupt(void); diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index 93bee7b93854..74f4c2ff6427 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -41,18 +41,18 @@ struct arch_uprobe { u8 ixol[MAX_UINSN_BYTES]; }; - u16 fixups; const struct uprobe_xol_ops *ops; union { -#ifdef CONFIG_X86_64 - unsigned long rip_rela_target_address; -#endif struct { s32 offs; u8 ilen; u8 opc1; - } branch; + } branch; + struct { + u8 fixups; + u8 ilen; + } defparam; }; }; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index df94598ad05a..703130f469ec 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -5,7 +5,6 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/stringify.h> -#include <linux/kprobes.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/memory.h> @@ -551,7 +550,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, * * Note: Must be called under text_mutex. */ -void *__kprobes text_poke(void *addr, const void *opcode, size_t len) +void *text_poke(void *addr, const void *opcode, size_t len) { unsigned long flags; char *vaddr; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index eab67047dec3..c3fcb5de5083 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -60,7 +60,7 @@ void arch_trigger_all_cpu_backtrace(void) smp_mb__after_atomic(); } -static int __kprobes +static int arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { int cpu; @@ -80,6 +80,7 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) return NMI_DONE; } +NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler); static int __init register_trigger_all_cpu_backtrace(void) { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9d0a9795a0f8..81e08eff05ee 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2297,7 +2297,7 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, int err; if (!config_enabled(CONFIG_SMP)) - return -1; + return -EPERM; if (!cpumask_intersects(mask, cpu_online_mask)) return -EINVAL; @@ -2328,7 +2328,7 @@ int native_ioapic_set_affinity(struct irq_data *data, int ret; if (!config_enabled(CONFIG_SMP)) - return -1; + return -EPERM; raw_spin_lock_irqsave(&ioapic_lock, flags); ret = __ioapic_set_affinity(data, mask, &dest); @@ -3001,9 +3001,11 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; __get_cached_msi_msg(data->msi_desc, &msg); @@ -3100,9 +3102,11 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_cfg *cfg = data->chip_data; unsigned int dest, irq = data->irq; struct msi_msg msg; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; dmar_msi_read(irq, &msg); @@ -3149,9 +3153,11 @@ static int hpet_msi_set_affinity(struct irq_data *data, struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; hpet_msi_read(data->handler_data, &msg); @@ -3218,9 +3224,11 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { struct irq_cfg *cfg = data->chip_data; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; target_ht_irq(data->irq, dest, cfg->vector); return IRQ_SET_MASK_OK_NOCOPY; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2cbbf88d8f2c..ef1b93f18ed1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include <linux/delay.h> #include <linux/sched.h> #include <linux/init.h> +#include <linux/kprobes.h> #include <linux/kgdb.h> #include <linux/smp.h> #include <linux/io.h> @@ -1193,6 +1194,7 @@ int is_debug_stack(unsigned long addr) (addr <= __get_cpu_var(debug_stack_addr) && addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); } +NOKPROBE_SYMBOL(is_debug_stack); DEFINE_PER_CPU(u32, debug_idt_ctr); @@ -1201,6 +1203,7 @@ void debug_stack_set_zero(void) this_cpu_inc(debug_idt_ctr); load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_set_zero); void debug_stack_reset(void) { @@ -1209,6 +1212,7 @@ void debug_stack_reset(void) if (this_cpu_dec_return(debug_idt_ctr) == 0) load_current_idt(); } +NOKPROBE_SYMBOL(debug_stack_reset); #else /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 76f98fe5b35c..a450373e8e91 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -132,15 +132,6 @@ static void __init ms_hyperv_init_platform(void) lapic_timer_frequency = hv_lapic_frequency; printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", lapic_timer_frequency); - - /* - * On Hyper-V, when we are booting off an EFI firmware stack, - * we do not have many legacy devices including PIC, PIT etc. - */ - if (efi_enabled(EFI_BOOT)) { - printk(KERN_INFO "HyperV: Using null_legacy_pic\n"); - legacy_pic = &null_legacy_pic; - } } #endif diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 89f3b7c1af20..2bdfbff8a4f6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -303,15 +303,6 @@ int x86_setup_perfctr(struct perf_event *event) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * events (user-space has to fall back and - * sample via a hrtimer based software event): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; } if (attr->type == PERF_TYPE_RAW) @@ -1293,7 +1284,7 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -static int __kprobes +static int perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { u64 start_clock; @@ -1311,6 +1302,7 @@ perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) return ret; } +NOKPROBE_SYMBOL(perf_event_nmi_handler); struct event_constraint emptyconstraint; struct event_constraint unconstrained; @@ -1366,6 +1358,15 @@ static void __init pmu_check_apic(void) x86_pmu.apic = 0; pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); pr_info("no hardware sampling interrupt available.\n"); + + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } static struct attribute_group x86_pmu_format_group = { diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 4c36bbe3173a..cbb1be3ed9e4 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -593,7 +593,7 @@ out: return 1; } -static int __kprobes +static int perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) { int handled = 0; @@ -606,6 +606,7 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) return handled; } +NOKPROBE_SYMBOL(perf_ibs_nmi_handler); static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d82d155aca8c..9dd2459a4c73 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -384,6 +384,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_NO_TX) mask |= X86_BR_NO_TX; + if (br_type & PERF_SAMPLE_BRANCH_COND) + mask |= X86_BR_JCC; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU @@ -678,6 +681,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { * NHM/WSM erratum: must include IND_JMP to capture IND_CALL */ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { @@ -689,6 +693,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | LBR_FAR, [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, }; /* core */ diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index d9c12d3022a7..b74ebc7c4402 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -200,7 +200,7 @@ static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; -unsigned __kprobes long oops_begin(void) +unsigned long oops_begin(void) { int cpu; unsigned long flags; @@ -223,8 +223,9 @@ unsigned __kprobes long oops_begin(void) return flags; } EXPORT_SYMBOL_GPL(oops_begin); +NOKPROBE_SYMBOL(oops_begin); -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { if (regs && kexec_should_crash(current)) crash_kexec(regs); @@ -247,8 +248,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) panic("Fatal exception"); do_exit(signr); } +NOKPROBE_SYMBOL(oops_end); -int __kprobes __die(const char *str, struct pt_regs *regs, long err) +int __die(const char *str, struct pt_regs *regs, long err) { #ifdef CONFIG_X86_32 unsigned short ss; @@ -291,6 +293,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) #endif return 0; } +NOKPROBE_SYMBOL(__die); /* * This is gone through when something in the kernel has done something bad diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 98313ffaae6a..f0da82b8e634 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -315,10 +315,6 @@ ENTRY(ret_from_kernel_thread) ENDPROC(ret_from_kernel_thread) /* - * Interrupt exit functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" -/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -372,10 +368,6 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -495,10 +487,6 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) -/* - * syscall stub including irq exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -690,10 +678,6 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC -/* - * End of kprobes section - */ - .popsection .macro FIXUP_ESPFIX_STACK /* @@ -784,10 +768,6 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -/* - * Irq entries should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -964,10 +944,6 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) -/* - * End of kprobes section - */ - .popsection #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter @@ -1242,11 +1218,6 @@ return_to_handler: jmp *%ecx #endif -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - #ifdef CONFIG_TRACING ENTRY(trace_page_fault) RING0_EC_FRAME @@ -1460,7 +1431,3 @@ ENTRY(async_page_fault) END(async_page_fault) #endif -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 48a2644a082a..b25ca969edd2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -284,8 +284,6 @@ ENDPROC(native_usergs_sysret64) TRACE_IRQS_OFF .endm -/* save complete stack frame */ - .pushsection .kprobes.text, "ax" ENTRY(save_paranoid) XCPT_FRAME 1 RDI+8 cld @@ -314,7 +312,6 @@ ENTRY(save_paranoid) 1: ret CFI_ENDPROC END(save_paranoid) - .popsection /* * A newly forked process directly context switches into this address. @@ -772,10 +769,6 @@ END(interrupt) call \func .endm -/* - * Interrupt entry/exit should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -983,11 +976,6 @@ END(__do_double_fault) #endif /* - * End of kprobes section - */ - .popsection - -/* * APIC interrupts. */ .macro apicinterrupt3 num sym do_sym @@ -1321,11 +1309,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ hyperv_callback_vector hyperv_vector_handler #endif /* CONFIG_HYPERV */ -/* - * Some functions should be protected against kprobes - */ - .pushsection .kprobes.text, "ax" - idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 @@ -1742,7 +1725,3 @@ ENTRY(ignore_sysret) CFI_ENDPROC END(ignore_sysret) -/* - * End of kprobes section - */ - .popsection diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a67b47c31314..5f9cf20cdb68 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -32,7 +32,6 @@ #include <linux/irqflags.h> #include <linux/notifier.h> #include <linux/kallsyms.h> -#include <linux/kprobes.h> #include <linux/percpu.h> #include <linux/kdebug.h> #include <linux/kernel.h> @@ -424,7 +423,7 @@ EXPORT_SYMBOL_GPL(hw_breakpoint_restore); * NOTIFY_STOP returned for all other cases * */ -static int __kprobes hw_breakpoint_handler(struct die_args *args) +static int hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct perf_event *bp; @@ -511,7 +510,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_exceptions_notify( +int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) { if (val != DIE_DEBUG) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 2e977b5d61dd..8af817105e29 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -299,13 +299,31 @@ static void unmask_8259A(void) static void init_8259A(int auto_eoi) { unsigned long flags; + unsigned char probe_val = ~(1 << PIC_CASCADE_IR); + unsigned char new_val; i8259A_auto_eoi = auto_eoi; raw_spin_lock_irqsave(&i8259A_lock, flags); - outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ + /* + * Check to see if we have a PIC. + * Mask all except the cascade and read + * back the value we just wrote. If we don't + * have a PIC, we will read 0xff as opposed to the + * value we wrote. + */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ + outb(probe_val, PIC_MASTER_IMR); + new_val = inb(PIC_MASTER_IMR); + if (new_val != probe_val) { + printk(KERN_INFO "Using NULL legacy PIC\n"); + legacy_pic = &null_legacy_pic; + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + return; + } + + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ /* * outb_pic - this has to work on a wide range of PC hardware. diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 11ccfb0a63e7..922d28581024 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -365,6 +365,7 @@ void fixup_irqs(void) struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; + int ret; for_each_irq_desc(irq, desc) { int break_affinity = 0; @@ -403,10 +404,14 @@ void fixup_irqs(void) if (!irqd_can_move_in_process_context(data) && chip->irq_mask) chip->irq_mask(data); - if (chip->irq_set_affinity) - chip->irq_set_affinity(data, affinity, true); - else if (!(warned++)) - set_affinity = 0; + if (chip->irq_set_affinity) { + ret = chip->irq_set_affinity(data, affinity, true); + if (ret == -ENOSPC) + pr_crit("IRQ %d set affinity failed because there are no available vectors. The device assigned to this IRQ is unstable.\n", irq); + } else { + if (!(warned++)) + set_affinity = 0; + } /* * We unmask if the irq was not marked masked by the diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 61b17dc2c277..7596df664901 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -112,7 +112,8 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); -static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) +static nokprobe_inline void +__synthesize_relative_insn(void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; @@ -125,21 +126,23 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void __kprobes synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); } +NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void __kprobes synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *from, void *to) { __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); } +NOKPROBE_SYMBOL(synthesize_relcall); /* * Skip the prefixes of the instruction. */ -static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) +static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) { insn_attr_t attr; @@ -154,12 +157,13 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) #endif return insn; } +NOKPROBE_SYMBOL(skip_prefixes); /* * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int __kprobes can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; @@ -260,7 +264,7 @@ unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long add } /* Check if paddr is at an instruction boundary */ -static int __kprobes can_probe(unsigned long paddr) +static int can_probe(unsigned long paddr) { unsigned long addr, __addr, offset = 0; struct insn insn; @@ -299,7 +303,7 @@ static int __kprobes can_probe(unsigned long paddr) /* * Returns non-zero if opcode modifies the interrupt flag. */ -static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) +static int is_IF_modifier(kprobe_opcode_t *insn) { /* Skip prefixes */ insn = skip_prefixes(insn); @@ -322,7 +326,7 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) * If not, return null. * Only applicable to 64-bit x86. */ -int __kprobes __copy_instruction(u8 *dest, u8 *src) +int __copy_instruction(u8 *dest, u8 *src) { struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; @@ -365,7 +369,7 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src) return insn.length; } -static int __kprobes arch_copy_kprobe(struct kprobe *p) +static int arch_copy_kprobe(struct kprobe *p) { int ret; @@ -392,7 +396,7 @@ static int __kprobes arch_copy_kprobe(struct kprobe *p) return 0; } -int __kprobes arch_prepare_kprobe(struct kprobe *p) +int arch_prepare_kprobe(struct kprobe *p) { if (alternatives_text_reserved(p->addr, p->addr)) return -EINVAL; @@ -407,17 +411,17 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return arch_copy_kprobe(p); } -void __kprobes arch_arm_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); } -void __kprobes arch_disarm_kprobe(struct kprobe *p) +void arch_disarm_kprobe(struct kprobe *p) { text_poke(p->addr, &p->opcode, 1); } -void __kprobes arch_remove_kprobe(struct kprobe *p) +void arch_remove_kprobe(struct kprobe *p) { if (p->ainsn.insn) { free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); @@ -425,7 +429,8 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) } } -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -433,7 +438,8 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; } -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static nokprobe_inline void +restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; @@ -441,8 +447,9 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; } -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline void +set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags @@ -451,7 +458,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; } -static void __kprobes clear_btf(void) +static nokprobe_inline void clear_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -461,7 +468,7 @@ static void __kprobes clear_btf(void) } } -static void __kprobes restore_btf(void) +static nokprobe_inline void restore_btf(void) { if (test_thread_flag(TIF_BLOCKSTEP)) { unsigned long debugctl = get_debugctlmsr(); @@ -471,8 +478,7 @@ static void __kprobes restore_btf(void) } } -void __kprobes -arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) +void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { unsigned long *sara = stack_addr(regs); @@ -481,9 +487,10 @@ arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) /* Replace the return addr with trampoline addr */ *sara = (unsigned long) &kretprobe_trampoline; } +NOKPROBE_SYMBOL(arch_prepare_kretprobe); -static void __kprobes -setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) +static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) { if (setup_detour_execution(p, regs, reenter)) return; @@ -519,22 +526,24 @@ setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k else regs->ip = (unsigned long)p->ainsn.insn; } +NOKPROBE_SYMBOL(setup_singlestep); /* * We have reentered the kprobe_handler(), since another probe was hit while * within the handler. We save the original kprobes variables and just single * step on the instruction of the new probe without calling any user handlers. */ -static int __kprobes -reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SS: kprobes_inc_nmissed_count(p); setup_singlestep(p, regs, kcb, 1); break; - case KPROBE_HIT_SS: + case KPROBE_REENTER: /* A probe has been hit in the codepath leading up to, or just * after, single-stepping of a probed instruction. This entire * codepath should strictly reside in .kprobes.text section. @@ -553,12 +562,13 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +NOKPROBE_SYMBOL(reenter_kprobe); /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. */ -static int __kprobes kprobe_handler(struct pt_regs *regs) +int kprobe_int3_handler(struct pt_regs *regs) { kprobe_opcode_t *addr; struct kprobe *p; @@ -621,12 +631,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_enable_no_resched(); return 0; } +NOKPROBE_SYMBOL(kprobe_int3_handler); /* * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. */ -static void __used __kprobes kretprobe_trampoline_holder(void) +static void __used kretprobe_trampoline_holder(void) { asm volatile ( ".global kretprobe_trampoline\n" @@ -657,11 +668,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void) #endif " ret\n"); } +NOKPROBE_SYMBOL(kretprobe_trampoline_holder); +NOKPROBE_SYMBOL(kretprobe_trampoline); /* * Called from kretprobe_trampoline */ -__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) +__visible __used void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; @@ -747,6 +760,7 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) } return (void *)orig_ret_address; } +NOKPROBE_SYMBOL(trampoline_handler); /* * Called after single-stepping. p->addr is the address of the @@ -775,8 +789,8 @@ __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) * jump instruction after the copied instruction, that jumps to the next * instruction after the probepoint. */ -static void __kprobes -resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +static void resume_execution(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { unsigned long *tos = stack_addr(regs); unsigned long copy_ip = (unsigned long)p->ainsn.insn; @@ -851,12 +865,13 @@ resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *k no_change: restore_btf(); } +NOKPROBE_SYMBOL(resume_execution); /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled throughout this function. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int kprobe_debug_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -891,8 +906,9 @@ out: return 1; } +NOKPROBE_SYMBOL(kprobe_debug_handler); -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -949,12 +965,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } +NOKPROBE_SYMBOL(kprobe_fault_handler); /* * Wrapper routine for handling exceptions. */ -int __kprobes -kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) { struct die_args *args = data; int ret = NOTIFY_DONE; @@ -962,22 +979,7 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (args->regs && user_mode_vm(args->regs)) return ret; - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; - ret = NOTIFY_STOP; - } - break; - case DIE_GPF: + if (val == DIE_GPF) { /* * To be potentially processing a kprobe fault and to * trust the result from kprobe_running(), we have @@ -986,14 +988,12 @@ kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *d if (!preemptible() && kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; - break; - default: - break; } return ret; } +NOKPROBE_SYMBOL(kprobe_exceptions_notify); -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -1017,8 +1017,9 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->ip = (unsigned long)(jp->entry); return 1; } +NOKPROBE_SYMBOL(setjmp_pre_handler); -void __kprobes jprobe_return(void) +void jprobe_return(void) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -1034,8 +1035,10 @@ void __kprobes jprobe_return(void) " nop \n"::"b" (kcb->jprobe_saved_sp):"memory"); } +NOKPROBE_SYMBOL(jprobe_return); +NOKPROBE_SYMBOL(jprobe_return_end); -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); @@ -1063,13 +1066,22 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } +NOKPROBE_SYMBOL(longjmp_break_handler); + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + return (addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end); +} int __init arch_init_kprobes(void) { return 0; } -int __kprobes arch_trampoline_kprobe(struct kprobe *p) +int arch_trampoline_kprobe(struct kprobe *p) { return 0; } diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 23ef5c556f06..717b02a22e67 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,8 +25,9 @@ #include "common.h" -static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static nokprobe_inline +int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { /* * Emulate singlestep (and also recover regs->ip) @@ -41,18 +42,19 @@ static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, return 1; } -int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { if (kprobe_ftrace(p)) return __skip_singlestep(p, regs, kcb); else return 0; } +NOKPROBE_SYMBOL(skip_singlestep); /* Ftrace callback handler for kprobes */ -void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; @@ -84,8 +86,9 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, end: local_irq_restore(flags); } +NOKPROBE_SYMBOL(kprobe_ftrace_handler); -int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +int arch_prepare_kprobe_ftrace(struct kprobe *p) { p->ainsn.insn = NULL; p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 898160b42e43..f304773285ae 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -77,7 +77,7 @@ found: } /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) +static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) { #ifdef CONFIG_X86_64 *addr++ = 0x48; @@ -138,7 +138,8 @@ asm ( #define INT3_SIZE sizeof(kprobe_opcode_t) /* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) +static void +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long flags; @@ -168,8 +169,9 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_ } local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback); -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src) { int len = 0, ret; @@ -189,7 +191,7 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) +static int insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -224,7 +226,7 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) } /* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) +static int can_optimize(unsigned long paddr) { unsigned long addr, size = 0, offset = 0; struct insn insn; @@ -275,7 +277,7 @@ static int __kprobes can_optimize(unsigned long paddr) } /* Check optimized_kprobe can actually be optimized. */ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) +int arch_check_optimized_kprobe(struct optimized_kprobe *op) { int i; struct kprobe *p; @@ -290,15 +292,15 @@ int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) } /* Check the addr is within the optimized instructions. */ -int __kprobes -arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr) { return ((unsigned long)op->kp.addr <= addr && (unsigned long)op->kp.addr + op->optinsn.size > addr); } /* Free optimized instruction slot */ -static __kprobes +static void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) { if (op->optinsn.insn) { @@ -308,7 +310,7 @@ void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) } } -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) +void arch_remove_optimized_kprobe(struct optimized_kprobe *op) { __arch_remove_optimized_kprobe(op, 1); } @@ -318,7 +320,7 @@ void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) * Target instructions MUST be relocatable (checked inside) * This is called when new aggr(opt)probe is allocated or reused. */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) { u8 *buf; int ret; @@ -372,7 +374,7 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) * Replace breakpoints (int3) with relative jumps. * Caller must call with locking kprobe_mutex and text_mutex. */ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) +void arch_optimize_kprobes(struct list_head *oplist) { struct optimized_kprobe *op, *tmp; u8 insn_buf[RELATIVEJUMP_SIZE]; @@ -398,7 +400,7 @@ void __kprobes arch_optimize_kprobes(struct list_head *oplist) } /* Replace a relative jump with a breakpoint (int3). */ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) +void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buf[RELATIVEJUMP_SIZE]; @@ -424,8 +426,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, } } -int __kprobes -setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) { struct optimized_kprobe *op; @@ -441,3 +442,4 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) } return 0; } +NOKPROBE_SYMBOL(setup_detour_execution); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7e97371387fd..3dd8e2c4d74a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -251,8 +251,9 @@ u32 kvm_read_and_reset_pf_reason(void) return reason; } EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); +NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); -dotraplinkage void __kprobes +dotraplinkage void do_async_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; @@ -276,6 +277,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; } } +NOKPROBE_SYMBOL(do_async_page_fault); static void __init paravirt_ops_setup(void) { diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index b4872b999a71..c3e985d1751c 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -110,7 +110,7 @@ static void nmi_max_handler(struct irq_work *w) a->handler, whole_msecs, decimal_msecs); } -static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) +static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); struct nmiaction *a; @@ -146,6 +146,7 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 /* return total number of NMI events handled */ return handled; } +NOKPROBE_SYMBOL(nmi_handle); int __register_nmi_handler(unsigned int type, struct nmiaction *action) { @@ -208,7 +209,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) } EXPORT_SYMBOL_GPL(unregister_nmi_handler); -static __kprobes void +static void pci_serr_error(unsigned char reason, struct pt_regs *regs) { /* check to see if anyone registered against these types of errors */ @@ -238,8 +239,9 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(pci_serr_error); -static __kprobes void +static void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -269,8 +271,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) reason &= ~NMI_REASON_CLEAR_IOCHK; outb(reason, NMI_REASON_PORT); } +NOKPROBE_SYMBOL(io_check_error); -static __kprobes void +static void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { int handled; @@ -298,11 +301,12 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Dazed and confused, but trying to continue\n"); } +NOKPROBE_SYMBOL(unknown_nmi_error); static DEFINE_PER_CPU(bool, swallow_nmi); static DEFINE_PER_CPU(unsigned long, last_nmi_rip); -static __kprobes void default_do_nmi(struct pt_regs *regs) +static void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int handled; @@ -401,6 +405,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) else unknown_nmi_error(reason, regs); } +NOKPROBE_SYMBOL(default_do_nmi); /* * NMIs can hit breakpoints which will cause it to lose its @@ -520,7 +525,7 @@ static inline void nmi_nesting_postprocess(void) } #endif -dotraplinkage notrace __kprobes void +dotraplinkage notrace void do_nmi(struct pt_regs *regs, long error_code) { nmi_nesting_preprocess(regs); @@ -537,6 +542,7 @@ do_nmi(struct pt_regs *regs, long error_code) /* On i386, may loop back to preprocess */ nmi_nesting_postprocess(); } +NOKPROBE_SYMBOL(do_nmi); void stop_nmi(void) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b10af835c31..548d25f00c90 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -23,6 +23,7 @@ #include <linux/efi.h> #include <linux/bcd.h> #include <linux/highmem.h> +#include <linux/kprobes.h> #include <asm/bug.h> #include <asm/paravirt.h> @@ -389,6 +390,11 @@ __visible struct pv_cpu_ops pv_cpu_ops = { .end_context_switch = paravirt_nop, }; +/* At this point, native_get/set_debugreg has real function entries */ +NOKPROBE_SYMBOL(native_get_debugreg); +NOKPROBE_SYMBOL(native_set_debugreg); +NOKPROBE_SYMBOL(native_load_idt); + struct pv_apic_ops pv_apic_ops = { #ifdef CONFIG_X86_LOCAL_APIC .startup_ipi_hook = paravirt_nop, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 898d077617a9..ca5b02d405c3 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -413,12 +413,11 @@ void set_personality_ia32(bool x32) set_thread_flag(TIF_ADDR32); /* Mark the associated mm as containing 32-bit tasks. */ - if (current->mm) - current->mm->context.ia32_compat = 1; - if (x32) { clear_thread_flag(TIF_IA32); set_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; /* is_compat_task() uses the presence of the x32 syscall bit flag to determine compat status */ @@ -426,6 +425,8 @@ void set_personality_ia32(bool x32) } else { set_thread_flag(TIF_IA32); clear_thread_flag(TIF_X32); + if (current->mm) + current->mm->context.ia32_compat = TIF_IA32; current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f73b5d435bdc..c6eb418c5627 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/ptrace.h> +#include <linux/uprobes.h> #include <linux/string.h> #include <linux/delay.h> #include <linux/errno.h> @@ -106,7 +107,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_count_dec(); } -static int __kprobes +static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, struct pt_regs *regs, long error_code) { @@ -136,7 +137,38 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, return -1; } -static void __kprobes +static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, + siginfo_t *info) +{ + unsigned long siaddr; + int sicode; + + switch (trapnr) { + default: + return SEND_SIG_PRIV; + + case X86_TRAP_DE: + sicode = FPE_INTDIV; + siaddr = uprobe_get_trap_addr(regs); + break; + case X86_TRAP_UD: + sicode = ILL_ILLOPN; + siaddr = uprobe_get_trap_addr(regs); + break; + case X86_TRAP_AC: + sicode = BUS_ADRALN; + siaddr = 0; + break; + } + + info->si_signo = signr; + info->si_errno = 0; + info->si_code = sicode; + info->si_addr = (void __user *)siaddr; + return info; +} + +static void do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) { @@ -168,60 +200,43 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, } #endif - if (info) - force_sig_info(signr, info, tsk); - else - force_sig(signr, tsk); + force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); } +NOKPROBE_SYMBOL(do_trap); -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - enum ctx_state prev_state; \ - \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, NULL); \ - exception_exit(prev_state); \ +static void do_error_trap(struct pt_regs *regs, long error_code, char *str, + unsigned long trapnr, int signr) +{ + enum ctx_state prev_state = exception_enter(); + siginfo_t info; + + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != + NOTIFY_STOP) { + conditional_sti(regs); + do_trap(trapnr, signr, str, regs, error_code, + fill_trap_info(regs, signr, trapnr, &info)); + } + + exception_exit(prev_state); } -#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ +#define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - siginfo_t info; \ - enum ctx_state prev_state; \ - \ - info.si_signo = signr; \ - info.si_errno = 0; \ - info.si_code = sicode; \ - info.si_addr = (void __user *)siaddr; \ - prev_state = exception_enter(); \ - if (notify_die(DIE_TRAP, str, regs, error_code, \ - trapnr, signr) == NOTIFY_STOP) { \ - exception_exit(prev_state); \ - return; \ - } \ - conditional_sti(regs); \ - do_trap(trapnr, signr, str, regs, error_code, &info); \ - exception_exit(prev_state); \ + do_error_trap(regs, error_code, str, trapnr, signr); \ } -DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) -DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) -DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) -DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) -DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) -DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) -DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) +DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) +DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) +DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) +DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) +DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) +DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) +DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) #ifdef CONFIG_X86_32 -DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) +DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) #endif -DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) +DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) #ifdef CONFIG_X86_64 /* Runs on IST stack */ @@ -263,7 +278,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) } #endif -dotraplinkage void __kprobes +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; @@ -305,13 +320,14 @@ do_general_protection(struct pt_regs *regs, long error_code) pr_cont("\n"); } - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_general_protection); /* May run on IST stack. */ -dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) +dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -327,13 +343,18 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co if (poke_int3_handler(regs)) return; - prev_state = exception_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ +#ifdef CONFIG_KPROBES + if (kprobe_int3_handler(regs)) + return; +#endif + prev_state = exception_enter(); + if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -350,6 +371,7 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_int3); #ifdef CONFIG_X86_64 /* @@ -357,7 +379,7 @@ exit: * for scheduling or signal handling. The actual stack switch is done in * entry.S */ -asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -376,6 +398,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) *regs = *eregs; return regs; } +NOKPROBE_SYMBOL(sync_regs); #endif /* @@ -402,7 +425,7 @@ asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) * * May run on IST stack. */ -dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) +dotraplinkage void do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; enum ctx_state prev_state; @@ -410,8 +433,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; - prev_state = exception_enter(); - get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -440,6 +461,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; +#ifdef CONFIG_KPROBES + if (kprobe_debug_handler(regs)) + goto exit; +#endif + prev_state = exception_enter(); + if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, SIGTRAP) == NOTIFY_STOP) goto exit; @@ -482,13 +509,14 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) exit: exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_debug); /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -void math_error(struct pt_regs *regs, int error_code, int trapnr) +static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; siginfo_t info; @@ -518,7 +546,7 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; - info.si_addr = (void __user *)regs->ip; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); if (trapnr == X86_TRAP_MF) { unsigned short cwd, swd; /* @@ -645,7 +673,7 @@ void math_state_restore(void) */ if (unlikely(restore_fpu_checking(tsk))) { drop_init_fpu(tsk); - force_sig(SIGSEGV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); return; } @@ -653,7 +681,7 @@ void math_state_restore(void) } EXPORT_SYMBOL_GPL(math_state_restore); -dotraplinkage void __kprobes +dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { enum ctx_state prev_state; @@ -679,6 +707,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) #endif exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index ace22916ade3..5d1cbfe4ae58 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -32,20 +32,20 @@ /* Post-execution fixups. */ -/* No fixup needed */ -#define UPROBE_FIX_NONE 0x0 - /* Adjust IP back to vicinity of actual insn */ -#define UPROBE_FIX_IP 0x1 +#define UPROBE_FIX_IP 0x01 /* Adjust the return address of a call insn */ -#define UPROBE_FIX_CALL 0x2 +#define UPROBE_FIX_CALL 0x02 /* Instruction will modify TF, don't change it */ -#define UPROBE_FIX_SETF 0x4 +#define UPROBE_FIX_SETF 0x04 -#define UPROBE_FIX_RIP_AX 0x8000 -#define UPROBE_FIX_RIP_CX 0x4000 +#define UPROBE_FIX_RIP_SI 0x08 +#define UPROBE_FIX_RIP_DI 0x10 +#define UPROBE_FIX_RIP_BX 0x20 +#define UPROBE_FIX_RIP_MASK \ + (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX) #define UPROBE_TRAP_NR UINT_MAX @@ -67,6 +67,7 @@ * to keep gcc from statically optimizing it out, as variable_test_bit makes * some versions of gcc to think only *(unsigned long*) is used. */ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static volatile u32 good_insns_32[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -89,33 +90,12 @@ static volatile u32 good_insns_32[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_32 NULL +#endif -/* Using this for both 64-bit and 32-bit apps */ -static volatile u32 good_2byte_insns[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ---------------------------------------------- */ - W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ - W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ - W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ - W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ - W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ - /* ---------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; - -#ifdef CONFIG_X86_64 /* Good-instruction tables for 64-bit apps */ +#if defined(CONFIG_X86_64) static volatile u32 good_insns_64[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ /* ---------------------------------------------- */ @@ -138,7 +118,33 @@ static volatile u32 good_insns_64[256 / 32] = { /* ---------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; +#else +#define good_insns_64 NULL #endif + +/* Using this for both 64-bit and 32-bit apps */ +static volatile u32 good_2byte_insns[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ + W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ + W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ + W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ + W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ + W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ + W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ + W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ + W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ + W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ + W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ + /* ---------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; #undef W /* @@ -209,16 +215,25 @@ static bool is_prefix_bad(struct insn *insn) return false; } -static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) +static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64) { - insn_init(insn, auprobe->insn, false); + u32 volatile *good_insns; + + insn_init(insn, auprobe->insn, x86_64); + /* has the side-effect of processing the entire instruction */ + insn_get_length(insn); + if (WARN_ON_ONCE(!insn_complete(insn))) + return -ENOEXEC; - /* Skip good instruction prefixes; reject "bad" ones. */ - insn_get_opcode(insn); if (is_prefix_bad(insn)) return -ENOTSUPP; - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) + if (x86_64) + good_insns = good_insns_64; + else + good_insns = good_insns_32; + + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns)) return 0; if (insn->opcode.nbytes == 2) { @@ -230,14 +245,18 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) } #ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !(mm->context.ia32_compat == TIF_IA32); +} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses * its memory operand indirectly through a scratch register. Set - * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address - * accordingly. (The contents of the scratch register will be saved - * before we single-step the modified instruction, and restored - * afterward.) + * defparam->fixups accordingly. (The contents of the scratch register + * will be saved before we single-step the modified instruction, + * and restored afterward). * * We do this because a rip-relative instruction can access only a * relatively small area (+/- 2 GB from the instruction), and the XOL @@ -248,164 +267,192 @@ static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) * * Some useful facts about rip-relative instructions: * - * - There's always a modrm byte. + * - There's always a modrm byte with bit layout "00 reg 101". * - There's never a SIB byte. * - The displacement is always 4 bytes. + * - REX.B=1 bit in REX prefix, which normally extends r/m field, + * has no effect on rip-relative mode. It doesn't make modrm byte + * with r/m=101 refer to register 1101 = R13. */ -static void -handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { u8 *cursor; u8 reg; + u8 reg2; if (!insn_rip_relative(insn)) return; /* - * insn_rip_relative() would have decoded rex_prefix, modrm. + * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm. * Clear REX.b bit (extension of MODRM.rm field): - * we want to encode rax/rcx, not r8/r9. + * we want to encode low numbered reg, not r8+. */ if (insn->rex_prefix.nbytes) { cursor = auprobe->insn + insn_offset_rex_prefix(insn); - *cursor &= 0xfe; /* Clearing REX.B bit */ + /* REX byte has 0100wrxb layout, clearing REX.b bit */ + *cursor &= 0xfe; + } + /* + * Similar treatment for VEX3 prefix. + * TODO: add XOP/EVEX treatment when insn decoder supports them + */ + if (insn->vex_prefix.nbytes == 3) { + /* + * vex2: c5 rvvvvLpp (has no b bit) + * vex3/xop: c4/8f rxbmmmmm wvvvvLpp + * evex: 62 rxbR00mm wvvvv1pp zllBVaaa + * (evex will need setting of both b and x since + * in non-sib encoding evex.x is 4th bit of MODRM.rm) + * Setting VEX3.b (setting because it has inverted meaning): + */ + cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1; + *cursor |= 0x20; } /* + * Convert from rip-relative addressing to register-relative addressing + * via a scratch register. + * + * This is tricky since there are insns with modrm byte + * which also use registers not encoded in modrm byte: + * [i]div/[i]mul: implicitly use dx:ax + * shift ops: implicitly use cx + * cmpxchg: implicitly uses ax + * cmpxchg8/16b: implicitly uses dx:ax and bx:cx + * Encoding: 0f c7/1 modrm + * The code below thinks that reg=1 (cx), chooses si as scratch. + * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m. + * First appeared in Haswell (BMI2 insn). It is vex-encoded. + * Example where none of bx,cx,dx can be used as scratch reg: + * c4 e2 63 f6 0d disp32 mulx disp32(%rip),%ebx,%ecx + * [v]pcmpistri: implicitly uses cx, xmm0 + * [v]pcmpistrm: implicitly uses xmm0 + * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0 + * [v]pcmpestrm: implicitly uses ax, dx, xmm0 + * Evil SSE4.2 string comparison ops from hell. + * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination. + * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm. + * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi). + * AMD says it has no 3-operand form (vex.vvvv must be 1111) + * and that it can have only register operands, not mem + * (its modrm byte must have mode=11). + * If these restrictions will ever be lifted, + * we'll need code to prevent selection of di as scratch reg! + * + * Summary: I don't know any insns with modrm byte which + * use SI register implicitly. DI register is used only + * by one insn (maskmovq) and BX register is used + * only by one too (cmpxchg8b). + * BP is stack-segment based (may be a problem?). + * AX, DX, CX are off-limits (many implicit users). + * SP is unusable (it's stack pointer - think about "pop mem"; + * also, rsp+disp32 needs sib encoding -> insn length change). + */ + + reg = MODRM_REG(insn); /* Fetch modrm.reg */ + reg2 = 0xff; /* Fetch vex.vvvv */ + if (insn->vex_prefix.nbytes == 2) + reg2 = insn->vex_prefix.bytes[1]; + else if (insn->vex_prefix.nbytes == 3) + reg2 = insn->vex_prefix.bytes[2]; + /* + * TODO: add XOP, EXEV vvvv reading. + * + * vex.vvvv field is in bits 6-3, bits are inverted. + * But in 32-bit mode, high-order bit may be ignored. + * Therefore, let's consider only 3 low-order bits. + */ + reg2 = ((reg2 >> 3) & 0x7) ^ 0x7; + /* + * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15. + * + * Choose scratch reg. Order is important: must not select bx + * if we can use si (cmpxchg8b case!) + */ + if (reg != 6 && reg2 != 6) { + reg2 = 6; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI; + } else if (reg != 7 && reg2 != 7) { + reg2 = 7; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI; + /* TODO (paranoia): force maskmovq to not use di */ + } else { + reg2 = 3; + auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX; + } + /* * Point cursor at the modrm byte. The next 4 bytes are the * displacement. Beyond the displacement, for some instructions, * is the immediate operand. */ cursor = auprobe->insn + insn_offset_modrm(insn); - insn_get_length(insn); - /* - * Convert from rip-relative addressing to indirect addressing - * via a scratch register. Change the r/m field from 0x5 (%rip) - * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. + * Change modrm from "00 reg 101" to "10 reg reg2". Example: + * 89 05 disp32 mov %eax,disp32(%rip) becomes + * 89 86 disp32 mov %eax,disp32(%rsi) */ - reg = MODRM_REG(insn); - if (reg == 0) { - /* - * The register operand (if any) is either the A register - * (%rax, %eax, etc.) or (if the 0x4 bit is set in the - * REX prefix) %r8. In any case, we know the C register - * is NOT the register operand, so we use %rcx (register - * #1) for the scratch register. - */ - auprobe->fixups = UPROBE_FIX_RIP_CX; - /* Change modrm from 00 000 101 to 00 000 001. */ - *cursor = 0x1; - } else { - /* Use %rax (register #0) for the scratch register. */ - auprobe->fixups = UPROBE_FIX_RIP_AX; - /* Change modrm from 00 xxx 101 to 00 xxx 000 */ - *cursor = (reg << 3); - } - - /* Target address = address of next instruction + (signed) offset */ - auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; + *cursor = 0x80 | (reg << 3) | reg2; +} - /* Displacement field is gone; slide immediate field (if any) over. */ - if (insn->immediate.nbytes) { - cursor++; - memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); - } +static inline unsigned long * +scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI) + return ®s->si; + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI) + return ®s->di; + return ®s->bx; } /* * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register. */ -static void -pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) -{ - if (auprobe->fixups & UPROBE_FIX_RIP_AX) { - autask->saved_scratch_register = regs->ax; - regs->ax = current->utask->vaddr; - regs->ax += auprobe->rip_rela_target_address; - } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { - autask->saved_scratch_register = regs->cx; - regs->cx = current->utask->vaddr; - regs->cx += auprobe->rip_rela_target_address; - } -} - -static void -handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { - struct arch_uprobe_task *autask; - - autask = ¤t->utask->autask; - if (auprobe->fixups & UPROBE_FIX_RIP_AX) - regs->ax = autask->saved_scratch_register; - else - regs->cx = autask->saved_scratch_register; + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); - /* - * The original instruction includes a displacement, and so - * is 4 bytes longer than what we've just single-stepped. - * Caller may need to apply other fixups to handle stuff - * like "jmpq *...(%rip)" and "callq *...(%rip)". - */ - if (correction) - *correction += 4; + utask->autask.saved_scratch_register = *sr; + *sr = utask->vaddr + auprobe->defparam.ilen; } } -static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { - insn_init(insn, auprobe->insn, true); - - /* Skip good instruction prefixes; reject "bad" ones. */ - insn_get_opcode(insn); - if (is_prefix_bad(insn)) - return -ENOTSUPP; + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { + struct uprobe_task *utask = current->utask; + unsigned long *sr = scratch_reg(auprobe, regs); - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) - return 0; - - if (insn->opcode.nbytes == 2) { - if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) - return 0; + *sr = utask->autask.saved_scratch_register; } - return -ENOTSUPP; } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) +#else /* 32-bit: */ +static inline bool is_64bit_mm(struct mm_struct *mm) { - if (mm->context.ia32_compat) - return validate_insn_32bits(auprobe, insn); - return validate_insn_64bits(auprobe, insn); + return false; } -#else /* 32-bit: */ /* * No RIP-relative addressing on 32-bit */ -static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) +static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) { } -static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, - struct arch_uprobe_task *autask) +static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } -static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, - long *correction) +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { } - -static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) -{ - return validate_insn_32bits(auprobe, insn); -} #endif /* CONFIG_X86_64 */ struct uprobe_xol_ops { bool (*emulate)(struct arch_uprobe *, struct pt_regs *); int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); int (*post_xol)(struct arch_uprobe *, struct pt_regs *); + void (*abort)(struct arch_uprobe *, struct pt_regs *); }; static inline int sizeof_long(void) @@ -415,50 +462,67 @@ static inline int sizeof_long(void) static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { - pre_xol_rip_insn(auprobe, regs, ¤t->utask->autask); + riprel_pre_xol(auprobe, regs); return 0; } -/* - * Adjust the return address pushed by a call insn executed out of line. - */ -static int adjust_ret_addr(unsigned long sp, long correction) +static int push_ret_address(struct pt_regs *regs, unsigned long ip) { - int rasize = sizeof_long(); - long ra; - - if (copy_from_user(&ra, (void __user *)sp, rasize)) - return -EFAULT; + unsigned long new_sp = regs->sp - sizeof_long(); - ra += correction; - if (copy_to_user((void __user *)sp, &ra, rasize)) + if (copy_to_user((void __user *)new_sp, &ip, sizeof_long())) return -EFAULT; + regs->sp = new_sp; return 0; } +/* + * We have to fix things up as follows: + * + * Typically, the new ip is relative to the copied instruction. We need + * to make it relative to the original instruction (FIX_IP). Exceptions + * are return instructions and absolute or indirect jump or call instructions. + * + * If the single-stepped instruction was a call, the return address that + * is atop the stack is the address following the copied instruction. We + * need to make it the address following the original instruction (FIX_CALL). + * + * If the original instruction was a rip-relative instruction such as + * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent + * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)". + * We need to restore the contents of the scratch register + * (FIX_RIP_reg). + */ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - long correction = (long)(utask->vaddr - utask->xol_vaddr); - handle_riprel_post_xol(auprobe, regs, &correction); - if (auprobe->fixups & UPROBE_FIX_IP) + riprel_post_xol(auprobe, regs); + if (auprobe->defparam.fixups & UPROBE_FIX_IP) { + long correction = utask->vaddr - utask->xol_vaddr; regs->ip += correction; - - if (auprobe->fixups & UPROBE_FIX_CALL) { - if (adjust_ret_addr(regs->sp, correction)) { - regs->sp += sizeof_long(); + } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { + regs->sp += sizeof_long(); /* Pop incorrect return address */ + if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART; - } } + /* popf; tell the caller to not touch TF */ + if (auprobe->defparam.fixups & UPROBE_FIX_SETF) + utask->autask.saved_tf = true; return 0; } +static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + riprel_post_xol(auprobe, regs); +} + static struct uprobe_xol_ops default_xol_ops = { .pre_xol = default_pre_xol_op, .post_xol = default_post_xol_op, + .abort = default_abort_op, }; static bool branch_is_call(struct arch_uprobe *auprobe) @@ -520,7 +584,6 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long offs = (long)auprobe->branch.offs; if (branch_is_call(auprobe)) { - unsigned long new_sp = regs->sp - sizeof_long(); /* * If it fails we execute this (mangled, see the comment in * branch_clear_offset) insn out-of-line. In the likely case @@ -530,9 +593,8 @@ static bool branch_emulate_op(struct arch_uprobe *auprobe, struct pt_regs *regs) * * But there is corner case, see the comment in ->post_xol(). */ - if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) + if (push_ret_address(regs, new_ip)) return false; - regs->sp = new_sp; } else if (!check_jmp_cond(auprobe, regs)) { offs = 0; } @@ -583,11 +645,7 @@ static struct uprobe_xol_ops branch_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); - - /* has the side-effect of processing the entire instruction */ - insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) - return -ENOEXEC; + int i; switch (opc1) { case 0xeb: /* jmp 8 */ @@ -612,6 +670,16 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) return -ENOSYS; } + /* + * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. + * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. + * No one uses these insns, reject any branch insns with such prefix. + */ + for (i = 0; i < insn->prefixes.nbytes; i++) { + if (insn->prefixes.bytes[i] == 0x66) + return -ENOTSUPP; + } + auprobe->branch.opc1 = opc1; auprobe->branch.ilen = insn->length; auprobe->branch.offs = insn->immediate.value; @@ -630,10 +698,10 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) { struct insn insn; - bool fix_ip = true, fix_call = false; + u8 fix_ip_or_call = UPROBE_FIX_IP; int ret; - ret = validate_insn_bits(auprobe, mm, &insn); + ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); if (ret) return ret; @@ -642,44 +710,39 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, return ret; /* - * Figure out which fixups arch_uprobe_post_xol() will need to perform, - * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups - * is either zero or it reflects rip-related fixups. + * Figure out which fixups default_post_xol_op() will need to perform, + * and annotate defparam->fixups accordingly. */ switch (OPCODE1(&insn)) { case 0x9d: /* popf */ - auprobe->fixups |= UPROBE_FIX_SETF; + auprobe->defparam.fixups |= UPROBE_FIX_SETF; break; case 0xc3: /* ret or lret -- ip is correct */ case 0xcb: case 0xc2: case 0xca: - fix_ip = false; + case 0xea: /* jmp absolute -- ip is correct */ + fix_ip_or_call = 0; break; case 0x9a: /* call absolute - Fix return addr, not ip */ - fix_call = true; - fix_ip = false; - break; - case 0xea: /* jmp absolute -- ip is correct */ - fix_ip = false; + fix_ip_or_call = UPROBE_FIX_CALL; break; case 0xff: - insn_get_modrm(&insn); switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */ - fix_call = true; + fix_ip_or_call = UPROBE_FIX_CALL; + break; case 4: case 5: /* jmp or ljmp, indirect */ - fix_ip = false; + fix_ip_or_call = 0; + break; } /* fall through */ default: - handle_riprel_insn(auprobe, &insn); + riprel_analyze(auprobe, &insn); } - if (fix_ip) - auprobe->fixups |= UPROBE_FIX_IP; - if (fix_call) - auprobe->fixups |= UPROBE_FIX_CALL; + auprobe->defparam.ilen = insn.length; + auprobe->defparam.fixups |= fix_ip_or_call; auprobe->ops = &default_xol_ops; return 0; @@ -694,6 +757,12 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + if (auprobe->ops->pre_xol) { + int err = auprobe->ops->pre_xol(auprobe, regs); + if (err) + return err; + } + regs->ip = utask->xol_vaddr; utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; @@ -703,8 +772,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) set_task_blockstep(current, false); - if (auprobe->ops->pre_xol) - return auprobe->ops->pre_xol(auprobe, regs); return 0; } @@ -732,56 +799,42 @@ bool arch_uprobe_xol_was_trapped(struct task_struct *t) * single-step, we single-stepped a copy of the instruction. * * This function prepares to resume execution after the single-step. - * We have to fix things up as follows: - * - * Typically, the new ip is relative to the copied instruction. We need - * to make it relative to the original instruction (FIX_IP). Exceptions - * are return instructions and absolute or indirect jump or call instructions. - * - * If the single-stepped instruction was a call, the return address that - * is atop the stack is the address following the copied instruction. We - * need to make it the address following the original instruction (FIX_CALL). - * - * If the original instruction was a rip-relative instruction such as - * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent - * instruction using a scratch register -- e.g., "movl %edx,(%rax)". - * We need to restore the contents of the scratch register and adjust - * the ip, keeping in mind that the instruction we executed is 4 bytes - * shorter than the original instruction (since we squeezed out the offset - * field). (FIX_RIP_AX or FIX_RIP_CX) */ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; + bool send_sigtrap = utask->autask.saved_tf; + int err = 0; WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + current->thread.trap_nr = utask->autask.saved_trap_nr; if (auprobe->ops->post_xol) { - int err = auprobe->ops->post_xol(auprobe, regs); + err = auprobe->ops->post_xol(auprobe, regs); if (err) { - arch_uprobe_abort_xol(auprobe, regs); /* - * Restart the probed insn. ->post_xol() must ensure - * this is really possible if it returns -ERESTART. + * Restore ->ip for restart or post mortem analysis. + * ->post_xol() must not return -ERESTART unless this + * is really possible. */ + regs->ip = utask->vaddr; if (err == -ERESTART) - return 0; - return err; + err = 0; + send_sigtrap = false; } } - - current->thread.trap_nr = utask->autask.saved_trap_nr; /* * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP * so we can get an extra SIGTRAP if we do not clear TF. We need * to examine the opcode to make it right. */ - if (utask->autask.saved_tf) + if (send_sigtrap) send_sig(SIGTRAP, current, 0); - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + + if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; - return 0; + return err; } /* callback routine for handling exceptions. */ @@ -815,18 +868,18 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, /* * This function gets called when XOL instruction either gets trapped or - * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. - * Reset the instruction pointer to its probed address for the potential - * restart or for post mortem analysis. + * the thread has a fatal signal. Reset the instruction pointer to its + * probed address for the potential restart or for post mortem analysis. */ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) { struct uprobe_task *utask = current->utask; - current->thread.trap_nr = utask->autask.saved_trap_nr; - handle_riprel_post_xol(auprobe, regs, NULL); - instruction_pointer_set(regs, utask->vaddr); + if (auprobe->ops->abort) + auprobe->ops->abort(auprobe, regs); + current->thread.trap_nr = utask->autask.saved_trap_nr; + regs->ip = utask->vaddr; /* clear TF if it was set by us in arch_uprobe_pre_xol() */ if (!utask->autask.saved_tf) regs->flags &= ~X86_EFLAGS_TF; diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 2930ae05d773..28f85c916712 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -4,8 +4,8 @@ * (inspired by Andi Kleen's thunk_64.S) * Subject to the GNU public license, v.2. No warranty of any kind. */ - #include <linux/linkage.h> + #include <asm/asm.h> #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ @@ -22,6 +22,7 @@ popl %ecx popl %eax ret + _ASM_NOKPROBE(\name) .endm thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index a63efd6bb6a5..92d9feaff42b 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/calling.h> +#include <asm/asm.h> /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 @@ -25,6 +26,7 @@ call \func jmp restore CFI_ENDPROC + _ASM_NOKPROBE(\name) .endm #ifdef CONFIG_TRACE_IRQFLAGS @@ -43,3 +45,4 @@ restore: RESTORE_ARGS ret CFI_ENDPROC + _ASM_NOKPROBE(restore) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 858b47b5221b..36642793e315 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -8,7 +8,7 @@ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/module.h> /* search_exception_table */ #include <linux/bootmem.h> /* max_low_pfn */ -#include <linux/kprobes.h> /* __kprobes, ... */ +#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ #include <linux/hugetlb.h> /* hstate_index_to_shift */ @@ -46,7 +46,7 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int __kprobes +static nokprobe_inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) @@ -55,7 +55,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int __kprobes kprobes_fault(struct pt_regs *regs) +static nokprobe_inline int kprobes_fault(struct pt_regs *regs) { int ret = 0; @@ -262,7 +262,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -292,6 +292,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); /* * Did it hit the DOS screen memory VA from vm86 mode? @@ -359,7 +360,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline __kprobes int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -426,6 +427,7 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) return 0; } +NOKPROBE_SYMBOL(vmalloc_fault); #ifdef CONFIG_CPU_SUP_AMD static const char errata93_warning[] = @@ -928,7 +930,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline __kprobes int +static noinline int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; @@ -976,6 +978,7 @@ spurious_fault(unsigned long error_code, unsigned long address) return ret; } +NOKPROBE_SYMBOL(spurious_fault); int show_unhandled_signals = 1; @@ -1031,7 +1034,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * {,trace_}do_page_fault() have notrace on. Having this an actual function * guarantees there's a function trace entry. */ -static void __kprobes noinline +static noinline void __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -1254,8 +1257,9 @@ good_area: up_read(&mm->mmap_sem); } +NOKPROBE_SYMBOL(__do_page_fault); -dotraplinkage void __kprobes notrace +dotraplinkage void notrace do_page_fault(struct pt_regs *regs, unsigned long error_code) { unsigned long address = read_cr2(); /* Get the faulting address */ @@ -1273,10 +1277,12 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(do_page_fault); #ifdef CONFIG_TRACING -static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static nokprobe_inline void +trace_page_fault_entries(unsigned long address, struct pt_regs *regs, + unsigned long error_code) { if (user_mode(regs)) trace_page_fault_user(address, regs, error_code); @@ -1284,7 +1290,7 @@ static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void __kprobes notrace +dotraplinkage void notrace trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { /* @@ -1301,4 +1307,5 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +NOKPROBE_SYMBOL(trace_do_page_fault); #endif /* CONFIG_TRACING */ diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 9769df094035..3c0809a0631f 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -9,18 +9,9 @@ VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y VDSO32-$(CONFIG_COMPAT) := y -vdso-install-$(VDSO64-y) += vdso.so -vdso-install-$(VDSOX32-y) += vdsox32.so -vdso-install-$(VDSO32-y) += $(vdso32-images) - - # files to link into the vdso -vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o - -vobjs-$(VDSOX32-y) += $(vobjx32s-compat) - -# Filter out x32 objects. -vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) +vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o +vobjs-nox32 := vdso-fakesections.o # files to link into kernel obj-y += vma.o @@ -34,7 +25,7 @@ vdso_img-$(VDSO32-y) += 32-sysenter obj-$(VDSO32-y) += vdso32-setup.o -vobjs := $(foreach F,$(vobj64s),$(obj)/$F) +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) $(obj)/vdso.o: $(obj)/vdso.so @@ -104,7 +95,13 @@ VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ -Wl,-z,max-page-size=4096 \ -Wl,-z,common-page-size=4096 -vobjx32s-y := $(vobj64s:.o=-x32.o) +# 64-bit objects to re-brand as x32 +vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) + +# x32-rebranded versions +vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o) + +# same thing, but in the output directory vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) # Convert 64bit object file to x32 for x32 vDSO. @@ -176,15 +173,20 @@ VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ GCOV_PROFILE := n # -# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). +# Install the unstripped copies of vdso*.so. # -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ -$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE +quiet_cmd_vdso_install = INSTALL $(@:install_%=%) + cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%) + +vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) + +$(MODLIB)/vdso: FORCE @mkdir -p $(MODLIB)/vdso + +$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE $(call cmd,vdso_install) -PHONY += vdso_install $(vdso-install-y) -vdso_install: $(vdso-install-y) +PHONY += vdso_install $(vdso_img_insttargets) +vdso_install: $(vdso_img_insttargets) FORCE clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c new file mode 100644 index 000000000000..cb8a8d72c24b --- /dev/null +++ b/arch/x86/vdso/vdso-fakesections.c @@ -0,0 +1,32 @@ +/* + * Copyright 2014 Andy Lutomirski + * Subject to the GNU Public License, v.2 + * + * Hack to keep broken Go programs working. + * + * The Go runtime had a couple of bugs: it would read the section table to try + * to figure out how many dynamic symbols there were (it shouldn't have looked + * at the section table at all) and, if there were no SHT_SYNDYM section table + * entry, it would use an uninitialized value for the number of symbols. As a + * workaround, we supply a minimal section table. vdso2c will adjust the + * in-memory image so that "vdso_fake_sections" becomes the section table. + * + * The bug was introduced by: + * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31) + * and is being addressed in the Go runtime in this issue: + * https://code.google.com/p/go/issues/detail?id=8197 + */ + +#ifndef __x86_64__ +#error This hack is specific to the 64-bit vDSO +#endif + +#include <linux/elf.h> + +extern const __visible struct elf64_shdr vdso_fake_sections[]; +const __visible struct elf64_shdr vdso_fake_sections[] = { + { + .sh_type = SHT_DYNSYM, + .sh_entsize = sizeof(Elf64_Sym), + } +}; diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c index 450ac6eaf613..7a6bf50f9165 100644 --- a/arch/x86/vdso/vdso2c.c +++ b/arch/x86/vdso/vdso2c.c @@ -54,7 +54,7 @@ static void fail(const char *format, ...) } /* - * Evil macros to do a little-endian read. + * Evil macros for little-endian reads and writes */ #define GLE(x, bits, ifnot) \ __builtin_choose_expr( \ @@ -62,11 +62,24 @@ static void fail(const char *format, ...) (__typeof__(*(x)))get_unaligned_le##bits(x), ifnot) extern void bad_get_le(void); -#define LAST_LE(x) \ +#define LAST_GLE(x) \ __builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le()) #define GET_LE(x) \ - GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x)))) + GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x)))) + +#define PLE(x, val, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + put_unaligned_le##bits((val), (x)), ifnot) + +extern void bad_put_le(void); +#define LAST_PLE(x, val) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le()) + +#define PUT_LE(x, val) \ + PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) + #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index 8a074637a576..c6eefaf389b9 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -18,6 +18,8 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) const char *secstrings; uint64_t syms[NSYMS] = {}; + uint64_t fake_sections_value = 0, fake_sections_size = 0; + Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff)); /* Walk the segment table. */ @@ -84,6 +86,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) GET_LE(&symtab_hdr->sh_entsize) * i; const char *name = addr + GET_LE(&strtab_hdr->sh_offset) + GET_LE(&sym->st_name); + for (k = 0; k < NSYMS; k++) { if (!strcmp(name, required_syms[k])) { if (syms[k]) { @@ -93,6 +96,13 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) syms[k] = GET_LE(&sym->st_value); } } + + if (!strcmp(name, "vdso_fake_sections")) { + if (fake_sections_value) + fail("duplicate vdso_fake_sections\n"); + fake_sections_value = GET_LE(&sym->st_value); + fake_sections_size = GET_LE(&sym->st_size); + } } /* Validate mapping addresses. */ @@ -112,11 +122,14 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) if (syms[sym_end_mapping] % 4096) fail("end_mapping must be a multiple of 4096\n"); - /* Remove sections. */ - hdr->e_shoff = 0; - hdr->e_shentsize = 0; - hdr->e_shnum = 0; - hdr->e_shstrndx = htole16(SHN_UNDEF); + /* Remove sections or use fakes */ + if (fake_sections_size % sizeof(Elf_Shdr)) + fail("vdso_fake_sections size is not a multiple of %ld\n", + (long)sizeof(Elf_Shdr)); + PUT_LE(&hdr->e_shoff, fake_sections_value); + PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0); + PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr)); + PUT_LE(&hdr->e_shstrndx, SHN_UNDEF); if (!name) { fwrite(addr, load_size, 1, outfile); diff --git a/block/blk-core.c b/block/blk-core.c index 9aca8c71e70b..f6f6b9af3e3f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -43,6 +43,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index a842c71dcc21..02351e217165 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #include <linux/nvme.h> @@ -46,16 +42,26 @@ #include <scsi/sg.h> #include <asm-generic/io-64-nonatomic-lo-hi.h> -#define NVME_Q_DEPTH 1024 +#include <trace/events/block.h> + +#define NVME_Q_DEPTH 1024 #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) #define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) -#define ADMIN_TIMEOUT (60 * HZ) -#define IOD_TIMEOUT (4 * NVME_IO_TIMEOUT) +#define ADMIN_TIMEOUT (admin_timeout * HZ) +#define IOD_TIMEOUT (retry_time * HZ) + +static unsigned char admin_timeout = 60; +module_param(admin_timeout, byte, 0644); +MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); -unsigned char io_timeout = 30; -module_param(io_timeout, byte, 0644); +unsigned char nvme_io_timeout = 30; +module_param_named(io_timeout, nvme_io_timeout, byte, 0644); MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); +static unsigned char retry_time = 30; +module_param(retry_time, byte, 0644); +MODULE_PARM_DESC(retry_time, "time in seconds to retry failed I/O"); + static int nvme_major; module_param(nvme_major, int, 0); @@ -67,6 +73,7 @@ static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; static struct workqueue_struct *nvme_workq; static wait_queue_head_t nvme_kthread_wait; +static struct notifier_block nvme_nb; static void nvme_reset_failed_dev(struct work_struct *ws); @@ -199,16 +206,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx, #define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) #define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) #define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) -#define CMD_CTX_FLUSH (0x318 + CMD_CTX_BASE) -#define CMD_CTX_ABORT (0x31C + CMD_CTX_BASE) +#define CMD_CTX_ABORT (0x318 + CMD_CTX_BASE) static void special_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_completion *cqe) { if (ctx == CMD_CTX_CANCELLED) return; - if (ctx == CMD_CTX_FLUSH) - return; if (ctx == CMD_CTX_ABORT) { ++nvmeq->dev->abort_limit; return; @@ -247,8 +251,9 @@ static void *free_cmdid(struct nvme_queue *nvmeq, int cmdid, void *ctx; struct nvme_cmd_info *info = nvme_cmd_info(nvmeq); - if (cmdid >= nvmeq->q_depth) { - *fn = special_completion; + if (cmdid >= nvmeq->q_depth || !info[cmdid].fn) { + if (fn) + *fn = special_completion; return CMD_CTX_INVALID; } if (fn) @@ -281,9 +286,17 @@ static struct nvme_queue *raw_nvmeq(struct nvme_dev *dev, int qid) static struct nvme_queue *get_nvmeq(struct nvme_dev *dev) __acquires(RCU) { + struct nvme_queue *nvmeq; unsigned queue_id = get_cpu_var(*dev->io_queue); + rcu_read_lock(); - return rcu_dereference(dev->queues[queue_id]); + nvmeq = rcu_dereference(dev->queues[queue_id]); + if (nvmeq) + return nvmeq; + + rcu_read_unlock(); + put_cpu_var(*dev->io_queue); + return NULL; } static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) @@ -295,8 +308,15 @@ static void put_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) static struct nvme_queue *lock_nvmeq(struct nvme_dev *dev, int q_idx) __acquires(RCU) { + struct nvme_queue *nvmeq; + rcu_read_lock(); - return rcu_dereference(dev->queues[q_idx]); + nvmeq = rcu_dereference(dev->queues[q_idx]); + if (nvmeq) + return nvmeq; + + rcu_read_unlock(); + return NULL; } static void unlock_nvmeq(struct nvme_queue *nvmeq) __releases(RCU) @@ -387,25 +407,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) static void nvme_start_io_acct(struct bio *bio) { struct gendisk *disk = bio->bi_bdev->bd_disk; - const int rw = bio_data_dir(bio); - int cpu = part_stat_lock(); - part_round_stats(cpu, &disk->part0); - part_stat_inc(cpu, &disk->part0, ios[rw]); - part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); - part_inc_in_flight(&disk->part0, rw); - part_stat_unlock(); + if (blk_queue_io_stat(disk->queue)) { + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + part_round_stats(cpu, &disk->part0); + part_stat_inc(cpu, &disk->part0, ios[rw]); + part_stat_add(cpu, &disk->part0, sectors[rw], + bio_sectors(bio)); + part_inc_in_flight(&disk->part0, rw); + part_stat_unlock(); + } } static void nvme_end_io_acct(struct bio *bio, unsigned long start_time) { struct gendisk *disk = bio->bi_bdev->bd_disk; - const int rw = bio_data_dir(bio); - unsigned long duration = jiffies - start_time; - int cpu = part_stat_lock(); - part_stat_add(cpu, &disk->part0, ticks[rw], duration); - part_round_stats(cpu, &disk->part0); - part_dec_in_flight(&disk->part0, rw); - part_stat_unlock(); + if (blk_queue_io_stat(disk->queue)) { + const int rw = bio_data_dir(bio); + unsigned long duration = jiffies - start_time; + int cpu = part_stat_lock(); + part_stat_add(cpu, &disk->part0, ticks[rw], duration); + part_round_stats(cpu, &disk->part0); + part_dec_in_flight(&disk->part0, rw); + part_stat_unlock(); + } } static void bio_completion(struct nvme_queue *nvmeq, void *ctx, @@ -414,6 +439,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct bio *bio = iod->private; u16 status = le16_to_cpup(&cqe->status) >> 1; + int error = 0; if (unlikely(status)) { if (!(status & NVME_SC_DNR || @@ -426,6 +452,7 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, wake_up(&nvmeq->sq_full); return; } + error = -EIO; } if (iod->nents) { dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents, @@ -433,10 +460,9 @@ static void bio_completion(struct nvme_queue *nvmeq, void *ctx, nvme_end_io_acct(bio, iod->start_time); } nvme_free_iod(nvmeq->dev, iod); - if (status) - bio_endio(bio, -EIO); - else - bio_endio(bio, 0); + + trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, error); + bio_endio(bio, error); } /* length is in bytes. gfp flags indicates whether we may sleep. */ @@ -525,6 +551,8 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq, if (!split) return -ENOMEM; + trace_block_split(bdev_get_queue(bio->bi_bdev), bio, + split->bi_iter.bi_sector); bio_chain(split, bio); if (!waitqueue_active(&nvmeq->sq_full)) @@ -627,16 +655,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, return 0; } -int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns) -{ - int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH, - special_completion, NVME_IO_TIMEOUT); - if (unlikely(cmdid < 0)) - return cmdid; - - return nvme_submit_flush(nvmeq, ns, cmdid); -} - static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) { struct bio *bio = iod->private; @@ -652,7 +670,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) if (bio->bi_rw & REQ_DISCARD) return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid); - if ((bio->bi_rw & REQ_FLUSH) && !iod->nents) + if (bio->bi_rw & REQ_FLUSH) return nvme_submit_flush(nvmeq, ns, cmdid); control = 0; @@ -686,6 +704,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod) return 0; } +static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio) +{ + struct bio *split = bio_clone(bio, GFP_ATOMIC); + if (!split) + return -ENOMEM; + + split->bi_iter.bi_size = 0; + split->bi_phys_segments = 0; + bio->bi_rw &= ~REQ_FLUSH; + bio_chain(split, bio); + + if (!waitqueue_active(&nvmeq->sq_full)) + add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait); + bio_list_add(&nvmeq->sq_cong, split); + bio_list_add(&nvmeq->sq_cong, bio); + wake_up_process(nvme_thread); + + return 0; +} + /* * Called with local interrupts disabled and the q_lock held. May not sleep. */ @@ -696,11 +734,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, int psegs = bio_phys_segments(ns->queue, bio); int result; - if ((bio->bi_rw & REQ_FLUSH) && psegs) { - result = nvme_submit_flush_data(nvmeq, ns); - if (result) - return result; - } + if ((bio->bi_rw & REQ_FLUSH) && psegs) + return nvme_split_flush_data(nvmeq, bio); iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC); if (!iod) @@ -795,7 +830,6 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio) int result = -EBUSY; if (!nvmeq) { - put_nvmeq(NULL); bio_endio(bio, -EIO); return; } @@ -870,10 +904,8 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, struct nvme_queue *nvmeq; nvmeq = lock_nvmeq(dev, q_idx); - if (!nvmeq) { - unlock_nvmeq(nvmeq); + if (!nvmeq) return -ENODEV; - } cmdinfo.task = current; cmdinfo.status = -EINTR; @@ -898,9 +930,10 @@ static int nvme_submit_sync_cmd(struct nvme_dev *dev, int q_idx, if (cmdinfo.status == -EINTR) { nvmeq = lock_nvmeq(dev, q_idx); - if (nvmeq) + if (nvmeq) { nvme_abort_command(nvmeq, cmdid); - unlock_nvmeq(nvmeq); + unlock_nvmeq(nvmeq); + } return -EINTR; } @@ -1358,7 +1391,8 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) return -EINTR; if (time_after(jiffies, timeout)) { dev_err(&dev->pci_dev->dev, - "Device not ready; aborting initialisation\n"); + "Device not ready; aborting %s\n", enabled ? + "initialisation" : "reset"); return -ENODEV; } } @@ -1481,7 +1515,11 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, goto put_pages; } + err = -ENOMEM; iod = nvme_alloc_iod(count, length, GFP_KERNEL); + if (!iod) + goto put_pages; + sg = iod->sg; sg_init_table(sg, count); for (i = 0; i < count; i++) { @@ -1494,7 +1532,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, sg_mark_end(&sg[i - 1]); iod->nents = count; - err = -ENOMEM; nents = dma_map_sg(&dev->pci_dev->dev, sg, count, write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (!nents) @@ -1894,6 +1931,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid, blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); if (dev->max_hw_sectors) blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); + if (dev->vwc & NVME_CTRL_VWC_PRESENT) + blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); disk->major = nvme_major; disk->first_minor = 0; @@ -2062,8 +2101,13 @@ static int set_queue_count(struct nvme_dev *dev, int count) status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, &result); - if (status) - return status < 0 ? -EIO : -EBUSY; + if (status < 0) + return status; + if (status > 0) { + dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", + status); + return -EBUSY; + } return min(result & 0xffff, result >> 16) + 1; } @@ -2072,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); } +static void nvme_cpu_workfn(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work); + if (dev->initialized) + nvme_assign_io_queues(dev); +} + static int nvme_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - struct nvme_dev *dev = container_of(self, struct nvme_dev, nb); + struct nvme_dev *dev; + switch (action) { case CPU_ONLINE: case CPU_DEAD: - nvme_assign_io_queues(dev); + spin_lock(&dev_list_lock); + list_for_each_entry(dev, &dev_list, node) + schedule_work(&dev->cpu_work); + spin_unlock(&dev_list_lock); break; } return NOTIFY_OK; @@ -2148,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) nvme_free_queues(dev, nr_io_queues + 1); nvme_assign_io_queues(dev); - dev->nb.notifier_call = &nvme_cpu_notify; - result = register_hotcpu_notifier(&dev->nb); - if (result) - goto free_queues; - return 0; free_queues: @@ -2184,6 +2234,7 @@ static int nvme_dev_add(struct nvme_dev *dev) res = nvme_identify(dev, 0, 1, dma_addr); if (res) { + dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res); res = -EIO; goto out; } @@ -2192,6 +2243,7 @@ static int nvme_dev_add(struct nvme_dev *dev) nn = le32_to_cpup(&ctrl->nn); dev->oncs = le16_to_cpup(&ctrl->oncs); dev->abort_limit = ctrl->acl + 1; + dev->vwc = ctrl->vwc; memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); @@ -2450,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) int i; dev->initialized = 0; - unregister_hotcpu_notifier(&dev->nb); - nvme_dev_list_remove(dev); if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) { @@ -2722,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->namespaces); dev->reset_workfn = nvme_reset_failed_dev; INIT_WORK(&dev->reset_work, nvme_reset_workfn); + INIT_WORK(&dev->cpu_work, nvme_cpu_workfn); dev->pci_dev = pdev; pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -2801,6 +2852,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->reset_work); + flush_work(&dev->cpu_work); misc_deregister(&dev->miscdev); nvme_dev_remove(dev); nvme_dev_shutdown(dev); @@ -2889,11 +2941,18 @@ static int __init nvme_init(void) else if (result > 0) nvme_major = result; - result = pci_register_driver(&nvme_driver); + nvme_nb.notifier_call = &nvme_cpu_notify; + result = register_hotcpu_notifier(&nvme_nb); if (result) goto unregister_blkdev; + + result = pci_register_driver(&nvme_driver); + if (result) + goto unregister_hotcpu; return 0; + unregister_hotcpu: + unregister_hotcpu_notifier(&nvme_nb); unregister_blkdev: unregister_blkdev(nvme_major, "nvme"); kill_workq: @@ -2904,9 +2963,11 @@ static int __init nvme_init(void) static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); + unregister_hotcpu_notifier(&nvme_nb); unregister_blkdev(nvme_major, "nvme"); destroy_workqueue(nvme_workq); BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); + _nvme_check_size(); } MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index 2c3f5be06da1..a4cd6d691c63 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -1,6 +1,6 @@ /* * NVM Express device driver - * Copyright (c) 2011, Intel Corporation. + * Copyright (c) 2011-2014, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,10 +10,6 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ /* @@ -243,8 +239,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #define READ_CAP_16_RESP_SIZE 32 /* NVMe Namespace and Command Defines */ -#define NVME_GET_SMART_LOG_PAGE 0x02 -#define NVME_GET_FEAT_TEMP_THRESH 0x04 #define BYTES_TO_DWORDS 4 #define NVME_MAX_FIRMWARE_SLOT 7 @@ -686,6 +680,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, u8 resp_data_format = 0x02; u8 protect; u8 cmdque = 0x01 << 1; + u8 fw_offset = sizeof(dev->firmware_rev); mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL); @@ -721,7 +716,11 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns, inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */ strncpy(&inq_response[8], "NVMe ", 8); strncpy(&inq_response[16], dev->model, 16); - strncpy(&inq_response[32], dev->firmware_rev, 4); + + while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4) + fw_offset--; + fw_offset -= 4; + strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4); xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH); res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len); @@ -1018,8 +1017,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, c.common.opcode = nvme_admin_get_log_page; c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c = LOG_TEMP_UNKNOWN; @@ -1086,8 +1085,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, c.common.opcode = nvme_admin_get_log_page; c.common.nsid = cpu_to_le32(0xFFFFFFFF); c.common.prp1 = cpu_to_le64(dma_addr); - c.common.cdw10[0] = cpu_to_le32(((sizeof(struct nvme_smart_log) / - BYTES_TO_DWORDS) << 16) | NVME_GET_SMART_LOG_PAGE); + c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) / + BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART); res = nvme_submit_admin_cmd(dev, &c, NULL); if (res != NVME_SC_SUCCESS) { temp_c_cur = LOG_TEMP_UNKNOWN; @@ -1477,7 +1476,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, goto out_dma; } id_ctrl = mem; - lowest_pow_st = id_ctrl->npss - 1; + lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1)); switch (pc) { case NVME_POWER_STATE_START_VALID: @@ -1494,20 +1493,19 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr, break; case NVME_POWER_STATE_IDLE: /* Action unspecified if POWER CONDITION MODIFIER != [0,1,2] */ - /* min of desired state and (lps-1) because lps is STOP */ if (pcmod == 0x0) - ps_desired = min(POWER_STATE_1, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_1; else if (pcmod == 0x1) - ps_desired = min(POWER_STATE_2, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_2; else if (pcmod == 0x2) - ps_desired = min(POWER_STATE_3, (lowest_pow_st - 1)); + ps_desired = POWER_STATE_3; break; case NVME_POWER_STATE_STANDBY: /* Action unspecified if POWER CONDITION MODIFIER != [0,1] */ if (pcmod == 0x0) - ps_desired = max(0, (lowest_pow_st - 2)); + ps_desired = max(POWER_STATE_0, (lowest_pow_st - 2)); else if (pcmod == 0x1) - ps_desired = max(0, (lowest_pow_st - 1)); + ps_desired = max(POWER_STATE_0, (lowest_pow_st - 1)); break; case NVME_POWER_STATE_LU_CONTROL: default: diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4c95b503b09e..bbeb404b3a07 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -541,7 +541,6 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) return -ENOENT; (void) get_device(&rbd_dev->dev); - set_device_ro(bdev, rbd_dev->mapping.read_only); return 0; } @@ -559,10 +558,76 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) put_device(&rbd_dev->dev); } +static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) +{ + int ret = 0; + int val; + bool ro; + bool ro_changed = false; + + /* get_user() may sleep, so call it before taking rbd_dev->lock */ + if (get_user(val, (int __user *)(arg))) + return -EFAULT; + + ro = val ? true : false; + /* Snapshot doesn't allow to write*/ + if (rbd_dev->spec->snap_id != CEPH_NOSNAP && !ro) + return -EROFS; + + spin_lock_irq(&rbd_dev->lock); + /* prevent others open this device */ + if (rbd_dev->open_count > 1) { + ret = -EBUSY; + goto out; + } + + if (rbd_dev->mapping.read_only != ro) { + rbd_dev->mapping.read_only = ro; + ro_changed = true; + } + +out: + spin_unlock_irq(&rbd_dev->lock); + /* set_disk_ro() may sleep, so call it after releasing rbd_dev->lock */ + if (ret == 0 && ro_changed) + set_disk_ro(rbd_dev->disk, ro ? 1 : 0); + + return ret; +} + +static int rbd_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; + int ret = 0; + + switch (cmd) { + case BLKROSET: + ret = rbd_ioctl_set_ro(rbd_dev, arg); + break; + default: + ret = -ENOTTY; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + return rbd_ioctl(bdev, mode, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + static const struct block_device_operations rbd_bd_ops = { .owner = THIS_MODULE, .open = rbd_open, .release = rbd_release, + .ioctl = rbd_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = rbd_compat_ioctl, +#endif }; /* @@ -1382,6 +1447,13 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) kref_put(&obj_request->kref, rbd_obj_request_destroy); } +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + static bool img_request_child_test(struct rbd_img_request *img_request); static void rbd_parent_request_destroy(struct kref *kref); static void rbd_img_request_destroy(struct kref *kref); @@ -2142,6 +2214,7 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) img_request->next_completion = which; out: spin_unlock_irq(&img_request->completion_lock); + rbd_img_request_put(img_request); if (!more) rbd_img_request_complete(img_request); @@ -2242,6 +2315,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request, goto out_unwind; obj_request->osd_req = osd_req; obj_request->callback = rbd_img_obj_callback; + rbd_img_request_get(img_request); if (write_request) { osd_req_op_alloc_hint_init(osd_req, which, @@ -2872,56 +2946,55 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) } /* - * Request sync osd watch/unwatch. The value of "start" determines - * whether a watch request is being initiated or torn down. + * Initiate a watch request, synchronously. */ -static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; int ret; - rbd_assert(start ^ !!rbd_dev->watch_event); - rbd_assert(start ^ !!rbd_dev->watch_request); + rbd_assert(!rbd_dev->watch_event); + rbd_assert(!rbd_dev->watch_request); - if (start) { - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, - &rbd_dev->watch_event); - if (ret < 0) - return ret; - rbd_assert(rbd_dev->watch_event != NULL); - } + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; + + rbd_assert(rbd_dev->watch_event); - ret = -ENOMEM; obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, - OBJ_REQUEST_NODATA); - if (!obj_request) + OBJ_REQUEST_NODATA); + if (!obj_request) { + ret = -ENOMEM; goto out_cancel; + } obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, obj_request); - if (!obj_request->osd_req) - goto out_cancel; + if (!obj_request->osd_req) { + ret = -ENOMEM; + goto out_put; + } - if (start) - ceph_osdc_set_request_linger(osdc, obj_request->osd_req); - else - ceph_osdc_unregister_linger_request(osdc, - rbd_dev->watch_request->osd_req); + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, - rbd_dev->watch_event->cookie, 0, start ? 1 : 0); + rbd_dev->watch_event->cookie, 0, 1); rbd_osd_req_format_write(obj_request); ret = rbd_obj_request_submit(osdc, obj_request); if (ret) - goto out_cancel; + goto out_linger; + ret = rbd_obj_request_wait(obj_request); if (ret) - goto out_cancel; + goto out_linger; + ret = obj_request->result; if (ret) - goto out_cancel; + goto out_linger; /* * A watch request is set to linger, so the underlying osd @@ -2931,36 +3004,84 @@ static int __rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) * it. We'll drop that reference (below) after we've * unregistered it. */ - if (start) { - rbd_dev->watch_request = obj_request; + rbd_dev->watch_request = obj_request; - return 0; + return 0; + +out_linger: + ceph_osdc_unregister_linger_request(osdc, obj_request->osd_req); +out_put: + rbd_obj_request_put(obj_request); +out_cancel: + ceph_osdc_cancel_event(rbd_dev->watch_event); + rbd_dev->watch_event = NULL; + + return ret; +} + +/* + * Tear down a watch request, synchronously. + */ +static int __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) +{ + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; + int ret; + + rbd_assert(rbd_dev->watch_event); + rbd_assert(rbd_dev->watch_request); + + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) { + ret = -ENOMEM; + goto out_cancel; + } + + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, + obj_request); + if (!obj_request->osd_req) { + ret = -ENOMEM; + goto out_put; } + osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, 0, 0); + rbd_osd_req_format_write(obj_request); + + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out_put; + + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out_put; + + ret = obj_request->result; + if (ret) + goto out_put; + /* We have successfully torn down the watch request */ + ceph_osdc_unregister_linger_request(osdc, + rbd_dev->watch_request->osd_req); rbd_obj_request_put(rbd_dev->watch_request); rbd_dev->watch_request = NULL; + +out_put: + rbd_obj_request_put(obj_request); out_cancel: - /* Cancel the event if we're tearing down, or on error */ ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; - if (obj_request) - rbd_obj_request_put(obj_request); return ret; } -static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev) -{ - return __rbd_dev_header_watch_sync(rbd_dev, true); -} - static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) { int ret; - ret = __rbd_dev_header_watch_sync(rbd_dev, false); + ret = __rbd_dev_header_unwatch_sync(rbd_dev); if (ret) { rbd_warn(rbd_dev, "unable to tear down watch request: %d\n", ret); @@ -3058,7 +3179,6 @@ static void rbd_request_fn(struct request_queue *q) __releases(q->queue_lock) __acquires(q->queue_lock) { struct rbd_device *rbd_dev = q->queuedata; - bool read_only = rbd_dev->mapping.read_only; struct request *rq; int result; @@ -3094,7 +3214,7 @@ static void rbd_request_fn(struct request_queue *q) if (write_request) { result = -EROFS; - if (read_only) + if (rbd_dev->mapping.read_only) goto end_request; rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); } @@ -4683,6 +4803,38 @@ out_err: } /* + * Return pool id (>= 0) or a negative error code. + */ +static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name) +{ + u64 newest_epoch; + unsigned long timeout = rbdc->client->options->mount_timeout * HZ; + int tries = 0; + int ret; + +again: + ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name); + if (ret == -ENOENT && tries++ < 1) { + ret = ceph_monc_do_get_version(&rbdc->client->monc, "osdmap", + &newest_epoch); + if (ret < 0) + return ret; + + if (rbdc->client->osdc.osdmap->epoch < newest_epoch) { + ceph_monc_request_next_osdmap(&rbdc->client->monc); + (void) ceph_monc_wait_osdmap(&rbdc->client->monc, + newest_epoch, timeout); + goto again; + } else { + /* the osdmap we have is new enough */ + return -ENOENT; + } + } + + return ret; +} + +/* * An rbd format 2 image has a unique identifier, distinct from the * name given to it by the user. Internally, that identifier is * what's used to specify the names of objects related to the image. @@ -4752,7 +4904,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) image_id = ceph_extract_encoded_string(&p, p + ret, NULL, GFP_NOIO); - ret = IS_ERR(image_id) ? PTR_ERR(image_id) : 0; + ret = PTR_ERR_OR_ZERO(image_id); if (!ret) rbd_dev->image_format = 2; } else { @@ -4907,6 +5059,7 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) if (ret) goto err_out_disk; set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); + set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); ret = rbd_bus_add_dev(rbd_dev); if (ret) @@ -5053,7 +5206,6 @@ static ssize_t do_rbd_add(struct bus_type *bus, struct rbd_options *rbd_opts = NULL; struct rbd_spec *spec = NULL; struct rbd_client *rbdc; - struct ceph_osd_client *osdc; bool read_only; int rc = -ENOMEM; @@ -5075,8 +5227,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, } /* pick the pool */ - osdc = &rbdc->client->osdc; - rc = ceph_pg_poolid_by_name(osdc->osdmap, spec->pool_name); + rc = rbd_add_get_pool_id(rbdc, spec->pool_name); if (rc < 0) goto err_out_client; spec->pool_id = (u64)rc; @@ -5387,6 +5538,7 @@ err_out_slab: static void __exit rbd_exit(void) { + ida_destroy(&rbd_dev_id_ida); rbd_sysfs_cleanup(); if (single_major) unregister_blkdev(rbd_major, RBD_DRV_NAME); diff --git a/drivers/clk/sunxi/Makefile b/drivers/clk/sunxi/Makefile index b5bac917612c..762fd64dbd1f 100644 --- a/drivers/clk/sunxi/Makefile +++ b/drivers/clk/sunxi/Makefile @@ -3,3 +3,7 @@ # obj-y += clk-sunxi.o clk-factors.o +obj-y += clk-a10-hosc.o +obj-y += clk-a20-gmac.o + +obj-$(CONFIG_MFD_SUN6I_PRCM) += clk-sun6i-ar100.o clk-sun6i-apb0.o clk-sun6i-apb0-gates.o diff --git a/drivers/clk/sunxi/clk-a10-hosc.c b/drivers/clk/sunxi/clk-a10-hosc.c new file mode 100644 index 000000000000..0481d5d673d6 --- /dev/null +++ b/drivers/clk/sunxi/clk-a10-hosc.c @@ -0,0 +1,73 @@ +/* + * Copyright 2013 Emilio López + * + * Emilio López <emilio@elopez.com.ar> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#define SUNXI_OSC24M_GATE 0 + +static DEFINE_SPINLOCK(hosc_lock); + +static void __init sun4i_osc_clk_setup(struct device_node *node) +{ + struct clk *clk; + struct clk_fixed_rate *fixed; + struct clk_gate *gate; + const char *clk_name = node->name; + u32 rate; + + if (of_property_read_u32(node, "clock-frequency", &rate)) + return; + + /* allocate fixed-rate and gate clock structs */ + fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL); + if (!fixed) + return; + gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); + if (!gate) + goto err_free_fixed; + + of_property_read_string(node, "clock-output-names", &clk_name); + + /* set up gate and fixed rate properties */ + gate->reg = of_iomap(node, 0); + gate->bit_idx = SUNXI_OSC24M_GATE; + gate->lock = &hosc_lock; + fixed->fixed_rate = rate; + + clk = clk_register_composite(NULL, clk_name, + NULL, 0, + NULL, NULL, + &fixed->hw, &clk_fixed_rate_ops, + &gate->hw, &clk_gate_ops, + CLK_IS_ROOT); + + if (IS_ERR(clk)) + goto err_free_gate; + + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + + return; + +err_free_gate: + kfree(gate); +err_free_fixed: + kfree(fixed); +} +CLK_OF_DECLARE(sun4i_osc, "allwinner,sun4i-a10-osc-clk", sun4i_osc_clk_setup); diff --git a/drivers/clk/sunxi/clk-a20-gmac.c b/drivers/clk/sunxi/clk-a20-gmac.c new file mode 100644 index 000000000000..633ddc4389ef --- /dev/null +++ b/drivers/clk/sunxi/clk-a20-gmac.c @@ -0,0 +1,119 @@ +/* + * Copyright 2013 Emilio López + * Emilio López <emilio@elopez.com.ar> + * + * Copyright 2013 Chen-Yu Tsai + * Chen-Yu Tsai <wens@csie.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/clk-provider.h> +#include <linux/clkdev.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/slab.h> + +static DEFINE_SPINLOCK(gmac_lock); + +/** + * sun7i_a20_gmac_clk_setup - Setup function for A20/A31 GMAC clock module + * + * This clock looks something like this + * ________________________ + * MII TX clock from PHY >-----|___________ _________|----> to GMAC core + * GMAC Int. RGMII TX clk >----|___________\__/__gate---|----> to PHY + * Ext. 125MHz RGMII TX clk >--|__divider__/ | + * |________________________| + * + * The external 125 MHz reference is optional, i.e. GMAC can use its + * internal TX clock just fine. The A31 GMAC clock module does not have + * the divider controls for the external reference. + * + * To keep it simple, let the GMAC use either the MII TX clock for MII mode, + * and its internal TX clock for GMII and RGMII modes. The GMAC driver should + * select the appropriate source and gate/ungate the output to the PHY. + * + * Only the GMAC should use this clock. Altering the clock so that it doesn't + * match the GMAC's operation parameters will result in the GMAC not being + * able to send traffic out. The GMAC driver should set the clock rate and + * enable/disable this clock to configure the required state. The clock + * driver then responds by auto-reparenting the clock. + */ + +#define SUN7I_A20_GMAC_GPIT 2 +#define SUN7I_A20_GMAC_MASK 0x3 +#define SUN7I_A20_GMAC_PARENTS 2 + +static void __init sun7i_a20_gmac_clk_setup(struct device_node *node) +{ + struct clk *clk; + struct clk_mux *mux; + struct clk_gate *gate; + const char *clk_name = node->name; + const char *parents[SUN7I_A20_GMAC_PARENTS]; + void *reg; + + if (of_property_read_string(node, "clock-output-names", &clk_name)) + return; + + /* allocate mux and gate clock structs */ + mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL); + if (!mux) + return; + + gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); + if (!gate) + goto free_mux; + + /* gmac clock requires exactly 2 parents */ + parents[0] = of_clk_get_parent_name(node, 0); + parents[1] = of_clk_get_parent_name(node, 1); + if (!parents[0] || !parents[1]) + goto free_gate; + + reg = of_iomap(node, 0); + if (!reg) + goto free_gate; + + /* set up gate and fixed rate properties */ + gate->reg = reg; + gate->bit_idx = SUN7I_A20_GMAC_GPIT; + gate->lock = &gmac_lock; + mux->reg = reg; + mux->mask = SUN7I_A20_GMAC_MASK; + mux->flags = CLK_MUX_INDEX_BIT; + mux->lock = &gmac_lock; + + clk = clk_register_composite(NULL, clk_name, + parents, SUN7I_A20_GMAC_PARENTS, + &mux->hw, &clk_mux_ops, + NULL, NULL, + &gate->hw, &clk_gate_ops, + 0); + + if (IS_ERR(clk)) + goto iounmap_reg; + + of_clk_add_provider(node, of_clk_src_simple_get, clk); + clk_register_clkdev(clk, clk_name, NULL); + + return; + +iounmap_reg: + iounmap(reg); +free_gate: + kfree(gate); +free_mux: + kfree(mux); +} +CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk", + sun7i_a20_gmac_clk_setup); diff --git a/drivers/clk/sunxi/clk-sun6i-apb0-gates.c b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c new file mode 100644 index 000000000000..44cd27c5c401 --- /dev/null +++ b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * License Terms: GNU General Public License v2 + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * Allwinner A31 APB0 clock gates driver + * + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#define SUN6I_APB0_GATES_MAX_SIZE 32 + +static int sun6i_a31_apb0_gates_clk_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct clk_onecell_data *clk_data; + const char *clk_parent; + const char *clk_name; + struct resource *r; + void __iomem *reg; + int gate_id; + int ngates; + int i; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + reg = devm_ioremap_resource(&pdev->dev, r); + if (!reg) + return PTR_ERR(reg); + + clk_parent = of_clk_get_parent_name(np, 0); + if (!clk_parent) + return -EINVAL; + + ngates = of_property_count_strings(np, "clock-output-names"); + if (ngates < 0) + return ngates; + + if (!ngates || ngates > SUN6I_APB0_GATES_MAX_SIZE) + return -EINVAL; + + clk_data = devm_kzalloc(&pdev->dev, sizeof(struct clk_onecell_data), + GFP_KERNEL); + if (!clk_data) + return -ENOMEM; + + clk_data->clks = devm_kzalloc(&pdev->dev, + SUN6I_APB0_GATES_MAX_SIZE * + sizeof(struct clk *), + GFP_KERNEL); + if (!clk_data->clks) + return -ENOMEM; + + for (i = 0; i < ngates; i++) { + of_property_read_string_index(np, "clock-output-names", + i, &clk_name); + + gate_id = i; + of_property_read_u32_index(np, "clock-indices", i, &gate_id); + + WARN_ON(gate_id >= SUN6I_APB0_GATES_MAX_SIZE); + if (gate_id >= SUN6I_APB0_GATES_MAX_SIZE) + continue; + + clk_data->clks[gate_id] = clk_register_gate(&pdev->dev, + clk_name, + clk_parent, 0, + reg, gate_id, + 0, NULL); + WARN_ON(IS_ERR(clk_data->clks[gate_id])); + } + + clk_data->clk_num = ngates; + + return of_clk_add_provider(np, of_clk_src_onecell_get, clk_data); +} + +const struct of_device_id sun6i_a31_apb0_gates_clk_dt_ids[] = { + { .compatible = "allwinner,sun6i-a31-apb0-gates-clk" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_a31_apb0_gates_clk_driver = { + .driver = { + .name = "sun6i-a31-apb0-gates-clk", + .owner = THIS_MODULE, + .of_match_table = sun6i_a31_apb0_gates_clk_dt_ids, + }, + .probe = sun6i_a31_apb0_gates_clk_probe, +}; +module_platform_driver(sun6i_a31_apb0_gates_clk_driver); + +MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 APB0 gate clocks driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/sunxi/clk-sun6i-apb0.c b/drivers/clk/sunxi/clk-sun6i-apb0.c new file mode 100644 index 000000000000..11f17c34c2ae --- /dev/null +++ b/drivers/clk/sunxi/clk-sun6i-apb0.c @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * License Terms: GNU General Public License v2 + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * Allwinner A31 APB0 clock driver + * + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +/* + * The APB0 clk has a configurable divisor. + * + * We must use a clk_div_table and not a regular power of 2 + * divisor here, because the first 2 values divide the clock + * by 2. + */ +static const struct clk_div_table sun6i_a31_apb0_divs[] = { + { .val = 0, .div = 2, }, + { .val = 1, .div = 2, }, + { .val = 2, .div = 4, }, + { .val = 3, .div = 8, }, + { /* sentinel */ }, +}; + +static int sun6i_a31_apb0_clk_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + const char *clk_name = np->name; + const char *clk_parent; + struct resource *r; + void __iomem *reg; + struct clk *clk; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + reg = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(reg)) + return PTR_ERR(reg); + + clk_parent = of_clk_get_parent_name(np, 0); + if (!clk_parent) + return -EINVAL; + + of_property_read_string(np, "clock-output-names", &clk_name); + + clk = clk_register_divider_table(&pdev->dev, clk_name, clk_parent, + 0, reg, 0, 2, 0, sun6i_a31_apb0_divs, + NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + return of_clk_add_provider(np, of_clk_src_simple_get, clk); +} + +const struct of_device_id sun6i_a31_apb0_clk_dt_ids[] = { + { .compatible = "allwinner,sun6i-a31-apb0-clk" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_a31_apb0_clk_driver = { + .driver = { + .name = "sun6i-a31-apb0-clk", + .owner = THIS_MODULE, + .of_match_table = sun6i_a31_apb0_clk_dt_ids, + }, + .probe = sun6i_a31_apb0_clk_probe, +}; +module_platform_driver(sun6i_a31_apb0_clk_driver); + +MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 APB0 clock Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/sunxi/clk-sun6i-ar100.c b/drivers/clk/sunxi/clk-sun6i-ar100.c new file mode 100644 index 000000000000..f73cc051f0dd --- /dev/null +++ b/drivers/clk/sunxi/clk-sun6i-ar100.c @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2014 Free Electrons + * + * License Terms: GNU General Public License v2 + * Author: Boris BREZILLON <boris.brezillon@free-electrons.com> + * + * Allwinner A31 AR100 clock driver + * + */ + +#include <linux/clk-provider.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> + +#define SUN6I_AR100_MAX_PARENTS 4 +#define SUN6I_AR100_SHIFT_MASK 0x3 +#define SUN6I_AR100_SHIFT_MAX SUN6I_AR100_SHIFT_MASK +#define SUN6I_AR100_SHIFT_SHIFT 4 +#define SUN6I_AR100_DIV_MASK 0x1f +#define SUN6I_AR100_DIV_MAX (SUN6I_AR100_DIV_MASK + 1) +#define SUN6I_AR100_DIV_SHIFT 8 +#define SUN6I_AR100_MUX_MASK 0x3 +#define SUN6I_AR100_MUX_SHIFT 16 + +struct ar100_clk { + struct clk_hw hw; + void __iomem *reg; +}; + +static inline struct ar100_clk *to_ar100_clk(struct clk_hw *hw) +{ + return container_of(hw, struct ar100_clk, hw); +} + +static unsigned long ar100_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct ar100_clk *clk = to_ar100_clk(hw); + u32 val = readl(clk->reg); + int shift = (val >> SUN6I_AR100_SHIFT_SHIFT) & SUN6I_AR100_SHIFT_MASK; + int div = (val >> SUN6I_AR100_DIV_SHIFT) & SUN6I_AR100_DIV_MASK; + + return (parent_rate >> shift) / (div + 1); +} + +static long ar100_determine_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *best_parent_rate, + struct clk **best_parent_clk) +{ + int nparents = __clk_get_num_parents(hw->clk); + long best_rate = -EINVAL; + int i; + + *best_parent_clk = NULL; + + for (i = 0; i < nparents; i++) { + unsigned long parent_rate; + unsigned long tmp_rate; + struct clk *parent; + unsigned long div; + int shift; + + parent = clk_get_parent_by_index(hw->clk, i); + parent_rate = __clk_get_rate(parent); + div = DIV_ROUND_UP(parent_rate, rate); + + /* + * The AR100 clk contains 2 divisors: + * - one power of 2 divisor + * - one regular divisor + * + * First check if we can safely shift (or divide by a power + * of 2) without losing precision on the requested rate. + */ + shift = ffs(div) - 1; + if (shift > SUN6I_AR100_SHIFT_MAX) + shift = SUN6I_AR100_SHIFT_MAX; + + div >>= shift; + + /* + * Then if the divisor is still bigger than what the HW + * actually supports, use a bigger shift (or power of 2 + * divider) value and accept to lose some precision. + */ + while (div > SUN6I_AR100_DIV_MAX) { + shift++; + div >>= 1; + if (shift > SUN6I_AR100_SHIFT_MAX) + break; + } + + /* + * If the shift value (or power of 2 divider) is bigger + * than what the HW actually support, skip this parent. + */ + if (shift > SUN6I_AR100_SHIFT_MAX) + continue; + + tmp_rate = (parent_rate >> shift) / div; + if (!*best_parent_clk || tmp_rate > best_rate) { + *best_parent_clk = parent; + *best_parent_rate = parent_rate; + best_rate = tmp_rate; + } + } + + return best_rate; +} + +static int ar100_set_parent(struct clk_hw *hw, u8 index) +{ + struct ar100_clk *clk = to_ar100_clk(hw); + u32 val = readl(clk->reg); + + if (index >= SUN6I_AR100_MAX_PARENTS) + return -EINVAL; + + val &= ~(SUN6I_AR100_MUX_MASK << SUN6I_AR100_MUX_SHIFT); + val |= (index << SUN6I_AR100_MUX_SHIFT); + writel(val, clk->reg); + + return 0; +} + +static u8 ar100_get_parent(struct clk_hw *hw) +{ + struct ar100_clk *clk = to_ar100_clk(hw); + return (readl(clk->reg) >> SUN6I_AR100_MUX_SHIFT) & + SUN6I_AR100_MUX_MASK; +} + +static int ar100_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + unsigned long div = parent_rate / rate; + struct ar100_clk *clk = to_ar100_clk(hw); + u32 val = readl(clk->reg); + int shift; + + if (parent_rate % rate) + return -EINVAL; + + shift = ffs(div) - 1; + if (shift > SUN6I_AR100_SHIFT_MAX) + shift = SUN6I_AR100_SHIFT_MAX; + + div >>= shift; + + if (div > SUN6I_AR100_DIV_MAX) + return -EINVAL; + + val &= ~((SUN6I_AR100_SHIFT_MASK << SUN6I_AR100_SHIFT_SHIFT) | + (SUN6I_AR100_DIV_MASK << SUN6I_AR100_DIV_SHIFT)); + val |= (shift << SUN6I_AR100_SHIFT_SHIFT) | + (div << SUN6I_AR100_DIV_SHIFT); + writel(val, clk->reg); + + return 0; +} + +struct clk_ops ar100_ops = { + .recalc_rate = ar100_recalc_rate, + .determine_rate = ar100_determine_rate, + .set_parent = ar100_set_parent, + .get_parent = ar100_get_parent, + .set_rate = ar100_set_rate, +}; + +static int sun6i_a31_ar100_clk_probe(struct platform_device *pdev) +{ + const char *parents[SUN6I_AR100_MAX_PARENTS]; + struct device_node *np = pdev->dev.of_node; + const char *clk_name = np->name; + struct clk_init_data init; + struct ar100_clk *ar100; + struct resource *r; + struct clk *clk; + int nparents; + int i; + + ar100 = devm_kzalloc(&pdev->dev, sizeof(*ar100), GFP_KERNEL); + if (!ar100) + return -ENOMEM; + + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ar100->reg = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(ar100->reg)) + return PTR_ERR(ar100->reg); + + nparents = of_clk_get_parent_count(np); + if (nparents > SUN6I_AR100_MAX_PARENTS) + nparents = SUN6I_AR100_MAX_PARENTS; + + for (i = 0; i < nparents; i++) + parents[i] = of_clk_get_parent_name(np, i); + + of_property_read_string(np, "clock-output-names", &clk_name); + + init.name = clk_name; + init.ops = &ar100_ops; + init.parent_names = parents; + init.num_parents = nparents; + init.flags = 0; + + ar100->hw.init = &init; + + clk = clk_register(&pdev->dev, &ar100->hw); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + return of_clk_add_provider(np, of_clk_src_simple_get, clk); +} + +const struct of_device_id sun6i_a31_ar100_clk_dt_ids[] = { + { .compatible = "allwinner,sun6i-a31-ar100-clk" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_a31_ar100_clk_driver = { + .driver = { + .name = "sun6i-a31-ar100-clk", + .owner = THIS_MODULE, + .of_match_table = sun6i_a31_ar100_clk_dt_ids, + }, + .probe = sun6i_a31_ar100_clk_probe, +}; +module_platform_driver(sun6i_a31_ar100_clk_driver); + +MODULE_AUTHOR("Boris BREZILLON <boris.brezillon@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 AR100 clock Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index 426483422d3d..fb2ce8440f0e 100644 --- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -28,63 +28,6 @@ static DEFINE_SPINLOCK(clk_lock); #define SUNXI_MAX_PARENTS 5 /** - * sun4i_osc_clk_setup() - Setup function for gatable oscillator - */ - -#define SUNXI_OSC24M_GATE 0 - -static void __init sun4i_osc_clk_setup(struct device_node *node) -{ - struct clk *clk; - struct clk_fixed_rate *fixed; - struct clk_gate *gate; - const char *clk_name = node->name; - u32 rate; - - if (of_property_read_u32(node, "clock-frequency", &rate)) - return; - - /* allocate fixed-rate and gate clock structs */ - fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL); - if (!fixed) - return; - gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); - if (!gate) - goto err_free_fixed; - - of_property_read_string(node, "clock-output-names", &clk_name); - - /* set up gate and fixed rate properties */ - gate->reg = of_iomap(node, 0); - gate->bit_idx = SUNXI_OSC24M_GATE; - gate->lock = &clk_lock; - fixed->fixed_rate = rate; - - clk = clk_register_composite(NULL, clk_name, - NULL, 0, - NULL, NULL, - &fixed->hw, &clk_fixed_rate_ops, - &gate->hw, &clk_gate_ops, - CLK_IS_ROOT); - - if (IS_ERR(clk)) - goto err_free_gate; - - of_clk_add_provider(node, of_clk_src_simple_get, clk); - clk_register_clkdev(clk, clk_name, NULL); - - return; - -err_free_gate: - kfree(gate); -err_free_fixed: - kfree(fixed); -} -CLK_OF_DECLARE(sun4i_osc, "allwinner,sun4i-a10-osc-clk", sun4i_osc_clk_setup); - - - -/** * sun4i_get_pll1_factors() - calculates n, k, m, p factors for PLL1 * PLL1 rate is calculated as follows * rate = (parent_rate * n * (k + 1) >> p) / (m + 1); @@ -408,104 +351,6 @@ static void sun7i_a20_get_out_factors(u32 *freq, u32 parent_rate, *p = calcp; } - - -/** - * sun7i_a20_gmac_clk_setup - Setup function for A20/A31 GMAC clock module - * - * This clock looks something like this - * ________________________ - * MII TX clock from PHY >-----|___________ _________|----> to GMAC core - * GMAC Int. RGMII TX clk >----|___________\__/__gate---|----> to PHY - * Ext. 125MHz RGMII TX clk >--|__divider__/ | - * |________________________| - * - * The external 125 MHz reference is optional, i.e. GMAC can use its - * internal TX clock just fine. The A31 GMAC clock module does not have - * the divider controls for the external reference. - * - * To keep it simple, let the GMAC use either the MII TX clock for MII mode, - * and its internal TX clock for GMII and RGMII modes. The GMAC driver should - * select the appropriate source and gate/ungate the output to the PHY. - * - * Only the GMAC should use this clock. Altering the clock so that it doesn't - * match the GMAC's operation parameters will result in the GMAC not being - * able to send traffic out. The GMAC driver should set the clock rate and - * enable/disable this clock to configure the required state. The clock - * driver then responds by auto-reparenting the clock. - */ - -#define SUN7I_A20_GMAC_GPIT 2 -#define SUN7I_A20_GMAC_MASK 0x3 -#define SUN7I_A20_GMAC_PARENTS 2 - -static void __init sun7i_a20_gmac_clk_setup(struct device_node *node) -{ - struct clk *clk; - struct clk_mux *mux; - struct clk_gate *gate; - const char *clk_name = node->name; - const char *parents[SUN7I_A20_GMAC_PARENTS]; - void *reg; - - if (of_property_read_string(node, "clock-output-names", &clk_name)) - return; - - /* allocate mux and gate clock structs */ - mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL); - if (!mux) - return; - - gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); - if (!gate) - goto free_mux; - - /* gmac clock requires exactly 2 parents */ - parents[0] = of_clk_get_parent_name(node, 0); - parents[1] = of_clk_get_parent_name(node, 1); - if (!parents[0] || !parents[1]) - goto free_gate; - - reg = of_iomap(node, 0); - if (!reg) - goto free_gate; - - /* set up gate and fixed rate properties */ - gate->reg = reg; - gate->bit_idx = SUN7I_A20_GMAC_GPIT; - gate->lock = &clk_lock; - mux->reg = reg; - mux->mask = SUN7I_A20_GMAC_MASK; - mux->flags = CLK_MUX_INDEX_BIT; - mux->lock = &clk_lock; - - clk = clk_register_composite(NULL, clk_name, - parents, SUN7I_A20_GMAC_PARENTS, - &mux->hw, &clk_mux_ops, - NULL, NULL, - &gate->hw, &clk_gate_ops, - 0); - - if (IS_ERR(clk)) - goto iounmap_reg; - - of_clk_add_provider(node, of_clk_src_simple_get, clk); - clk_register_clkdev(clk, clk_name, NULL); - - return; - -iounmap_reg: - iounmap(reg); -free_gate: - kfree(gate); -free_mux: - kfree(mux); -} -CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk", - sun7i_a20_gmac_clk_setup); - - - /** * clk_sunxi_mmc_phase_control() - configures MMC clock phase control */ @@ -1009,6 +854,11 @@ static const struct gates_data sun5i_a13_usb_gates_data __initconst = { .reset_mask = 0x03, }; +static const struct gates_data sun6i_a31_usb_gates_data __initconst = { + .mask = { BIT(18) | BIT(17) | BIT(16) | BIT(10) | BIT(9) | BIT(8) }, + .reset_mask = BIT(2) | BIT(1) | BIT(0), +}; + static void __init sunxi_gates_clk_setup(struct device_node *node, struct gates_data *data) { @@ -1304,6 +1154,7 @@ static const struct of_device_id clk_gates_match[] __initconst = { {.compatible = "allwinner,sun6i-a31-apb2-gates-clk", .data = &sun6i_a31_apb2_gates_data,}, {.compatible = "allwinner,sun4i-a10-usb-clk", .data = &sun4i_a10_usb_gates_data,}, {.compatible = "allwinner,sun5i-a13-usb-clk", .data = &sun5i_a13_usb_gates_data,}, + {.compatible = "allwinner,sun6i-a31-usb-clk", .data = &sun6i_a31_usb_gates_data,}, {} }; @@ -1321,33 +1172,10 @@ static void __init of_sunxi_table_clock_setup(const struct of_device_id *clk_mat } } -/** - * System clock protection - * - * By enabling these critical clocks, we prevent their accidental gating - * by the framework - */ -static void __init sunxi_clock_protect(void) +static void __init sunxi_init_clocks(const char *clocks[], int nclocks) { - struct clk *clk; - - /* memory bus clock - sun5i+ */ - clk = clk_get(NULL, "mbus"); - if (!IS_ERR(clk)) { - clk_prepare_enable(clk); - clk_put(clk); - } - - /* DDR clock - sun4i+ */ - clk = clk_get(NULL, "pll5_ddr"); - if (!IS_ERR(clk)) { - clk_prepare_enable(clk); - clk_put(clk); - } -} + unsigned int i; -static void __init sunxi_init_clocks(struct device_node *np) -{ /* Register factor clocks */ of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup); @@ -1363,11 +1191,48 @@ static void __init sunxi_init_clocks(struct device_node *np) /* Register gate clocks */ of_sunxi_table_clock_setup(clk_gates_match, sunxi_gates_clk_setup); - /* Enable core system clocks */ - sunxi_clock_protect(); + /* Protect the clocks that needs to stay on */ + for (i = 0; i < nclocks; i++) { + struct clk *clk = clk_get(NULL, clocks[i]); + + if (!IS_ERR(clk)) + clk_prepare_enable(clk); + } +} + +static const char *sun4i_a10_critical_clocks[] __initdata = { + "pll5_ddr", +}; + +static void __init sun4i_a10_init_clocks(struct device_node *node) +{ + sunxi_init_clocks(sun4i_a10_critical_clocks, + ARRAY_SIZE(sun4i_a10_critical_clocks)); +} +CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sun4i_a10_init_clocks); + +static const char *sun5i_critical_clocks[] __initdata = { + "mbus", + "pll5_ddr", +}; + +static void __init sun5i_init_clocks(struct device_node *node) +{ + sunxi_init_clocks(sun5i_critical_clocks, + ARRAY_SIZE(sun5i_critical_clocks)); +} +CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sun5i_init_clocks); +CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sun5i_init_clocks); +CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sun5i_init_clocks); + +static const char *sun6i_critical_clocks[] __initdata = { + "cpu", + "ahb1_sdram", +}; + +static void __init sun6i_init_clocks(struct device_node *node) +{ + sunxi_init_clocks(sun6i_critical_clocks, + ARRAY_SIZE(sun6i_critical_clocks)); } -CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sunxi_init_clocks); -CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sunxi_init_clocks); -CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sunxi_init_clocks); -CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sunxi_init_clocks); -CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sunxi_init_clocks); +CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks); diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 4319d4031aa3..ed4d0aaf8916 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -3,9 +3,11 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ fixed-factor.o mux.o apll.o obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o +obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o obj-$(CONFIG_ARCH_OMAP3) += $(clk-common) interface.o clk-3xxx.o obj-$(CONFIG_ARCH_OMAP4) += $(clk-common) clk-44xx.o obj-$(CONFIG_SOC_OMAP5) += $(clk-common) clk-54xx.o -obj-$(CONFIG_SOC_DRA7XX) += $(clk-common) clk-7xx.o +obj-$(CONFIG_SOC_DRA7XX) += $(clk-common) clk-7xx.o \ + clk-dra7-atl.o obj-$(CONFIG_SOC_AM43XX) += $(clk-common) clk-43xx.o endif diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c index b986f61f5a77..5428c9c547cd 100644 --- a/drivers/clk/ti/apll.c +++ b/drivers/clk/ti/apll.c @@ -221,3 +221,184 @@ cleanup: kfree(init); } CLK_OF_DECLARE(dra7_apll_clock, "ti,dra7-apll-clock", of_dra7_apll_setup); + +#define OMAP2_EN_APLL_LOCKED 0x3 +#define OMAP2_EN_APLL_STOPPED 0x0 + +static int omap2_apll_is_enabled(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ad->enable_mask; + + v >>= __ffs(ad->enable_mask); + + return v == OMAP2_EN_APLL_LOCKED ? 1 : 0; +} + +static unsigned long omap2_apll_recalc(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + + if (omap2_apll_is_enabled(hw)) + return clk->fixed_rate; + + return 0; +} + +static int omap2_apll_enable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + int i = 0; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ~ad->enable_mask; + v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); + + while (1) { + v = ti_clk_ll_ops->clk_readl(ad->idlest_reg); + if (v & ad->idlest_mask) + break; + if (i > MAX_APLL_WAIT_TRIES) + break; + i++; + udelay(1); + } + + if (i == MAX_APLL_WAIT_TRIES) { + pr_warn("%s failed to transition to locked\n", + __clk_get_name(clk->hw.clk)); + return -EBUSY; + } + + return 0; +} + +static void omap2_apll_disable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->control_reg); + v &= ~ad->enable_mask; + v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); +} + +static struct clk_ops omap2_apll_ops = { + .enable = &omap2_apll_enable, + .disable = &omap2_apll_disable, + .is_enabled = &omap2_apll_is_enabled, + .recalc_rate = &omap2_apll_recalc, +}; + +static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val) +{ + struct dpll_data *ad = clk->dpll_data; + u32 v; + + v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg); + v &= ~ad->autoidle_mask; + v |= val << __ffs(ad->autoidle_mask); + ti_clk_ll_ops->clk_writel(v, ad->control_reg); +} + +#define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP 0x3 +#define OMAP2_APLL_AUTOIDLE_DISABLE 0x0 + +static void omap2_apll_allow_idle(struct clk_hw_omap *clk) +{ + omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP); +} + +static void omap2_apll_deny_idle(struct clk_hw_omap *clk) +{ + omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_DISABLE); +} + +static struct clk_hw_omap_ops omap2_apll_hwops = { + .allow_idle = &omap2_apll_allow_idle, + .deny_idle = &omap2_apll_deny_idle, +}; + +static void __init of_omap2_apll_setup(struct device_node *node) +{ + struct dpll_data *ad = NULL; + struct clk_hw_omap *clk_hw = NULL; + struct clk_init_data *init = NULL; + struct clk *clk; + const char *parent_name; + u32 val; + + ad = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + init = kzalloc(sizeof(*init), GFP_KERNEL); + + if (!ad || !clk_hw || !init) + goto cleanup; + + clk_hw->dpll_data = ad; + clk_hw->hw.init = init; + init->ops = &omap2_apll_ops; + init->name = node->name; + clk_hw->ops = &omap2_apll_hwops; + + init->num_parents = of_clk_get_parent_count(node); + if (init->num_parents != 1) { + pr_err("%s must have one parent\n", node->name); + goto cleanup; + } + + parent_name = of_clk_get_parent_name(node, 0); + init->parent_names = &parent_name; + + if (of_property_read_u32(node, "ti,clock-frequency", &val)) { + pr_err("%s missing clock-frequency\n", node->name); + goto cleanup; + } + clk_hw->fixed_rate = val; + + if (of_property_read_u32(node, "ti,bit-shift", &val)) { + pr_err("%s missing bit-shift\n", node->name); + goto cleanup; + } + + clk_hw->enable_bit = val; + ad->enable_mask = 0x3 << val; + ad->autoidle_mask = 0x3 << val; + + if (of_property_read_u32(node, "ti,idlest-shift", &val)) { + pr_err("%s missing idlest-shift\n", node->name); + goto cleanup; + } + + ad->idlest_mask = 1 << val; + + ad->control_reg = ti_clk_get_reg_addr(node, 0); + ad->autoidle_reg = ti_clk_get_reg_addr(node, 1); + ad->idlest_reg = ti_clk_get_reg_addr(node, 2); + + if (!ad->control_reg || !ad->autoidle_reg || !ad->idlest_reg) + goto cleanup; + + clk = clk_register(NULL, &clk_hw->hw); + if (!IS_ERR(clk)) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + kfree(init); + return; + } +cleanup: + kfree(ad); + kfree(clk_hw); + kfree(init); +} +CLK_OF_DECLARE(omap2_apll_clock, "ti,omap2-apll-clock", + of_omap2_apll_setup); diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c new file mode 100644 index 000000000000..c808ab3d2bb2 --- /dev/null +++ b/drivers/clk/ti/clk-2xxx.c @@ -0,0 +1,256 @@ +/* + * OMAP2 Clock init + * + * Copyright (C) 2013 Texas Instruments, Inc + * Tero Kristo (t-kristo@ti.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/clk-provider.h> +#include <linux/clk/ti.h> + +static struct ti_dt_clk omap2xxx_clks[] = { + DT_CLK(NULL, "func_32k_ck", "func_32k_ck"), + DT_CLK(NULL, "secure_32k_ck", "secure_32k_ck"), + DT_CLK(NULL, "virt_12m_ck", "virt_12m_ck"), + DT_CLK(NULL, "virt_13m_ck", "virt_13m_ck"), + DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"), + DT_CLK(NULL, "virt_26m_ck", "virt_26m_ck"), + DT_CLK(NULL, "aplls_clkin_ck", "aplls_clkin_ck"), + DT_CLK(NULL, "aplls_clkin_x2_ck", "aplls_clkin_x2_ck"), + DT_CLK(NULL, "osc_ck", "osc_ck"), + DT_CLK(NULL, "sys_ck", "sys_ck"), + DT_CLK(NULL, "alt_ck", "alt_ck"), + DT_CLK(NULL, "mcbsp_clks", "mcbsp_clks"), + DT_CLK(NULL, "dpll_ck", "dpll_ck"), + DT_CLK(NULL, "apll96_ck", "apll96_ck"), + DT_CLK(NULL, "apll54_ck", "apll54_ck"), + DT_CLK(NULL, "func_54m_ck", "func_54m_ck"), + DT_CLK(NULL, "core_ck", "core_ck"), + DT_CLK(NULL, "func_96m_ck", "func_96m_ck"), + DT_CLK(NULL, "func_48m_ck", "func_48m_ck"), + DT_CLK(NULL, "func_12m_ck", "func_12m_ck"), + DT_CLK(NULL, "sys_clkout_src", "sys_clkout_src"), + DT_CLK(NULL, "sys_clkout", "sys_clkout"), + DT_CLK(NULL, "emul_ck", "emul_ck"), + DT_CLK(NULL, "mpu_ck", "mpu_ck"), + DT_CLK(NULL, "dsp_fck", "dsp_fck"), + DT_CLK(NULL, "gfx_3d_fck", "gfx_3d_fck"), + DT_CLK(NULL, "gfx_2d_fck", "gfx_2d_fck"), + DT_CLK(NULL, "gfx_ick", "gfx_ick"), + DT_CLK("omapdss_dss", "ick", "dss_ick"), + DT_CLK(NULL, "dss_ick", "dss_ick"), + DT_CLK(NULL, "dss1_fck", "dss1_fck"), + DT_CLK(NULL, "dss2_fck", "dss2_fck"), + DT_CLK(NULL, "dss_54m_fck", "dss_54m_fck"), + DT_CLK(NULL, "core_l3_ck", "core_l3_ck"), + DT_CLK(NULL, "ssi_fck", "ssi_ssr_sst_fck"), + DT_CLK(NULL, "usb_l4_ick", "usb_l4_ick"), + DT_CLK(NULL, "l4_ck", "l4_ck"), + DT_CLK(NULL, "ssi_l4_ick", "ssi_l4_ick"), + DT_CLK(NULL, "gpt1_ick", "gpt1_ick"), + DT_CLK(NULL, "gpt1_fck", "gpt1_fck"), + DT_CLK(NULL, "gpt2_ick", "gpt2_ick"), + DT_CLK(NULL, "gpt2_fck", "gpt2_fck"), + DT_CLK(NULL, "gpt3_ick", "gpt3_ick"), + DT_CLK(NULL, "gpt3_fck", "gpt3_fck"), + DT_CLK(NULL, "gpt4_ick", "gpt4_ick"), + DT_CLK(NULL, "gpt4_fck", "gpt4_fck"), + DT_CLK(NULL, "gpt5_ick", "gpt5_ick"), + DT_CLK(NULL, "gpt5_fck", "gpt5_fck"), + DT_CLK(NULL, "gpt6_ick", "gpt6_ick"), + DT_CLK(NULL, "gpt6_fck", "gpt6_fck"), + DT_CLK(NULL, "gpt7_ick", "gpt7_ick"), + DT_CLK(NULL, "gpt7_fck", "gpt7_fck"), + DT_CLK(NULL, "gpt8_ick", "gpt8_ick"), + DT_CLK(NULL, "gpt8_fck", "gpt8_fck"), + DT_CLK(NULL, "gpt9_ick", "gpt9_ick"), + DT_CLK(NULL, "gpt9_fck", "gpt9_fck"), + DT_CLK(NULL, "gpt10_ick", "gpt10_ick"), + DT_CLK(NULL, "gpt10_fck", "gpt10_fck"), + DT_CLK(NULL, "gpt11_ick", "gpt11_ick"), + DT_CLK(NULL, "gpt11_fck", "gpt11_fck"), + DT_CLK(NULL, "gpt12_ick", "gpt12_ick"), + DT_CLK(NULL, "gpt12_fck", "gpt12_fck"), + DT_CLK("omap-mcbsp.1", "ick", "mcbsp1_ick"), + DT_CLK(NULL, "mcbsp1_ick", "mcbsp1_ick"), + DT_CLK(NULL, "mcbsp1_fck", "mcbsp1_fck"), + DT_CLK("omap-mcbsp.2", "ick", "mcbsp2_ick"), + DT_CLK(NULL, "mcbsp2_ick", "mcbsp2_ick"), + DT_CLK(NULL, "mcbsp2_fck", "mcbsp2_fck"), + DT_CLK("omap2_mcspi.1", "ick", "mcspi1_ick"), + DT_CLK(NULL, "mcspi1_ick", "mcspi1_ick"), + DT_CLK(NULL, "mcspi1_fck", "mcspi1_fck"), + DT_CLK("omap2_mcspi.2", "ick", "mcspi2_ick"), + DT_CLK(NULL, "mcspi2_ick", "mcspi2_ick"), + DT_CLK(NULL, "mcspi2_fck", "mcspi2_fck"), + DT_CLK(NULL, "uart1_ick", "uart1_ick"), + DT_CLK(NULL, "uart1_fck", "uart1_fck"), + DT_CLK(NULL, "uart2_ick", "uart2_ick"), + DT_CLK(NULL, "uart2_fck", "uart2_fck"), + DT_CLK(NULL, "uart3_ick", "uart3_ick"), + DT_CLK(NULL, "uart3_fck", "uart3_fck"), + DT_CLK(NULL, "gpios_ick", "gpios_ick"), + DT_CLK(NULL, "gpios_fck", "gpios_fck"), + DT_CLK("omap_wdt", "ick", "mpu_wdt_ick"), + DT_CLK(NULL, "mpu_wdt_ick", "mpu_wdt_ick"), + DT_CLK(NULL, "mpu_wdt_fck", "mpu_wdt_fck"), + DT_CLK(NULL, "sync_32k_ick", "sync_32k_ick"), + DT_CLK(NULL, "wdt1_ick", "wdt1_ick"), + DT_CLK(NULL, "omapctrl_ick", "omapctrl_ick"), + DT_CLK("omap24xxcam", "fck", "cam_fck"), + DT_CLK(NULL, "cam_fck", "cam_fck"), + DT_CLK("omap24xxcam", "ick", "cam_ick"), + DT_CLK(NULL, "cam_ick", "cam_ick"), + DT_CLK(NULL, "mailboxes_ick", "mailboxes_ick"), + DT_CLK(NULL, "wdt4_ick", "wdt4_ick"), + DT_CLK(NULL, "wdt4_fck", "wdt4_fck"), + DT_CLK(NULL, "mspro_ick", "mspro_ick"), + DT_CLK(NULL, "mspro_fck", "mspro_fck"), + DT_CLK(NULL, "fac_ick", "fac_ick"), + DT_CLK(NULL, "fac_fck", "fac_fck"), + DT_CLK("omap_hdq.0", "ick", "hdq_ick"), + DT_CLK(NULL, "hdq_ick", "hdq_ick"), + DT_CLK("omap_hdq.0", "fck", "hdq_fck"), + DT_CLK(NULL, "hdq_fck", "hdq_fck"), + DT_CLK("omap_i2c.1", "ick", "i2c1_ick"), + DT_CLK(NULL, "i2c1_ick", "i2c1_ick"), + DT_CLK("omap_i2c.2", "ick", "i2c2_ick"), + DT_CLK(NULL, "i2c2_ick", "i2c2_ick"), + DT_CLK(NULL, "gpmc_fck", "gpmc_fck"), + DT_CLK(NULL, "sdma_fck", "sdma_fck"), + DT_CLK(NULL, "sdma_ick", "sdma_ick"), + DT_CLK(NULL, "sdrc_ick", "sdrc_ick"), + DT_CLK(NULL, "des_ick", "des_ick"), + DT_CLK("omap-sham", "ick", "sha_ick"), + DT_CLK(NULL, "sha_ick", "sha_ick"), + DT_CLK("omap_rng", "ick", "rng_ick"), + DT_CLK(NULL, "rng_ick", "rng_ick"), + DT_CLK("omap-aes", "ick", "aes_ick"), + DT_CLK(NULL, "aes_ick", "aes_ick"), + DT_CLK(NULL, "pka_ick", "pka_ick"), + DT_CLK(NULL, "usb_fck", "usb_fck"), + DT_CLK(NULL, "timer_32k_ck", "func_32k_ck"), + DT_CLK(NULL, "timer_sys_ck", "sys_ck"), + DT_CLK(NULL, "timer_ext_ck", "alt_ck"), + { .node_name = NULL }, +}; + +static struct ti_dt_clk omap2420_clks[] = { + DT_CLK(NULL, "sys_clkout2_src", "sys_clkout2_src"), + DT_CLK(NULL, "sys_clkout2", "sys_clkout2"), + DT_CLK(NULL, "dsp_ick", "dsp_ick"), + DT_CLK(NULL, "iva1_ifck", "iva1_ifck"), + DT_CLK(NULL, "iva1_mpu_int_ifck", "iva1_mpu_int_ifck"), + DT_CLK(NULL, "wdt3_ick", "wdt3_ick"), + DT_CLK(NULL, "wdt3_fck", "wdt3_fck"), + DT_CLK("mmci-omap.0", "ick", "mmc_ick"), + DT_CLK(NULL, "mmc_ick", "mmc_ick"), + DT_CLK("mmci-omap.0", "fck", "mmc_fck"), + DT_CLK(NULL, "mmc_fck", "mmc_fck"), + DT_CLK(NULL, "eac_ick", "eac_ick"), + DT_CLK(NULL, "eac_fck", "eac_fck"), + DT_CLK(NULL, "i2c1_fck", "i2c1_fck"), + DT_CLK(NULL, "i2c2_fck", "i2c2_fck"), + DT_CLK(NULL, "vlynq_ick", "vlynq_ick"), + DT_CLK(NULL, "vlynq_fck", "vlynq_fck"), + DT_CLK("musb-hdrc", "fck", "osc_ck"), + { .node_name = NULL }, +}; + +static struct ti_dt_clk omap2430_clks[] = { + DT_CLK("twl", "fck", "osc_ck"), + DT_CLK(NULL, "iva2_1_ick", "iva2_1_ick"), + DT_CLK(NULL, "mdm_ick", "mdm_ick"), + DT_CLK(NULL, "mdm_osc_ck", "mdm_osc_ck"), + DT_CLK("omap-mcbsp.3", "ick", "mcbsp3_ick"), + DT_CLK(NULL, "mcbsp3_ick", "mcbsp3_ick"), + DT_CLK(NULL, "mcbsp3_fck", "mcbsp3_fck"), + DT_CLK("omap-mcbsp.4", "ick", "mcbsp4_ick"), + DT_CLK(NULL, "mcbsp4_ick", "mcbsp4_ick"), + DT_CLK(NULL, "mcbsp4_fck", "mcbsp4_fck"), + DT_CLK("omap-mcbsp.5", "ick", "mcbsp5_ick"), + DT_CLK(NULL, "mcbsp5_ick", "mcbsp5_ick"), + DT_CLK(NULL, "mcbsp5_fck", "mcbsp5_fck"), + DT_CLK("omap2_mcspi.3", "ick", "mcspi3_ick"), + DT_CLK(NULL, "mcspi3_ick", "mcspi3_ick"), + DT_CLK(NULL, "mcspi3_fck", "mcspi3_fck"), + DT_CLK(NULL, "icr_ick", "icr_ick"), + DT_CLK(NULL, "i2chs1_fck", "i2chs1_fck"), + DT_CLK(NULL, "i2chs2_fck", "i2chs2_fck"), + DT_CLK("musb-omap2430", "ick", "usbhs_ick"), + DT_CLK(NULL, "usbhs_ick", "usbhs_ick"), + DT_CLK("omap_hsmmc.0", "ick", "mmchs1_ick"), + DT_CLK(NULL, "mmchs1_ick", "mmchs1_ick"), + DT_CLK(NULL, "mmchs1_fck", "mmchs1_fck"), + DT_CLK("omap_hsmmc.1", "ick", "mmchs2_ick"), + DT_CLK(NULL, "mmchs2_ick", "mmchs2_ick"), + DT_CLK(NULL, "mmchs2_fck", "mmchs2_fck"), + DT_CLK(NULL, "gpio5_ick", "gpio5_ick"), + DT_CLK(NULL, "gpio5_fck", "gpio5_fck"), + DT_CLK(NULL, "mdm_intc_ick", "mdm_intc_ick"), + DT_CLK("omap_hsmmc.0", "mmchsdb_fck", "mmchsdb1_fck"), + DT_CLK(NULL, "mmchsdb1_fck", "mmchsdb1_fck"), + DT_CLK("omap_hsmmc.1", "mmchsdb_fck", "mmchsdb2_fck"), + DT_CLK(NULL, "mmchsdb2_fck", "mmchsdb2_fck"), + { .node_name = NULL }, +}; + +static const char *enable_init_clks[] = { + "apll96_ck", + "apll54_ck", + "sync_32k_ick", + "omapctrl_ick", + "gpmc_fck", + "sdrc_ick", +}; + +enum { + OMAP2_SOC_OMAP2420, + OMAP2_SOC_OMAP2430, +}; + +static int __init omap2xxx_dt_clk_init(int soc_type) +{ + ti_dt_clocks_register(omap2xxx_clks); + + if (soc_type == OMAP2_SOC_OMAP2420) + ti_dt_clocks_register(omap2420_clks); + else + ti_dt_clocks_register(omap2430_clks); + + omap2xxx_clkt_vps_init(); + + omap2_clk_disable_autoidle_all(); + + omap2_clk_enable_init_clocks(enable_init_clks, + ARRAY_SIZE(enable_init_clks)); + + pr_info("Clocking rate (Crystal/DPLL/MPU): %ld.%01ld/%ld/%ld MHz\n", + (clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 1000000), + (clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 100000) % 10, + (clk_get_rate(clk_get_sys(NULL, "dpll_ck")) / 1000000), + (clk_get_rate(clk_get_sys(NULL, "mpu_ck")) / 1000000)); + + return 0; +} + +int __init omap2420_dt_clk_init(void) +{ + return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2420); +} + +int __init omap2430_dt_clk_init(void) +{ + return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2430); +} diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c index 08f3d1b915b3..5e183993e3ec 100644 --- a/drivers/clk/ti/clk-54xx.c +++ b/drivers/clk/ti/clk-54xx.c @@ -240,6 +240,12 @@ int __init omap5xxx_dt_clk_init(void) if (rc) pr_err("%s: failed to configure ABE DPLL!\n", __func__); + abe_dpll = clk_get_sys(NULL, "dpll_abe_m2x2_ck"); + if (!rc) + rc = clk_set_rate(abe_dpll, OMAP5_DPLL_ABE_DEFFREQ * 2); + if (rc) + pr_err("%s: failed to configure ABE m2x2 DPLL!\n", __func__); + usb_dpll = clk_get_sys(NULL, "dpll_usb_ck"); rc = clk_set_rate(usb_dpll, OMAP5_DPLL_USB_DEFFREQ); if (rc) diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c index f7e40734c819..e1581335937d 100644 --- a/drivers/clk/ti/clk-7xx.c +++ b/drivers/clk/ti/clk-7xx.c @@ -24,7 +24,7 @@ static struct ti_dt_clk dra7xx_clks[] = { DT_CLK(NULL, "atl_clkin0_ck", "atl_clkin0_ck"), DT_CLK(NULL, "atl_clkin1_ck", "atl_clkin1_ck"), DT_CLK(NULL, "atl_clkin2_ck", "atl_clkin2_ck"), - DT_CLK(NULL, "atlclkin3_ck", "atlclkin3_ck"), + DT_CLK(NULL, "atl_clkin3_ck", "atl_clkin3_ck"), DT_CLK(NULL, "hdmi_clkin_ck", "hdmi_clkin_ck"), DT_CLK(NULL, "mlb_clkin_ck", "mlb_clkin_ck"), DT_CLK(NULL, "mlbp_clkin_ck", "mlbp_clkin_ck"), diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c new file mode 100644 index 000000000000..4a65b410e4d5 --- /dev/null +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -0,0 +1,312 @@ +/* + * DRA7 ATL (Audio Tracking Logic) clock driver + * + * Copyright (C) 2013 Texas Instruments, Inc. + * + * Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/clk-provider.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#define DRA7_ATL_INSTANCES 4 + +#define DRA7_ATL_PPMR_REG(id) (0x200 + (id * 0x80)) +#define DRA7_ATL_BBSR_REG(id) (0x204 + (id * 0x80)) +#define DRA7_ATL_ATLCR_REG(id) (0x208 + (id * 0x80)) +#define DRA7_ATL_SWEN_REG(id) (0x210 + (id * 0x80)) +#define DRA7_ATL_BWSMUX_REG(id) (0x214 + (id * 0x80)) +#define DRA7_ATL_AWSMUX_REG(id) (0x218 + (id * 0x80)) +#define DRA7_ATL_PCLKMUX_REG(id) (0x21c + (id * 0x80)) + +#define DRA7_ATL_SWEN BIT(0) +#define DRA7_ATL_DIVIDER_MASK (0x1f) +#define DRA7_ATL_PCLKMUX BIT(0) +struct dra7_atl_clock_info; + +struct dra7_atl_desc { + struct clk *clk; + struct clk_hw hw; + struct dra7_atl_clock_info *cinfo; + int id; + + bool probed; /* the driver for the IP has been loaded */ + bool valid; /* configured */ + bool enabled; + u32 bws; /* Baseband Word Select Mux */ + u32 aws; /* Audio Word Select Mux */ + u32 divider; /* Cached divider value */ +}; + +struct dra7_atl_clock_info { + struct device *dev; + void __iomem *iobase; + + struct dra7_atl_desc *cdesc; +}; + +#define to_atl_desc(_hw) container_of(_hw, struct dra7_atl_desc, hw) + +static inline void atl_write(struct dra7_atl_clock_info *cinfo, u32 reg, + u32 val) +{ + __raw_writel(val, cinfo->iobase + reg); +} + +static inline int atl_read(struct dra7_atl_clock_info *cinfo, u32 reg) +{ + return __raw_readl(cinfo->iobase + reg); +} + +static int atl_clk_enable(struct clk_hw *hw) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + if (!cdesc->probed) + goto out; + + if (unlikely(!cdesc->valid)) + dev_warn(cdesc->cinfo->dev, "atl%d has not been configured\n", + cdesc->id); + pm_runtime_get_sync(cdesc->cinfo->dev); + + atl_write(cdesc->cinfo, DRA7_ATL_ATLCR_REG(cdesc->id), + cdesc->divider - 1); + atl_write(cdesc->cinfo, DRA7_ATL_SWEN_REG(cdesc->id), DRA7_ATL_SWEN); + +out: + cdesc->enabled = true; + + return 0; +} + +static void atl_clk_disable(struct clk_hw *hw) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + if (!cdesc->probed) + goto out; + + atl_write(cdesc->cinfo, DRA7_ATL_SWEN_REG(cdesc->id), 0); + pm_runtime_put_sync(cdesc->cinfo->dev); + +out: + cdesc->enabled = false; +} + +static int atl_clk_is_enabled(struct clk_hw *hw) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + return cdesc->enabled; +} + +static unsigned long atl_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + + return parent_rate / cdesc->divider; +} + +static long atl_clk_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + unsigned divider; + + divider = (*parent_rate + rate / 2) / rate; + if (divider > DRA7_ATL_DIVIDER_MASK + 1) + divider = DRA7_ATL_DIVIDER_MASK + 1; + + return *parent_rate / divider; +} + +static int atl_clk_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct dra7_atl_desc *cdesc = to_atl_desc(hw); + u32 divider; + + divider = ((parent_rate + rate / 2) / rate) - 1; + if (divider > DRA7_ATL_DIVIDER_MASK) + divider = DRA7_ATL_DIVIDER_MASK; + + cdesc->divider = divider + 1; + + return 0; +} + +const struct clk_ops atl_clk_ops = { + .enable = atl_clk_enable, + .disable = atl_clk_disable, + .is_enabled = atl_clk_is_enabled, + .recalc_rate = atl_clk_recalc_rate, + .round_rate = atl_clk_round_rate, + .set_rate = atl_clk_set_rate, +}; + +static void __init of_dra7_atl_clock_setup(struct device_node *node) +{ + struct dra7_atl_desc *clk_hw = NULL; + struct clk_init_data init = { 0 }; + const char **parent_names = NULL; + struct clk *clk; + + clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); + if (!clk_hw) { + pr_err("%s: could not allocate dra7_atl_desc\n", __func__); + return; + } + + clk_hw->hw.init = &init; + clk_hw->divider = 1; + init.name = node->name; + init.ops = &atl_clk_ops; + init.flags = CLK_IGNORE_UNUSED; + init.num_parents = of_clk_get_parent_count(node); + + if (init.num_parents != 1) { + pr_err("%s: atl clock %s must have 1 parent\n", __func__, + node->name); + goto cleanup; + } + + parent_names = kzalloc(sizeof(char *), GFP_KERNEL); + + if (!parent_names) + goto cleanup; + + parent_names[0] = of_clk_get_parent_name(node, 0); + + init.parent_names = parent_names; + + clk = clk_register(NULL, &clk_hw->hw); + + if (!IS_ERR(clk)) { + of_clk_add_provider(node, of_clk_src_simple_get, clk); + return; + } +cleanup: + kfree(parent_names); + kfree(clk_hw); +} +CLK_OF_DECLARE(dra7_atl_clock, "ti,dra7-atl-clock", of_dra7_atl_clock_setup); + +static int of_dra7_atl_clk_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct dra7_atl_clock_info *cinfo; + int i; + int ret = 0; + + if (!node) + return -ENODEV; + + cinfo = devm_kzalloc(&pdev->dev, sizeof(*cinfo), GFP_KERNEL); + if (!cinfo) + return -ENOMEM; + + cinfo->iobase = of_iomap(node, 0); + cinfo->dev = &pdev->dev; + pm_runtime_enable(cinfo->dev); + + pm_runtime_get_sync(cinfo->dev); + atl_write(cinfo, DRA7_ATL_PCLKMUX_REG(0), DRA7_ATL_PCLKMUX); + + for (i = 0; i < DRA7_ATL_INSTANCES; i++) { + struct device_node *cfg_node; + char prop[5]; + struct dra7_atl_desc *cdesc; + struct of_phandle_args clkspec; + struct clk *clk; + int rc; + + rc = of_parse_phandle_with_args(node, "ti,provided-clocks", + NULL, i, &clkspec); + + if (rc) { + pr_err("%s: failed to lookup atl clock %d\n", __func__, + i); + return -EINVAL; + } + + clk = of_clk_get_from_provider(&clkspec); + + cdesc = to_atl_desc(__clk_get_hw(clk)); + cdesc->cinfo = cinfo; + cdesc->id = i; + + /* Get configuration for the ATL instances */ + snprintf(prop, sizeof(prop), "atl%u", i); + cfg_node = of_find_node_by_name(node, prop); + if (cfg_node) { + ret = of_property_read_u32(cfg_node, "bws", + &cdesc->bws); + ret |= of_property_read_u32(cfg_node, "aws", + &cdesc->aws); + if (!ret) { + cdesc->valid = true; + atl_write(cinfo, DRA7_ATL_BWSMUX_REG(i), + cdesc->bws); + atl_write(cinfo, DRA7_ATL_AWSMUX_REG(i), + cdesc->aws); + } + } + + cdesc->probed = true; + /* + * Enable the clock if it has been asked prior to loading the + * hw driver + */ + if (cdesc->enabled) + atl_clk_enable(__clk_get_hw(clk)); + } + pm_runtime_put_sync(cinfo->dev); + + return ret; +} + +static int of_dra7_atl_clk_remove(struct platform_device *pdev) +{ + pm_runtime_disable(&pdev->dev); + + return 0; +} + +static struct of_device_id of_dra7_atl_clk_match_tbl[] = { + { .compatible = "ti,dra7-atl", }, + {}, +}; +MODULE_DEVICE_TABLE(of, of_dra7_atl_clk_match_tbl); + +static struct platform_driver dra7_atl_clk_driver = { + .driver = { + .name = "dra7-atl", + .owner = THIS_MODULE, + .of_match_table = of_dra7_atl_clk_match_tbl, + }, + .probe = of_dra7_atl_clk_probe, + .remove = of_dra7_atl_clk_remove, +}; + +module_platform_driver(dra7_atl_clk_driver); + +MODULE_DESCRIPTION("Clock driver for DRA7 Audio Tracking Logic"); +MODULE_ALIAS("platform:dra7-atl-clock"); +MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c index 7e498a44f97d..abd956d5f838 100644 --- a/drivers/clk/ti/dpll.c +++ b/drivers/clk/ti/dpll.c @@ -25,8 +25,6 @@ #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ -#define DPLL_HAS_AUTOIDLE 0x1 - #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) || \ defined(CONFIG_SOC_DRA7XX) static const struct clk_ops dpll_m4xen_ck_ops = { @@ -37,21 +35,18 @@ static const struct clk_ops dpll_m4xen_ck_ops = { .set_rate = &omap3_noncore_dpll_set_rate, .get_parent = &omap2_init_dpll_parent, }; +#else +static const struct clk_ops dpll_m4xen_ck_ops = {}; #endif +#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) || \ + defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) || \ + defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX) static const struct clk_ops dpll_core_ck_ops = { .recalc_rate = &omap3_dpll_recalc, .get_parent = &omap2_init_dpll_parent, }; -#ifdef CONFIG_ARCH_OMAP3 -static const struct clk_ops omap3_dpll_core_ck_ops = { - .get_parent = &omap2_init_dpll_parent, - .recalc_rate = &omap3_dpll_recalc, - .round_rate = &omap2_dpll_round_rate, -}; -#endif - static const struct clk_ops dpll_ck_ops = { .enable = &omap3_noncore_dpll_enable, .disable = &omap3_noncore_dpll_disable, @@ -67,6 +62,33 @@ static const struct clk_ops dpll_no_gate_ck_ops = { .round_rate = &omap2_dpll_round_rate, .set_rate = &omap3_noncore_dpll_set_rate, }; +#else +static const struct clk_ops dpll_core_ck_ops = {}; +static const struct clk_ops dpll_ck_ops = {}; +static const struct clk_ops dpll_no_gate_ck_ops = {}; +const struct clk_hw_omap_ops clkhwops_omap3_dpll = {}; +#endif + +#ifdef CONFIG_ARCH_OMAP2 +static const struct clk_ops omap2_dpll_core_ck_ops = { + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap2_dpllcore_recalc, + .round_rate = &omap2_dpll_round_rate, + .set_rate = &omap2_reprogram_dpllcore, +}; +#else +static const struct clk_ops omap2_dpll_core_ck_ops = {}; +#endif + +#ifdef CONFIG_ARCH_OMAP3 +static const struct clk_ops omap3_dpll_core_ck_ops = { + .get_parent = &omap2_init_dpll_parent, + .recalc_rate = &omap3_dpll_recalc, + .round_rate = &omap2_dpll_round_rate, +}; +#else +static const struct clk_ops omap3_dpll_core_ck_ops = {}; +#endif #ifdef CONFIG_ARCH_OMAP3 static const struct clk_ops omap3_dpll_ck_ops = { @@ -193,14 +215,12 @@ static void ti_clk_register_dpll_x2(struct device_node *node, * @node: device node containing the DPLL info * @ops: ops for the DPLL * @ddt: DPLL data template to use - * @init_flags: flags for controlling init types * * Initializes a DPLL clock from device tree data. */ static void __init of_ti_dpll_setup(struct device_node *node, const struct clk_ops *ops, - const struct dpll_data *ddt, - u8 init_flags) + const struct dpll_data *ddt) { struct clk_hw_omap *clk_hw = NULL; struct clk_init_data *init = NULL; @@ -241,13 +261,30 @@ static void __init of_ti_dpll_setup(struct device_node *node, init->parent_names = parent_names; dd->control_reg = ti_clk_get_reg_addr(node, 0); - dd->idlest_reg = ti_clk_get_reg_addr(node, 1); - dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2); - if (!dd->control_reg || !dd->idlest_reg || !dd->mult_div1_reg) + /* + * Special case for OMAP2 DPLL, register order is different due to + * missing idlest_reg, also clkhwops is different. Detected from + * missing idlest_mask. + */ + if (!dd->idlest_mask) { + dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1); +#ifdef CONFIG_ARCH_OMAP2 + clk_hw->ops = &clkhwops_omap2xxx_dpll; + omap2xxx_clkt_dpllcore_init(&clk_hw->hw); +#endif + } else { + dd->idlest_reg = ti_clk_get_reg_addr(node, 1); + if (!dd->idlest_reg) + goto cleanup; + + dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2); + } + + if (!dd->control_reg || !dd->mult_div1_reg) goto cleanup; - if (init_flags & DPLL_HAS_AUTOIDLE) { + if (dd->autoidle_mask) { dd->autoidle_reg = ti_clk_get_reg_addr(node, 3); if (!dd->autoidle_reg) goto cleanup; @@ -310,7 +347,7 @@ static void __init of_ti_omap3_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_dpll_clock, "ti,omap3-dpll-clock", of_ti_omap3_dpll_setup); @@ -329,7 +366,7 @@ static void __init of_ti_omap3_core_dpll_setup(struct device_node *node) .freqsel_mask = 0xf0, }; - of_ti_dpll_setup(node, &omap3_dpll_core_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_core_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_core_dpll_clock, "ti,omap3-dpll-core-clock", of_ti_omap3_core_dpll_setup); @@ -349,7 +386,7 @@ static void __init of_ti_omap3_per_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_STOP) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_per_dpll_clock, "ti,omap3-dpll-per-clock", of_ti_omap3_per_dpll_setup); @@ -371,7 +408,7 @@ static void __init of_ti_omap3_per_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_STOP) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &omap3_dpll_per_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap3_per_jtype_dpll_clock, "ti,omap3-dpll-per-j-type-clock", of_ti_omap3_per_jtype_dpll_setup); @@ -391,11 +428,32 @@ static void __init of_ti_omap4_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_dpll_clock, "ti,omap4-dpll-clock", of_ti_omap4_dpll_setup); +static void __init of_ti_omap5_mpu_dpll_setup(struct device_node *node) +{ + const struct dpll_data dd = { + .idlest_mask = 0x1, + .enable_mask = 0x7, + .autoidle_mask = 0x7, + .mult_mask = 0x7ff << 8, + .div1_mask = 0x7f, + .max_multiplier = 2047, + .max_divider = 128, + .dcc_mask = BIT(22), + .dcc_rate = 1400000000, /* DCC beyond 1.4GHz */ + .min_divider = 1, + .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), + }; + + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); +} +CLK_OF_DECLARE(of_ti_omap5_mpu_dpll_clock, "ti,omap5-mpu-dpll-clock", + of_ti_omap5_mpu_dpll_setup); + static void __init of_ti_omap4_core_dpll_setup(struct device_node *node) { const struct dpll_data dd = { @@ -410,7 +468,7 @@ static void __init of_ti_omap4_core_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_core_dpll_clock, "ti,omap4-dpll-core-clock", of_ti_omap4_core_dpll_setup); @@ -433,7 +491,7 @@ static void __init of_ti_omap4_m4xen_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_m4xen_dpll_clock, "ti,omap4-dpll-m4xen-clock", of_ti_omap4_m4xen_dpll_setup); @@ -454,7 +512,7 @@ static void __init of_ti_omap4_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd, DPLL_HAS_AUTOIDLE); + of_ti_dpll_setup(node, &dpll_m4xen_ck_ops, &dd); } CLK_OF_DECLARE(ti_omap4_jtype_dpll_clock, "ti,omap4-dpll-j-type-clock", of_ti_omap4_jtype_dpll_setup); @@ -465,7 +523,6 @@ static void __init of_ti_am3_no_gate_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -474,7 +531,7 @@ static void __init of_ti_am3_no_gate_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_no_gate_dpll_clock, "ti,am3-dpll-no-gate-clock", of_ti_am3_no_gate_dpll_setup); @@ -484,7 +541,6 @@ static void __init of_ti_am3_jtype_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 4095, @@ -494,7 +550,7 @@ static void __init of_ti_am3_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_jtype_dpll_clock, "ti,am3-dpll-j-type-clock", of_ti_am3_jtype_dpll_setup); @@ -504,7 +560,6 @@ static void __init of_ti_am3_no_gate_jtype_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -514,7 +569,7 @@ static void __init of_ti_am3_no_gate_jtype_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_no_gate_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_no_gate_jtype_dpll_clock, "ti,am3-dpll-no-gate-j-type-clock", @@ -525,7 +580,6 @@ static void __init of_ti_am3_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -534,7 +588,7 @@ static void __init of_ti_am3_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_dpll_clock, "ti,am3-dpll-clock", of_ti_am3_dpll_setup); @@ -543,7 +597,6 @@ static void __init of_ti_am3_core_dpll_setup(struct device_node *node) const struct dpll_data dd = { .idlest_mask = 0x1, .enable_mask = 0x7, - .autoidle_mask = 0x7, .mult_mask = 0x7ff << 8, .div1_mask = 0x7f, .max_multiplier = 2047, @@ -552,7 +605,22 @@ static void __init of_ti_am3_core_dpll_setup(struct device_node *node) .modes = (1 << DPLL_LOW_POWER_BYPASS) | (1 << DPLL_LOCKED), }; - of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd, 0); + of_ti_dpll_setup(node, &dpll_core_ck_ops, &dd); } CLK_OF_DECLARE(ti_am3_core_dpll_clock, "ti,am3-dpll-core-clock", of_ti_am3_core_dpll_setup); + +static void __init of_ti_omap2_core_dpll_setup(struct device_node *node) +{ + const struct dpll_data dd = { + .enable_mask = 0x3, + .mult_mask = 0x3ff << 12, + .div1_mask = 0xf << 8, + .max_divider = 16, + .min_divider = 1, + }; + + of_ti_dpll_setup(node, &omap2_dpll_core_ck_ops, &dd); +} +CLK_OF_DECLARE(ti_omap2_core_dpll_clock, "ti,omap2-dpll-core-clock", + of_ti_omap2_core_dpll_setup); diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c index 58734817d502..b326d2797feb 100644 --- a/drivers/clk/ti/gate.c +++ b/drivers/clk/ti/gate.c @@ -185,7 +185,7 @@ of_ti_composite_no_wait_gate_clk_setup(struct device_node *node) CLK_OF_DECLARE(ti_composite_no_wait_gate_clk, "ti,composite-no-wait-gate-clock", of_ti_composite_no_wait_gate_clk_setup); -#ifdef CONFIG_ARCH_OMAP3 +#if defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3) static void __init of_ti_composite_interface_clk_setup(struct device_node *node) { _of_ti_composite_gate_clk_setup(node, &clkhwops_iclk_wait); diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c index 320a2b168bb2..9c3e8c4aaa40 100644 --- a/drivers/clk/ti/interface.c +++ b/drivers/clk/ti/interface.c @@ -94,6 +94,7 @@ static void __init of_ti_no_wait_interface_clk_setup(struct device_node *node) CLK_OF_DECLARE(ti_no_wait_interface_clk, "ti,omap3-no-wait-interface-clock", of_ti_no_wait_interface_clk_setup); +#ifdef CONFIG_ARCH_OMAP3 static void __init of_ti_hsotgusb_interface_clk_setup(struct device_node *node) { _of_ti_interface_clk_setup(node, @@ -123,3 +124,13 @@ static void __init of_ti_am35xx_interface_clk_setup(struct device_node *node) } CLK_OF_DECLARE(ti_am35xx_interface_clk, "ti,am35xx-interface-clock", of_ti_am35xx_interface_clk_setup); +#endif + +#ifdef CONFIG_SOC_OMAP2430 +static void __init of_ti_omap2430_interface_clk_setup(struct device_node *node) +{ + _of_ti_interface_clk_setup(node, &clkhwops_omap2430_i2chs_wait); +} +CLK_OF_DECLARE(ti_omap2430_interface_clk, "ti,omap2430-interface-clock", + of_ti_omap2430_interface_clk_setup); +#endif diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 719f6fb5b1c3..74f5788d50b1 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -73,12 +73,10 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; new_lpcr = old_lpcr; - new_lpcr &= ~(LPCR_MER | LPCR_PECE); /* lpcr[mer] must be 0 */ - - /* exit powersave upon external interrupt, but not decrementer - * interrupt. + /* Do not exit powersave upon decrementer as we've setup the timer + * offload. */ - new_lpcr |= LPCR_PECE0; + new_lpcr &= ~LPCR_PECE1; mtspr(SPRN_LPCR, new_lpcr); power7_sleep(); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 136d6a283e0a..9634f20e3926 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -187,8 +187,11 @@ static int poll_idle(struct cpuidle_device *dev, t1 = ktime_get(); local_irq_enable(); - while (!need_resched()) - cpu_relax(); + if (!current_set_polling_and_test()) { + while (!need_resched()) + cpu_relax(); + } + current_clr_polling(); t2 = ktime_get(); diff = ktime_to_us(ktime_sub(t2, t1)); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index f066fa23cc05..02f177aeb16c 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -313,7 +313,7 @@ config CRYPTO_DEV_S5P config CRYPTO_DEV_NX bool "Support for IBM Power7+ in-Nest cryptographic acceleration" - depends on PPC64 && IBMVIO + depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN default n help Support for Power7+ in-Nest cryptographic acceleration. diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index d9c9cb4665db..2ebc9071e354 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2614,7 +2614,7 @@ static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx, &of_flags); - if (!IS_ERR(desc)) + if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER)) break; } diff --git a/drivers/hsi/clients/Kconfig b/drivers/hsi/clients/Kconfig index 71b9f9ab86e4..bc60dec3f586 100644 --- a/drivers/hsi/clients/Kconfig +++ b/drivers/hsi/clients/Kconfig @@ -15,7 +15,7 @@ config NOKIA_MODEM config SSI_PROTOCOL tristate "SSI protocol" - depends on HSI && PHONET && (OMAP_SSI=y || OMAP_SSI=m) + depends on HSI && PHONET && OMAP_SSI help If you say Y here, you will enable the SSI protocol aka McSAAB. diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c index b8693f0b27fe..29aea0b93360 100644 --- a/drivers/hsi/controllers/omap_ssi_port.c +++ b/drivers/hsi/controllers/omap_ssi_port.c @@ -1116,8 +1116,7 @@ static int __init ssi_port_probe(struct platform_device *pd) dev_dbg(&pd->dev, "init ssi port...\n"); - err = ref_module(THIS_MODULE, ssi->owner); - if (err) { + if (!try_module_get(ssi->owner)) { dev_err(&pd->dev, "could not increment parent module refcount (err=%d)\n", err); return -ENODEV; @@ -1254,6 +1253,7 @@ static int __exit ssi_port_remove(struct platform_device *pd) omap_ssi->port[omap_port->port_id] = NULL; platform_set_drvdata(pd, NULL); + module_put(ssi->owner); pm_runtime_disable(&pd->dev); return 0; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 00343166feb1..08531a128f53 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1124,6 +1124,16 @@ config SENSORS_SHT21 This driver can also be built as a module. If so, the module will be called sht21. +config SENSORS_SHTC1 + tristate "Sensiron humidity and temperature sensors. SHTC1 and compat." + depends on I2C + help + If you say yes here you get support for the Sensiron SHTC1 and SHTW1 + humidity and temperature sensors. + + This driver can also be built as a module. If so, the module + will be called shtc1. + config SENSORS_S3C tristate "Samsung built-in ADC" depends on S3C_ADC diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 11798ad7e801..3dc0f02f71d2 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -126,6 +126,7 @@ obj-$(CONFIG_SENSORS_SCH5627) += sch5627.o obj-$(CONFIG_SENSORS_SCH5636) += sch5636.o obj-$(CONFIG_SENSORS_SHT15) += sht15.o obj-$(CONFIG_SENSORS_SHT21) += sht21.o +obj-$(CONFIG_SENSORS_SHTC1) += shtc1.o obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o obj-$(CONFIG_SENSORS_SMM665) += smm665.o obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c index 6edce42c61d5..2ae8a304b5ef 100644 --- a/drivers/hwmon/atxp1.c +++ b/drivers/hwmon/atxp1.c @@ -45,30 +45,6 @@ MODULE_AUTHOR("Sebastian Witt <se.witt@gmx.net>"); static const unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END }; -static int atxp1_probe(struct i2c_client *client, - const struct i2c_device_id *id); -static int atxp1_remove(struct i2c_client *client); -static struct atxp1_data *atxp1_update_device(struct device *dev); -static int atxp1_detect(struct i2c_client *client, struct i2c_board_info *info); - -static const struct i2c_device_id atxp1_id[] = { - { "atxp1", 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, atxp1_id); - -static struct i2c_driver atxp1_driver = { - .class = I2C_CLASS_HWMON, - .driver = { - .name = "atxp1", - }, - .probe = atxp1_probe, - .remove = atxp1_remove, - .id_table = atxp1_id, - .detect = atxp1_detect, - .address_list = normal_i2c, -}; - struct atxp1_data { struct device *hwmon_dev; struct mutex update_lock; @@ -386,4 +362,22 @@ static int atxp1_remove(struct i2c_client *client) return 0; }; +static const struct i2c_device_id atxp1_id[] = { + { "atxp1", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, atxp1_id); + +static struct i2c_driver atxp1_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "atxp1", + }, + .probe = atxp1_probe, + .remove = atxp1_remove, + .id_table = atxp1_id, + .detect = atxp1_detect, + .address_list = normal_i2c, +}; + module_i2c_driver(atxp1_driver); diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c index 93d26e8af3e2..bfd3f3eeabcd 100644 --- a/drivers/hwmon/ina2xx.c +++ b/drivers/hwmon/ina2xx.c @@ -148,7 +148,8 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg) switch (reg) { case INA2XX_SHUNT_VOLTAGE: - val = DIV_ROUND_CLOSEST(data->regs[reg], + /* signed register */ + val = DIV_ROUND_CLOSEST((s16)data->regs[reg], data->config->shunt_div); break; case INA2XX_BUS_VOLTAGE: @@ -160,8 +161,8 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg) val = data->regs[reg] * data->config->power_lsb; break; case INA2XX_CURRENT: - /* LSB=1mA (selected). Is in mA */ - val = data->regs[reg]; + /* signed register, LSB=1mA (selected), in mA */ + val = (s16)data->regs[reg]; break; default: /* programmer goofed */ diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index bed4af358308..b0129a54e1a6 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -5,7 +5,7 @@ * Copyright (c) 2002, 2003 Philip Pokorny <ppokorny@penguincomputing.com> |