aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/cpu_setup_a2.S6
-rw-r--r--arch/powerpc/kernel/dma-iommu.c1
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c1
-rw-r--r--arch/powerpc/kernel/dma.c46
-rw-r--r--arch/powerpc/kernel/entry_32.S34
-rw-r--r--arch/powerpc/kernel/entry_64.S134
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S25
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c52
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S10
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S3
-rw-r--r--arch/powerpc/kernel/fpu.S16
-rw-r--r--arch/powerpc/kernel/ftrace.c92
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S25
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c2
-rw-r--r--arch/powerpc/kernel/idle_6xx.S4
-rw-r--r--arch/powerpc/kernel/idle_book3e.S2
-rw-r--r--arch/powerpc/kernel/idle_e500.S4
-rw-r--r--arch/powerpc/kernel/idle_power4.S2
-rw-r--r--arch/powerpc/kernel/iommu.c291
-rw-r--r--arch/powerpc/kernel/irq.c50
-rw-r--r--arch/powerpc/kernel/kvm.c30
-rw-r--r--arch/powerpc/kernel/kvm_emul.S12
-rw-r--r--arch/powerpc/kernel/misc_32.S4
-rw-r--r--arch/powerpc/kernel/misc_64.S9
-rw-r--r--arch/powerpc/kernel/pci-common.c28
-rw-r--r--arch/powerpc/kernel/pci_64.c2
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c3
-rw-r--r--arch/powerpc/kernel/prom_init.c4
-rw-r--r--arch/powerpc/kernel/rtas_flash.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c27
-rw-r--r--arch/powerpc/kernel/setup_32.c24
-rw-r--r--arch/powerpc/kernel/smp.c5
-rw-r--r--arch/powerpc/kernel/vdso.c28
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile4
-rw-r--r--arch/powerpc/kernel/vdso32/getcpu.S45
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S3
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile2
-rw-r--r--arch/powerpc/kernel/vdso64/getcpu.S45
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64.lds.S1
-rw-r--r--arch/powerpc/kernel/vio.c48
42 files changed, 778 insertions, 350 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 83afacd3ba7..bb282dd8161 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -128,6 +128,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
obj-y += ppc_save_regs.o
endif
+obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
# Disable GCOV in odd or sensitive code
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 52c7ad78242..85b05c463fa 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -533,6 +533,7 @@ int main(void)
HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
+ HSTATE_FIELD(HSTATE_SPRG3, sprg3);
HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
HSTATE_FIELD(HSTATE_NAPPING, napping);
diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S
index ebc62f42a23..61f079e05b6 100644
--- a/arch/powerpc/kernel/cpu_setup_a2.S
+++ b/arch/powerpc/kernel/cpu_setup_a2.S
@@ -100,19 +100,19 @@ _icswx_skip_guest:
lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h
mtspr SPRN_MMUCR0, r4
li r4,A2_IERAT_SIZE-1
- PPC_ERATWE(r4,r4,3)
+ PPC_ERATWE(R4,R4,3)
/* Now set the D-ERAT watermark to 31 */
lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h
mtspr SPRN_MMUCR0, r4
li r4,A2_DERAT_SIZE-1
- PPC_ERATWE(r4,r4,3)
+ PPC_ERATWE(R4,R4,3)
/* And invalidate the beast just in case. That won't get rid of
* a bolted entry though it will be in LRU and so will go away eventually
* but let's not bother for now
*/
- PPC_ERATILX(0,0,0)
+ PPC_ERATILX(0,0,R0)
1:
blr
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index bcfdcd22c76..2d7bb8ced13 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -109,6 +109,7 @@ static u64 dma_iommu_get_required_mask(struct device *dev)
struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
+ .mmap = dma_direct_mmap_coherent,
.map_sg = dma_iommu_map_sg,
.unmap_sg = dma_iommu_unmap_sg,
.dma_supported = dma_iommu_dma_supported,
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 4ab88dafb23..46943651da2 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -49,6 +49,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev)
struct dma_map_ops swiotlb_dma_ops = {
.alloc = dma_direct_alloc_coherent,
.free = dma_direct_free_coherent,
+ .mmap = dma_direct_mmap_coherent,
.map_sg = swiotlb_map_sg_attrs,
.unmap_sg = swiotlb_unmap_sg_attrs,
.dma_supported = swiotlb_dma_supported,
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index b1ec983dcec..355b9d84b0f 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -11,6 +11,8 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/vio.h>
#include <asm/bug.h>
#include <asm/abs_addr.h>
#include <asm/machdep.h>
@@ -65,6 +67,24 @@ void dma_direct_free_coherent(struct device *dev, size_t size,
#endif
}
+int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle, size_t size,
+ struct dma_attrs *attrs)
+{
+ unsigned long pfn;
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
+#else
+ pfn = page_to_pfn(virt_to_page(cpu_addr));
+#endif
+ return remap_pfn_range(vma, vma->vm_start,
+ pfn + vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction direction,
struct dma_attrs *attrs)
@@ -154,6 +174,7 @@ static inline void dma_direct_sync_single(struct device *dev,
struct dma_map_ops dma_direct_ops = {
.alloc = dma_direct_alloc_coherent,
.free = dma_direct_free_coherent,
+ .mmap = dma_direct_mmap_coherent,
.map_sg = dma_direct_map_sg,
.unmap_sg = dma_direct_unmap_sg,
.dma_supported = dma_direct_dma_supported,
@@ -205,26 +226,15 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);
static int __init dma_init(void)
{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+#ifdef CONFIG_PCI
+ dma_debug_add_bus(&pci_bus_type);
+#endif
+#ifdef CONFIG_IBMVIO
+ dma_debug_add_bus(&vio_bus_type);
+#endif
return 0;
}
fs_initcall(dma_init);
-int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle, size_t size)
-{
- unsigned long pfn;
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
-#else
- pfn = page_to_pfn(virt_to_page(cpu_addr));
-#endif
- return remap_pfn_range(vma, vma->vm_start,
- pfn + vma->vm_pgoff,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-EXPORT_SYMBOL_GPL(dma_mmap_coherent);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ba3aeb4bc06..ead5016b02d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -89,6 +89,10 @@ crit_transfer_to_handler:
mfspr r0,SPRN_SRR1
stw r0,_SRR1(r11)
+ /* set the stack limit to the current stack
+ * and set the limit to protect the thread_info
+ * struct
+ */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,SAVED_KSP_LIMIT(r11)
@@ -109,6 +113,10 @@ crit_transfer_to_handler:
mfspr r0,SPRN_SRR1
stw r0,crit_srr1@l(0)
+ /* set the stack limit to the current stack
+ * and set the limit to protect the thread_info
+ * struct
+ */
mfspr r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,saved_ksp_limit@l(0)
@@ -158,7 +166,7 @@ transfer_to_handler:
tophys(r11,r11)
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_CPU(r9)
slwi r9,r9,3
add r11,r11,r9
@@ -179,7 +187,7 @@ transfer_to_handler:
ble- stack_ovf /* then the kernel stack overflowed */
5:
#if defined(CONFIG_6xx) || defined(CONFIG_E500)
- rlwinm r9,r1,0,0,31-THREAD_SHIFT
+ CURRENT_THREAD_INFO(r9, r1)
tophys(r9,r9) /* check local flags */
lwz r12,TI_LOCAL_FLAGS(r9)
mtcrf 0x01,r12
@@ -226,13 +234,7 @@ reenable_mmu: /* re-enable mmu so we can */
stw r3,16(r1)
stw r4,20(r1)
stw r5,24(r1)
- andi. r12,r12,MSR_PR
- b 11f
- bl trace_hardirqs_off
- b 12f
-11:
bl trace_hardirqs_off
-12:
lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
@@ -333,7 +335,7 @@ _GLOBAL(DoSyscall)
mtmsr r11
1:
#endif /* CONFIG_TRACE_IRQFLAGS */
- rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ CURRENT_THREAD_INFO(r10, r1)
lwz r11,TI_FLAGS(r10)
andi. r11,r11,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
@@ -354,7 +356,7 @@ ret_from_syscall:
bl do_show_syscall_exit
#endif
mr r6,r3
- rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ CURRENT_THREAD_INFO(r12, r1)
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
@@ -815,7 +817,7 @@ ret_from_except:
user_exc_return: /* r10 contains MSR_KERNEL here */
/* Check current_thread_info()->flags */
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_FLAGS(r9)
andi. r0,r9,_TIF_USER_WORK_MASK
bne do_work
@@ -835,7 +837,7 @@ restore_user:
/* N.B. the only way to get here is from the beq following ret_from_except. */
resume_kernel:
/* check current_thread_info->preempt_count */
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r0,TI_PREEMPT(r9)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
@@ -852,7 +854,7 @@ resume_kernel:
bl trace_hardirqs_off
#endif
1: bl preempt_schedule_irq
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r3,TI_FLAGS(r9)
andi. r0,r3,_TIF_NEED_RESCHED
bne- 1b
@@ -1122,7 +1124,7 @@ ret_from_debug_exc:
lwz r10,SAVED_KSP_LIMIT(r1)
stw r10,KSP_LIMIT(r9)
lwz r9,THREAD_INFO-THREAD(r9)
- rlwinm r10,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r10, r1)
lwz r10,TI_PREEMPT(r10)
stw r10,TI_PREEMPT(r9)
RESTORE_xSRR(SRR0,SRR1);
@@ -1156,7 +1158,7 @@ load_dbcr0:
lis r11,global_dbcr0@ha
addi r11,r11,global_dbcr0@l
#ifdef CONFIG_SMP
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_CPU(r9)
slwi r9,r9,3
add r11,r11,r9
@@ -1197,7 +1199,7 @@ recheck:
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_FLAGS(r9)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ed1718feb9d..4b01a25e29e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -146,7 +146,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
REST_2GPRS(7,r1)
addi r9,r1,STACK_FRAME_OVERHEAD
#endif
- clrrdi r11,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r11, r1)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
@@ -181,7 +181,7 @@ syscall_exit:
bl .do_show_syscall_exit
ld r3,RESULT(r1)
#endif
- clrrdi r12,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r12, r1)
ld r8,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S
@@ -197,7 +197,16 @@ syscall_exit:
wrteei 0
#else
ld r10,PACAKMSR(r13)
- mtmsrd r10,1
+ /*
+ * For performance reasons we clear RI the same time that we
+ * clear EE. We only need to clear RI just before we restore r13
+ * below, but batching it with EE saves us one expensive mtmsrd call.
+ * We have to be careful to restore RI if we branch anywhere from
+ * here (eg syscall_exit_work).
+ */
+ li r9,MSR_RI
+ andc r11,r10,r9
+ mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
ld r9,TI_FLAGS(r12)
@@ -214,17 +223,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- li r12,MSR_RI
- andc r11,r10,r12
- mtmsrd r11,1 /* clear MSR.RI */
-#endif /* CONFIG_PPC_BOOK3S */
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -262,7 +260,7 @@ syscall_dotrace:
ld r7,GPR7(r1)
ld r8,GPR8(r1)
addi r9,r1,STACK_FRAME_OVERHEAD
- clrrdi r10,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r10, r1)
ld r10,TI_FLAGS(r10)
b .Lsyscall_dotrace_cont
@@ -271,6 +269,9 @@ syscall_enosys:
b syscall_exit
syscall_exit_work:
+#ifdef CONFIG_PPC_BOOK3S
+ mtmsrd r10,1 /* Restore RI */
+#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
@@ -499,7 +500,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
2:
#endif /* !CONFIG_PPC_BOOK3S */
- clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
+ CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
because we don't need to leave the 288-byte ABI gap at the
top of the kernel stack. */
@@ -558,27 +559,54 @@ _GLOBAL(ret_from_except_lite)
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PREEMPT
- clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */
- li r0,_TIF_NEED_RESCHED /* bits to check */
+ CURRENT_THREAD_INFO(r9, r1)
ld r3,_MSR(r1)
ld r4,TI_FLAGS(r9)
- /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
- rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
- and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */
- bne do_work
-
-#else /* !CONFIG_PREEMPT */
- ld r3,_MSR(r1) /* Returning to user mode? */
andi. r3,r3,MSR_PR
- beq restore /* if not, just restore regs and return */
+ beq resume_kernel
/* Check current_thread_info()->flags */
- clrrdi r9,r1,THREAD_SHIFT
- ld r4,TI_FLAGS(r9)
andi. r0,r4,_TIF_USER_WORK_MASK
- bne do_work
-#endif /* !CONFIG_PREEMPT */
+ beq restore
+
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq 1f
+ bl .restore_interrupts
+ bl .schedule
+ b .ret_from_except_lite
+
+1: bl .save_nvgprs
+ bl .restore_interrupts
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_notify_resume
+ b .ret_from_except
+
+resume_kernel:
+#ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq+ restore
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr1,r8,0
+ ld r0,SOFTE(r1)
+ cmpdi r0,0
+ crandc eq,cr1*4+eq,eq
+ bne restore
+
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first
+ */
+ SOFT_DISABLE_INTS(r3,r4)
+1: bl .preempt_schedule_irq
+
+ /* Re-test flags and eventually loop */
+ CURRENT_THREAD_INFO(r9, r1)
+ ld r4,TI_FLAGS(r9)
+ andi. r0,r4,_TIF_NEED_RESCHED
+ bne 1b
+#endif /* CONFIG_PREEMPT */
.globl fast_exc_return_irq
fast_exc_return_irq:
@@ -759,50 +787,6 @@ restore_check_irq_replay:
#endif /* CONFIG_PPC_BOOK3E */
1: b .ret_from_except /* What else to do here ? */
-
-
-3:
-do_work:
-#ifdef CONFIG_PREEMPT
- andi. r0,r3,MSR_PR /* Returning to user mode? */
- bne user_work
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
- ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
- bne restore
-
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first
- */
- SOFT_DISABLE_INTS(r3,r4)
-1: bl .preempt_schedule_irq
-
- /* Re-test flags and eventually loop */
- clrrdi r9,r1,THREAD_SHIFT
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
- b restore
-
-user_work:
-#endif /* CONFIG_PREEMPT */
-
- andi. r0,r4,_TIF_NEED_RESCHED
- beq 1f
- bl .restore_interrupts
- bl .schedule
- b .ret_from_except_lite
-
-1: bl .save_nvgprs
- bl .restore_interrupts
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_notify_resume
- b .ret_from_except
-
unrecov_restore:
addi r3,r1,STACK_FRAME_OVERHEAD
bl .unrecoverable_exception
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
new file mode 100644
index 00000000000..697b390ebfd
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+.global epapr_hypercall_start
+epapr_hypercall_start:
+ li r3, -1
+ nop
+ nop
+ nop
+ blr
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
new file mode 100644
index 00000000000..028aeae370b
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -0,0 +1,52 @@
+/*
+ * ePAPR para-virtualization support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/of.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+
+bool epapr_paravirt_enabled;
+
+static int __init epapr_paravirt_init(void)
+{
+ struct device_node *hyper_node;
+ const u32 *insts;
+ int len, i;
+
+ hyper_node = of_find_node_by_path("/hypervisor");
+ if (!hyper_node)
+ return -ENODEV;
+
+ insts = of_get_property(hyper_node, "hcall-instructions", &len);
+ if (!insts)
+ return -ENODEV;
+
+ if (len % 4 || len > (4 * 4))
+ return -ENODEV;
+
+ for (i = 0; i < (len / 4); i++)
+ patch_instruction(epapr_hypercall_start + i, insts[i]);
+
+ epapr_paravirt_enabled = true;
+
+ return 0;
+}
+
+early_initcall(epapr_paravirt_init);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7215cc2495d..98be7f0cd22 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -222,7 +222,7 @@ exc_##n##_bad_stack: \
* interrupts happen before the wait instruction.
*/
#define CHECK_NAPPING() \
- clrrdi r11,r1,THREAD_SHIFT; \
+ CURRENT_THREAD_INFO(r11, r1); \
ld r10,TI_LOCAL_FLAGS(r11); \
andi. r9,r10,_TLF_NAPPING; \
beq+ 1f; \
@@ -903,7 +903,7 @@ skpinv: addi r6,r6,1 /* Increment */
bne 1b /* If not, repeat */
/* Invalidate all TLBs */
- PPC_TLBILX_ALL(0,0)
+ PPC_TLBILX_ALL(0,R0)
sync
isync
@@ -961,7 +961,7 @@ skpinv: addi r6,r6,1 /* Increment */
tlbwe
/* Invalidate TLB1 */
- PPC_TLBILX_ALL(0,0)
+ PPC_TLBILX_ALL(0,R0)
sync
isync
@@ -1020,7 +1020,7 @@ skpinv: addi r6,r6,1 /* Increment */
tlbwe
/* Invalidate TLB1 */
- PPC_TLBILX_ALL(0,0)
+ PPC_TLBILX_ALL(0,R0)
sync
isync
@@ -1138,7 +1138,7 @@ a2_tlbinit_after_iprot_flush:
tlbwe
#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */
- PPC_TLBILX(0,0,0)
+ PPC_TLBILX(0,0,R0)
sync
isync
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1c06d297154..e894515e77b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -239,6 +239,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
* out of line to handle them
*/
. = 0xe00
+hv_exception_trampoline:
b h_data_storage_hv
. = 0xe20
b h_instr_storage_hv
@@ -851,7 +852,7 @@ BEGIN_FTR_SECTION
bne- do_ste_alloc /* If so handle it */
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
- clrrdi r11,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
bne 77f /* then don't call hash_page now */
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index de369558bf0..e0ada05f2df 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -26,7 +26,7 @@
#include <asm/ptrace.h>
#ifdef CONFIG_VSX
-#define REST_32FPVSRS(n,c,base) \
+#define __REST_32FPVSRS(n,c,base) \
BEGIN_FTR_SECTION \
b 2f; \
END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
@@ -35,7 +35,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
2: REST_32VSRS(n,c,base); \
3:
-#define SAVE_32FPVSRS(n,c,base) \
+#define __SAVE_32FPVSRS(n,c,base) \
BEGIN_FTR_SECTION \
b 2f; \
END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
@@ -44,9 +44,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
2: SAVE_32VSRS(n,c,base); \
3:
#else
-#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
-#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
+#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
+#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
#endif
+#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
+#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
/*
* This task wants to use the FPU now.
@@ -79,7 +81,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
beq 1f
toreal(r4)
addi r4,r4,THREAD /* want last_task_used_math->thread */
- SAVE_32FPVSRS(0, r5, r4)
+ SAVE_32FPVSRS(0, R5, R4)
mffs fr0
stfd fr0,THREAD_FPSCR(r4)
PPC_LL r5,PT_REGS(r4)
@@ -106,7 +108,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
lfd fr0,THREAD_FPSCR(r5)
MTFSF_L(fr0)
- REST_32FPVSRS(0, r4, r5)
+ REST_32FPVSRS(0, R4, R5)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
@@ -140,7 +142,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r5,0
- SAVE_32FPVSRS(0, r4 ,r3)
+ SAVE_32FPVSRS(0, R4 ,R3)
mffs fr0
stfd fr0,THREAD_FPSCR(r3)
beq 1f
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index bf99cfa6bbf..1fb78561096 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -63,11 +63,9 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, &new, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, new))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
@@ -212,12 +210,9 @@ __ftrace_make_nop(struct module *mod,
*/
op = 0x48000008; /* b +8 */
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
-
- flush_icache_range(ip, ip + 8);
-
return 0;
}
@@ -245,9 +240,9 @@ __ftrace_make_nop(struct module *mod,
/*
* On PPC32 the trampoline looks like:
- * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha
- * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l
- * 0x7d, 0x69, 0x03, 0xa6 mtctr r11
+ * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
+ * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
+ * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
* 0x4e, 0x80, 0x04, 0x20 bctr
*/
@@ -262,9 +257,9 @@ __ftrace_make_nop(struct module *mod,
pr_devel(" %08x %08x ", jmp[0], jmp[1]);
/* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d600000) ||
- ((jmp[1] & 0xffff0000) != 0x396b0000) ||
- (jmp[2] != 0x7d6903a6) ||
+ if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
+ ((jmp[1] & 0xffff0000) != 0x398c0000) ||
+ (jmp[2] != 0x7d8903a6) ||
(jmp[3] != 0x4e800420)) {
printk(KERN_ERR "Not a trampoline\n");
return -EINVAL;
@@ -286,11 +281,9 @@ __ftrace_make_nop(struct module *mod,
op = PPC_INST_NOP;
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
#endif /* PPC64 */
@@ -426,11 +419,9 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
pr_devel("write to %lx\n", rec->ip);
- if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE))
+ if (patch_instruction((unsigned int *)ip, op))
return -EPERM;
- flush_icache_range(ip, ip + 8);
-
return 0;
}
#endif /* CONFIG_PPC64 */
@@ -484,6 +475,58 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ret;
}
+static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR;
+ int ret;
+
+ ret = ftrace_update_record(rec, enable);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+ case FTRACE_UPDATE_MAKE_CALL:
+ return ftrace_make_call(rec, ftrace_addr);
+ case FTRACE_UPDATE_MAKE_NOP:
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
+ }
+
+ return 0;
+}
+
+void ftrace_replace_code(int enable)
+{
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ int ret;
+
+ for (iter = ftrace_rec_iter_start(); iter;
+ iter = ftrace_rec_iter_next(iter)) {
+ rec = ftrace_rec_iter_record(iter);
+ ret = __ftrace_replace_code(rec, enable);
+ if (ret) {
+ ftrace_bug(ret, rec->ip);
+ return;
+ }
+ }
+}
+
+void arch_ftrace_update_code(int command)
+{
+ if (command & FTRACE_UPDATE_CALLS)
+ ftrace_replace_code(1);
+ else if (command & FTRACE_DISABLE_CALLS)
+ ftrace_replace_code(0);
+
+ if (command & FTRACE_UPDATE_TRACE_FUNC)
+ ftrace_update_ftrace_func(ftrace_trace_function);
+
+ if (command & FTRACE_START_FUNC_RET)
+ ftrace_enable_ftrace_graph_caller();
+ else if (command & FTRACE_STOP_FUNC_RET)
+ ftrace_disable_ftrace_graph_caller();
+}
+
int __init ftrace_dyn_arch_init(void *data)
{
/* caller expects data to be zero */
@@ -587,18 +630,17 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY) {
- *parent = old;
- return;
- }
-
trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
*parent = old;
+ return;
}
+
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY)
+ *parent = old;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 1f4434a3860..0f59863c3ad 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -192,7 +192,7 @@ _ENTRY(__early_start)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
- rlwinm r22,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ CURRENT_THREAD_INFO(r22, r1)
stw r24, TI_CPU(r22)
bl early_init
@@ -556,8 +556,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL)
- bne load_up_spe
- addi r3,r1,STACK_FRAME_OVERHEAD
+ beq 1f
+ bl load_up_spe
+ b fast_exception_return
+1: addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x2010, KernelSPE)
#else
EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
@@ -778,7 +780,7 @@ tlb_write_entry:
/* Note that the SPE support is closely modeled after the AltiVec
* support. Changes to one are likely to be applicable to the
* other! */
-load_up_spe:
+_GLOBAL(load_up_spe)
/*
* Disable SPE for the task which had SPE previously,
* and save its SPE registers in its thread_struct.
@@ -826,20 +828,7 @@ load_up_spe:
subi r4,r5,THREAD
stw r4,last_task_used_spe@l(r3)
#endif /* !CONFIG_SMP */
- /* restore registers and return */
-2: REST_4GPRS(3, r11)
- lwz r10,_CCR(r11)
- REST_GPR(1, r11)
- mtcr r10
- lwz r10,_LINK(r11)
- mtlr r10
- REST_GPR(10, r11)
- mtspr SPRN_SRR1,r9
- mtspr SPRN_SRR0,r12
- REST_GPR(9, r11)
- REST_GPR(12, r11)
- lwz r11,GPR11(r11)
- rfi
+ blr
/*
* SPE unavailable trap from kernel - print a message, but let
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 2bc0584be81..f3a82dde61d 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -111,7 +111,7 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
* and the single_step_dabr_instruction(), then cleanup the breakpoint
* restoration variables to prevent dangling pointers.
*/
- if (bp->ctx->task)
+ if (bp->ctx && bp->ctx->task)
bp->ctx->task->thread.last_hit_ubp = NULL;
}
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 15c611de1ee..1686916cc7f 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -135,7 +135,7 @@ BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- rlwinm r9,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ CURRENT_THREAD_INFO(r9, r1)
lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
@@ -158,7 +158,7 @@ _GLOBAL(power_save_ppc32_restore)
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- rlwinm r12,r11,0,0,31-THREAD_SHIFT
+ CURRENT_THREAD_INFO(r12, r11)
lwz r11,TI_CPU(r12) /* get cpu number * 4 */
slwi r11,r11,2
#else
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index ff007b59448..4c7cb400858 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -60,7 +60,7 @@ _GLOBAL(book3e_idle)
1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
* to the right spot
*/
- clrrdi r11,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r11, r1)
ld r10,TI_LOCAL_FLAGS(r11)
ori r10,r10,_TLF_NAPPING
std r10,TI_LOCAL_FLAGS(r11)
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
index 4f0ab85f378..15448668988 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -21,7 +21,7 @@
.text
_GLOBAL(e500_idle)
- rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ CURRENT_THREAD_INFO(r3, r1)
lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
ori r4,r4,_TLF_NAPPING /* so when we take an exception */
stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
@@ -96,7 +96,7 @@ _GLOBAL(power_save_ppc32_restore)
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- rlwinm r12,r1,0,0,31-THREAD_SHIFT
+ CURRENT_THREAD_INFO(r12, r1)
lwz r11,TI_CPU(r12) /* get cpu number * 4 */
slwi r11,r11,2
#else
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index 2c71b0fc9f9..e3edaa18991 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -59,7 +59,7 @@ BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- clrrdi r9,r1,THREAD_SHIFT /* current thread_info */
+ CURRENT_THREAD_INFO(r9, r1)
ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 359f078571c..ff5a6ce027b 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -33,6 +33,9 @@
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
+#include <linux/hash.h>
+#include <linux/fault-inject.h>
+#include <linux/pci.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
@@ -40,6 +43,7 @@
#include <asm/machdep.h>
#include <asm/kdump.h>
#include <asm/fadump.h>
+#include <asm/vio.h>
#define DBG(...)
@@ -58,6 +62,114 @@ static int __init setup_iommu(char *str)
__setup("iommu=", setup_iommu);
+static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+/*
+ * We precalculate the hash to avoid doing it on every allocation.
+ *
+ * The hash is important to spread CPUs across all the pools. For example,
+ * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
+ * with 4 pools all primary threads would map to the same pool.
+ */
+static int __init setup_iommu_pool_hash(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+
+ return 0;
+}
+subsys_initcall(setup_iommu_pool_hash);
+
+#ifdef CONFIG_FAIL_IOMMU
+
+static DECLARE_FAULT_ATTR(fail_iommu);
+
+static int __init setup_fail_iommu(char *str)
+{
+ return setup_fault_attr(&fail_iommu, str);
+}
+__setup("fail_iommu=", setup_fail_iommu);
+
+static bool should_fail_iommu(struct device *dev)
+{
+ return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
+}
+
+static int __init fail_iommu_debugfs(void)
+{
+ struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
+ NULL, &fail_iommu);
+
+ return IS_ERR(dir) ? PTR_ERR(dir) : 0;
+}
+late_initcall(fail_iommu_debugfs);
+
+static ssize_t fail_iommu_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
+}
+
+static ssize_t fail_iommu_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ int i;
+
+ if (count > 0 && sscanf(buf, "%d", &i) > 0)
+ dev->archdata.fail_iommu = (i == 0) ? 0 : 1;
+
+ return count;
+}
+
+static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
+ fail_iommu_store);
+
+static int fail_iommu_bus_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ if (action == BUS_NOTIFY_ADD_DEVICE) {
+ if (device_create_file(dev, &dev_attr_fail_iommu))
+ pr_warn("Unable to create IOMMU fault injection sysfs "
+ "entries\n");
+ } else if (action == BUS_NOTIFY_DEL_DEVICE) {
+ device_remove_file(dev, &dev_attr_fail_iommu);
+ }
+
+ return 0;
+}
+
+static struct notifier_block fail_iommu_bus_notifier = {
+ .notifier_call = fail_iommu_bus_notify
+};
+
+static int __init fail_iommu_setup(void)
+{
+#ifdef CONFIG_PCI
+ bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+#endif
+#ifdef CONFIG_IBMVIO
+ bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+#endif
+
+ return 0;
+}
+/*
+ * Must execute after PCI and VIO subsystem have initialised but before
+ * devices are probed.
+ */
+arch_initcall(fail_iommu_setup);
+#else
+static inline bool should_fail_iommu(struct device *dev)
+{
+ return false;
+}
+#endif
+
static unsigned long iommu_range_alloc(struct device *dev,
struct iommu_table *tbl,
unsigned long npages,
@@ -71,6 +183,9 @@ static unsigned long iommu_range_alloc(struct device *dev,
int pass = 0;
unsigned long align_mask;
unsigned long boundary_size;
+ unsigned long flags;
+ unsigned int pool_nr;
+ struct iommu_pool *pool;
align_mask = 0xffffffffffffffffl >> (64 - align_order);
@@ -83,36 +198,49 @@ static unsigned long iommu_range_alloc(struct device *dev,
return DMA_ERROR_CODE;
}
- if (handle && *handle)
- start = *handle;
+ if (should_fail_iommu(dev))
+ return DMA_ERROR_CODE;
+
+ /*
+ * We don't need to disable preemption here because any CPU can
+ * safely use any IOMMU pool.
+ */
+ pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
+
+ if (largealloc)
+ pool = &(tbl->large_pool);
else
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
+ pool = &(tbl->pools[pool_nr]);
- /* Use only half of the table for small allocs (15 pages or less) */
- limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+ spin_lock_irqsave(&(pool->lock), flags);
- if (largealloc && start < tbl->it_halfpoint)
- start = tbl->it_halfpoint;
+again:
+ if ((pass == 0) && handle && *handle)
+ start = *handle;
+ else
+ start = pool->hint;
+
+ limit = pool->end;
/* The case below can happen if we have a small segment appended
* to a large, or when the previous alloc was at the very end of
* the available space. If so, go back to the initial start.
*/
if (start >= limit)
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
- again:
+ start = pool->start;
if (limit + tbl->it_offset > mask) {
limit = mask - tbl->it_offset + 1;
/* If we're constrained on address range, first try
* at the masked hint to avoid O(n) search complexity,
- * but on second pass, start at 0.
+ * but on second pass, start at 0 in pool 0.
*/
- if ((start & mask) >= limit || pass > 0)
- start = 0;
- else
+ if ((start & mask) >= limit || pass > 0) {
+ pool = &(tbl->pools[0]);
+ start = pool->start;
+ } else {
start &= mask;
+ }
}
if (dev)
@@ -126,16 +254,25 @@ static unsigned long iommu_range_alloc(struct device *dev,
tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
align_mask);
if (n == -1) {
- if (likely(pass < 2)) {
- /* First failure, just rescan the half of the table.
- * Second failure, rescan the other half of the table.
- */
- start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
- limit = pass ? tbl->it_size : limit;
+ if (likely(pass == 0)) {
+ /* First try the pool from the start */
+ pool->hint = pool->start;
pass++;
goto again;
+
+ } else if (pass <= tbl->nr_pools) {
+ /* Now try scanning all the other pools */
+ spin_unlock(&(pool->lock));
+ pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
+ pool = &tbl->pools[pool_nr];
+ spin_lock(&(pool->lock));
+ pool->hint = pool->start;
+ pass++;
+ goto again;
+
} else {
- /* Third failure, give up */
+ /* Give up */
+ spin_unlock_irqrestore(&(pool->lock), flags);
return DMA_ERROR_CODE;
}
}
@@ -145,10 +282,10 @@ static unsigned long iommu_range_alloc(struct device *dev,
/* Bump the hint to a new block for small allocs. */
if (largealloc) {
/* Don't bump to new block to avoid fragmentation */
- tbl->it_largehint = end;
+ pool->hint = end;
} else {
/* Overflow will be taken care of at the next allocation */
- tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ pool->hint = (end + tbl->it_blocksize - 1) &
~(tbl->it_blocksize - 1);
}
@@ -156,6 +293,8 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (handle)
*handle = end;
+ spin_unlock_irqrestore(&(pool->lock), flags);
+
return n;
}
@@ -165,18 +304,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
unsigned long mask, unsigned int align_order,
struct dma_attrs *attrs)
{
- unsigned long entry, flags;
+ unsigned long entry;
dma_addr_t ret = DMA_ERROR_CODE;
int build_fail;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == DMA_ERROR_CODE)) {
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ if (unlikely(entry == DMA_ERROR_CODE))
return DMA_ERROR_CODE;
- }
entry += tbl->it_offset; /* Offset into real TCE table */
ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
@@ -193,8 +328,6 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
return DMA_ERROR_CODE;
}
@@ -202,16 +335,14 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
/* Make sure updates are seen by hardware */
mb();
return ret;
}
-static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
+static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
{
unsigned long entry, free_entry;
@@ -231,20 +362,57 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
WARN_ON(1);
}
- return;
+
+ return false;
+ }
+
+ return true;
+}
+
+static struct iommu_pool *get_pool(struct iommu_table *tbl,
+ unsigned long entry)
+{
+ struct iommu_pool *p;
+ unsigned long largepool_start = tbl->large_pool.start;
+
+ /* The large pool is the last pool at the top of the table */
+ if (entry >= largepool_start) {
+ p = &tbl->large_pool;
+ } else {
+ unsigned int pool_nr = entry / tbl->poolsize;
+
+ BUG_ON(pool_nr > tbl->nr_pools);
+ p = &tbl->pools[pool_nr];
}
+ return p;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
+ unsigned long flags;
+ struct iommu_pool *pool;
+
+ entry = dma_addr >> IOMMU_PAGE_SHIFT;
+ free_entry = entry - tbl->it_offset;
+
+ pool = get_pool(tbl, free_entry);
+
+ if (!iommu_free_check(tbl, dma_addr, npages))
+ return;
+
ppc_md.tce_free(tbl, entry, npages);
+
+ spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(tbl->it_map, free_entry, npages);
+ spin_unlock_irqrestore(&(pool->lock), flags);
}
static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
- unsigned long flags;
-
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
__iommu_free(tbl, dma_addr, npages);
/* Make sure TLB cache is flushed if the HW needs it. We do
@@ -253,8 +421,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
@@ -263,7 +429,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
struct dma_attrs *attrs)
{
dma_addr_t dma_next = 0, dma_addr;
- unsigned long flags;
struct scatterlist *s, *outs, *segstart;
int outcount, incount, i, build_fail = 0;
unsigned int align;
@@ -285,8 +450,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
DBG("sg mapping %d elements:\n", nelems);
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
max_seg_size = dma_get_max_seg_size(dev);
for_each_sg(sglist, s, nelems, i) {
unsigned long vaddr, npages, entry, slen;
@@ -369,8 +532,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
DBG("mapped %d elements:\n", outcount);
/* For the sake of iommu_unmap_sg, we clear out the length in the
@@ -402,7 +563,6 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (s == outs)
break;
}
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
return 0;
}
@@ -412,15 +572,12 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
struct dma_attrs *attrs)
{
struct scatterlist *sg;
- unsigned long flags;
BUG_ON(direction == DMA_NONE);
if (!tbl)
return;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
sg = sglist;
while (nelems--) {
unsigned int npages;
@@ -440,8 +597,6 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
static void iommu_table_clear(struct iommu_table *tbl)
@@ -494,9 +649,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
unsigned long sz;
static int welcomed = 0;
struct page *page;
-
- /* Set aside 1/4 of the table for large allocations. */
- tbl->it_halfpoint = tbl->it_size * 3 / 4;
+ unsigned int i;
+ struct iommu_pool *p;
/* number of bytes needed for the bitmap */
sz = (tbl->it_size + 7) >> 3;
@@ -515,9 +669,28 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
if (tbl->it_offset == 0)
set_bit(0, tbl->it_map);
- tbl->it_hint = 0;
- tbl->it_largehint = tbl->it_halfpoint;
- spin_lock_init(&tbl->it_lock);
+ /* We only split the IOMMU table if we have 1GB or more of space */
+ if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024))
+ tbl->nr_pools = IOMMU_NR_POOLS;
+ else
+ tbl->nr_pools = 1;
+
+ /* We reserve the top 1/4 of the table for large allocations */
+ tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;
+
+ for (i = 0; i < tbl->nr_pools; i++) {
+ p = &tbl->pools[i];
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = p->start + tbl->poolsize;
+ }
+
+ p = &tbl->large_pool;
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = tbl->it_size;
iommu_table_clear(tbl);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 7835a5e1ea5..1f017bb7a7c 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -229,7 +229,7 @@ notrace void arch_local_irq_restore(unsigned long en)
*/
if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
__hard_irq_disable();
-#ifdef CONFIG_TRACE_IRQFLAG
+#ifdef CONFIG_TRACE_IRQFLAGS
else {
/*
* We should already be hard disabled here. We had bugs
@@ -277,7 +277,7 @@ EXPORT_SYMBOL(arch_local_irq_restore);
* NOTE: This is called with interrupts hard disabled but not marked
* as such in paca->irq_happened, so we need to resync this.
*/
-void restore_interrupts(void)
+void notrace restore_interrupts(void)
{
if (irqs_disabled()) {
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
@@ -286,6 +286,52 @@ void restore_interrupts(void)
__hard_irq_enable();
}
+/*
+ * This is a helper to use when about to go into idle low-power
+ * when the latter has the side effect of re-enabling interrupts
+ * (such as calling H_CEDE under pHyp).
+ *
+ * You call this function with interrupts soft-disabled (this is
+ * already the case when ppc_md.power_save is called). The function
+ * will return whether to enter power save or just return.
+ *
+ * In the former case, it will have notified lockdep of interrupts
+ * being re-enabled and generally sanitized the lazy irq state,
+ * and in the latter case it will leave with interrupts hard
+ * disabled and marked as such, so the local_irq_enable() call
+ * in cpu_idle() will properly re-enable everything.
+ */
+bool prep_irq_for_idle(void)
+{
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ hard_irq_disable();
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /* Tell lockdep we are about to re-enable */
+ trace_hardirqs_on();
+
+ /*
+ * Mark interrupts as soft-enabled and clear the
+ * PACA_IRQ_HARD_DIS from the pending mask since we
+ * are about to hard enable as well as a side effect
+ * of entering the low power state.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ local_paca->soft_enabled = 1;
+
+ /* Tell the caller to enter the low power state */
+ return true;
+}
+
#endif /* CONFIG_PPC64 */
int arch_show_interrupts(struct seq_file *p, int prec)
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 62bdf238966..867db1de894 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -31,6 +31,7 @@
#include <asm/cacheflush.h>
#include <asm/disassemble.h>
#include <asm/ppc-opcode.h>
+#include <asm/epapr_hcalls.h>
#define KVM_MAGIC_PAGE (-4096L)
#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
@@ -302,7 +303,7 @@ static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
if (imm_one) {
p[kvm_emulate_wrtee_reg_offs] =
- KVM_INST_LI | __PPC_RT(30) | MSR_EE;
+ KVM_INST_LI | __PPC_RT(R30) | MSR_EE;
} else {
/* Make clobbered registers work too */
switch (get_rt(rt)) {
@@ -726,7 +727,7 @@ unsigned long kvm_hypercall(unsigned long *in,
unsigned long register r11 asm("r11") = nr;
unsigned long register r12 asm("r12");
- asm volatile("bl kvm_hypercall_start"
+ asm volatile("bl epapr_hypercall_start"
: "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
"=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
"=r"(r12)
@@ -747,29 +748,6 @@ unsigned long kvm_hypercall(unsigned long *in,
}
EXPORT_SYMBOL_GPL(kvm_hypercall);
-static int kvm_para_setup(void)
-{
- extern u32 kvm_hypercall_start;
- struct device_node *hyper_node;
- u32 *insts;
- int len, i;
-
- hyper_node = of_find_node_by_path("/hypervisor");
- if (!hyper_node)
- return -1;
-
- insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len);
- if (len % 4)
- return -1;
- if (len > (4 * 4))
- return -1;
-
- for (i = 0; i < (len / 4); i++)
- kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]);
-
- return 0;
-}
-
static __init void kvm_free_tmp(void)
{
unsigned long start, end;
@@ -791,7 +769,7 @@ static int __init kvm_guest_init(void)
if (!kvm_para_available())
goto free_tmp;
- if (kvm_para_setup())
+ if (!epapr_paravirt_enabled)
goto free_tmp;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index e291cf3cf95..e100ff324a8 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -24,16 +24,6 @@
#include <asm/page.h>
#include <asm/asm-offsets.h>
-/* Hypercall entry point. Will be patched with device tree instructions. */
-
-.global kvm_hypercall_start
-kvm_hypercall_start:
- li r3, -1
- nop
- nop
- nop
- blr
-
#define KVM_MAGIC_PAGE (-4096)
#ifdef CONFIG_64BIT
@@ -132,7 +122,7 @@ kvm_emulate_mtmsrd_len:
.long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
-#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
+#define MSR_SAFE_BITS (MSR_EE | MSR_RI)
#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
.global kvm_emulate_mtmsr
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 386d57f66f2..407e293aad2 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -179,7 +179,7 @@ _GLOBAL(low_choose_750fx_pll)
mtspr SPRN_HID1,r4
/* Store new HID1 image */
- rlwinm r6,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r6, r1)
lwz r6,TI_CPU(r6)
slwi r6,r6,2
addis r6,r6,nap_save_hid1@ha
@@ -699,7 +699,7 @@ _GLOBAL(kernel_thread)
#ifdef CONFIG_SMP
_GLOBAL(start_secondary_resume)
/* Reset stack */
- rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ CURRENT_THREAD_INFO(r1, r1)
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
li r3,0
stw r3,0(r1) /* Zero the stack frame pointer */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 616921ef143..565b78625a3 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -301,11 +301,6 @@ _GLOBAL(real_writeb)
#ifdef CONFIG_PPC_PASEMI
-/* No support in all binutils for these yet, so use defines */
-#define LBZCIX(RT,RA,RB) .long (0x7c0006aa|(RT<<21)|(RA<<16)|(RB << 11))
-#define STBCIX(RS,RA,RB) .long (0x7c0007aa|(RS<<21)|(RA<<16)|(RB << 11))
-
-
_GLOBAL(real_205_readb)
mfmsr r7
ori r0,r7,MSR_DR
@@ -314,7 +309,7 @@ _GLOBAL(real_205_readb)
mtmsrd r0
sync
isync
- LBZCIX(r3,0,r3)
+ LBZCIX(R3,R0,R3)
isync
mtmsrd r7
sync
@@ -329,7 +324,7 @@ _GLOBAL(real_205_writeb)
mtmsrd r0
sync
isync
- STBCIX(r3,0,r4)
+ STBCIX(R3,R0,R4)
isync
mtmsrd r7
sync
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 8e78e93c818..2aa04f29e1d 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -200,11 +200,6 @@ int pcibios_add_platform_entries(struct pci_dev *pdev)
return device_create_file(&pdev->dev, &dev_attr_devspec);
}
-char __devinit *pcibios_setup(char *str)
-{
- return str;
-}
-
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
@@ -248,8 +243,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
} else {
pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
oirq.size, oirq.specifier[0], oirq.specifier[1],
- oirq.controller ? oirq.controller->full_name :
- "<default>");
+ of_node_full_name(oirq.controller));
virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
oirq.size);
@@ -1628,8 +1622,7 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
struct device_node *node = hose->dn;
int mode;
- pr_debug("PCI: Scanning PHB %s\n",
- node ? node->full_name : "<NO NAME>");
+ pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
/* Get some IO space for the new PHB */
pcibios_setup_phb_io_space(hose);
@@ -1637,6 +1630,11 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
/* Wire up PHB bus resources */
pcibios_setup_phb_resources(hose, &resources);
+ hose->busn.start = hose->first_busno;
+ hose->busn.end = hose->last_busno;
+ hose->busn.flags = IORESOURCE_BUS;
+ pci_add_resource(&resources, &hose->busn);
+
/* Create an empty bus for the toplevel */
bus = pci_create_root_bus(hose->parent, hose->first_busno,
hose->ops, hose, &resources);
@@ -1646,7 +1644,6 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
pci_free_resource_list(&resources);
return;
}
- bus->secondary = hose->first_busno;
hose->bus = bus;
/* Get probe mode and perform scan */
@@ -1654,13 +1651,14 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose)
if (node && ppc_md.pci_probe_mode)
mode = ppc_md.pci_probe_mode(bus);
pr_debug(" probe mode: %d\n", mode);
- if (mode == PCI_PROBE_DEVTREE) {
- bus->subordinate = hose->last_busno;
+ if (mode == PCI_PROBE_DEVTREE)
of_scan_bus(node, bus);
- }
- if (mode == PCI_PROBE_NORMAL)
- hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
+ if (mode == PCI_PROBE_NORMAL) {
+ pci_bus_update_busn_res_end(bus, 255);
+ hose->last_busno = pci_scan_child_bus(bus);
+ pci_bus_update_busn_res_end(bus, hose->last_busno);
+ }
/* Platform gets a chance to do some global fixups before
* we proceed to resource allocation
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 94a54f61d34..4ff190ff24a 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -236,7 +236,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
bus = pci_bus_b(ln);
- if (in_bus >= bus->number && in_bus <= bus->subordinate)
+ if (in_bus >= bus->number && in_bus <= bus->busn_res.end)
break;
bus = NULL;
}
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 89dde171a6f..30378a19f65 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -198,7 +198,6 @@ EXPORT_SYMBOL(of_create_pci_dev);
/**
* of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
- * @node: device tree node of bridge
* @dev: pci_dev structure for the bridge
*
* of_scan_bus() calls this routine for each PCI bridge that it finds, and
@@ -240,7 +239,7 @@ void __devinit of_scan_pci_bridge(struct pci_dev *dev)
}
bus->primary = dev->bus->number;
- bus->subordinate = busrange[1];
+ pci_bus_insert_busn_res(bus, busrange[0], busrange[1]);
bus->bridge_ctl = 0;
/* parse ranges property */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1b488e5305c..0794a3017b1 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1312,7 +1312,7 @@ static struct opal_secondary_data {
extern char opal_secondary_entry;
-static void prom_query_opal(void)
+static void __init prom_query_opal(void)
{
long rc;
@@ -1436,7 +1436,7 @@ static void __init prom_opal_hold_cpus(void)
prom_debug("prom_opal_hold_cpus: end...\n");
}
-static void prom_opal_takeover(void)
+static void __init prom_opal_takeover(void)
{
struct opal_secondary_data *data = &RELOC(opal_secondary_data);
struct opal_takeover_args *args = &data->args;
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 4174b4b2324..2c0ee640563 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -709,7 +709,7 @@ static int __init rtas_flash_init(void)
if (rtas_token("ibm,update-flash-64-and-reboot") ==
RTAS_UNKNOWN_SERVICE) {
- printk(KERN_ERR "rtas_flash: no firmware flash support\n");
+ pr_info("rtas_flash: no firmware flash support\n");
return 1;
}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index afd4f051f3f..bdc499c1787 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -720,6 +720,33 @@ static int powerpc_debugfs_init(void)
arch_initcall(powerpc_debugfs_init);
#endif
+#ifdef CONFIG_BOOKE_WDT
+extern u32 booke_wdt_enabled;
+extern u32 booke_wdt_period;
+
+/* Checks wdt=x and wdt_period=xx command-line option */
+notrace int __init early_parse_wdt(char *p)
+{
+ if (p && strncmp(p, "0", 1) != 0)
+ booke_wdt_enabled = 1;
+
+ return 0;
+}
+early_param("wdt", early_parse_wdt);
+
+int __init early_parse_wdt_period(char *p)
+{
+ unsigned long ret;
+ if (p) {
+ if (!kstrtol(p, 0, &ret))
+ booke_wdt_period = ret;
+ }
+
+ return 0;
+}
+early_param("wdt_period", early_parse_wdt_period);
+#endif /* CONFIG_BOOKE_WDT */
+
void ppc_printk_progress(char *s, unsigned short hex)
{
pr_info("%s\n", s);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ec8a53fa9e8..a8f54ecb091 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -149,30 +149,6 @@ notrace void __init machine_init(u64 dt_ptr)
ppc_md.progress("id mach(): done", 0x200);
}
-#ifdef CONFIG_BOOKE_WDT
-extern u32 booke_wdt_enabled;
-extern u32 booke_wdt_period;
-
-/* Checks wdt=x and wdt_period=xx command-line option */
-notrace int __init early_parse_wdt(char *p)
-{
- if (p && strncmp(p, "0", 1) != 0)
- booke_wdt_enabled = 1;
-
- return 0;
-}
-early_param("wdt", early_parse_wdt);
-
-int __init early_parse_wdt_period (char *p)
-{
- if (p)
- booke_wdt_period = simple_strtoul(p, NULL, 0);
-
- return 0;
-}
-early_param("wdt_period", early_parse_wdt_period);
-#endif /* CONFIG_BOOKE_WDT */
-
/* Checks "l2cr=xxxx" command-line option */
int __init ppc_setup_l2cr(char *str)
{
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e4cb34322de..0321007086f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -48,6 +48,7 @@
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
+#include <asm/vdso.h>
#include <asm/debug.h>
#ifdef DEBUG
@@ -570,8 +571,9 @@ void __devinit start_secondary(void *unused)
#ifdef CONFIG_PPC64
if (system_state == SYSTEM_RUNNING)
vdso_data->processorCount++;
+
+ vdso_getcpu_init();
#endif
- ipi_call_lock();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
/* Update sibling maps */
@@ -601,7 +603,6 @@ void __devinit start_secondary(void *unused)
of_node_put(np);
}
of_node_put(l2_cache);
- ipi_call_unlock();
local_irq_enable();
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 9eb5b9b536a..b67db22e102 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -706,6 +706,34 @@ static void __init vdso_setup_syscall_map(void)
}
}
+#ifdef CONFIG_PPC64
+int __cpuinit vdso_getcpu_init(void)
+{
+ unsigned long cpu, node, val;
+
+ /*
+ * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in
+ * the next 16 bits. The VDSO uses this to implement getcpu().
+ */
+ cpu = get_cpu();
+ WARN_ON_ONCE(cpu > 0xffff);
+
+ node = cpu_to_node(cpu);
+ WARN_ON_ONCE(node > 0xffff);
+
+ val = (cpu & 0xfff) | ((node & 0xffff) << 16);
+ mtspr(SPRN_SPRG3, val);
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+ get_paca()->kvm_hstate.sprg3 = val;
+#endif
+
+ put_cpu();
+
+ return 0;
+}
+/* We need to call this before SMP init */
+early_initcall(vdso_getcpu_init);
+#endif
static int __init vdso_init(void)
{
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index 9a7946c4173..53e6c9b979e 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -1,7 +1,9 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+obj-vdso32-$(CONFIG_PPC64) = getcpu.o
+obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
+ $(obj-vdso32-y)
# Build rules
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
new file mode 100644
index 00000000000..47afd08c90f
--- /dev/null
+++ b/arch/powerpc/kernel/vdso32/getcpu.S
@@ -0,0 +1,45 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+
+ .text
+/*
+ * Exact prototype of getcpu
+ *
+ * int __kernel_getcpu(unsigned *cpu, unsigned *node);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_getcpu)
+ .cfi_startproc
+ mfspr r5,SPRN_USPRG3
+ cmpdi cr0,r3,0
+ cmpdi cr1,r4,0
+ clrlwi r6,r5,16
+ rlwinm r7,r5,16,31-15,31-0
+ beq cr0,1f
+ stw r6,0(r3)
+1: beq cr1,2f
+ stw r7,0(r4)
+2: crclr cr0*4+so
+ li r3,0 /* always success */
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 0546bcd49cd..43200ba2e57 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -147,6 +147,9 @@ VERSION
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
__kernel_sigtramp_rt32;
+#ifdef CONFIG_PPC64
+ __kernel_getcpu;
+#endif
local: *;
};
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 8c500d8622e..effca9404b1 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -1,6 +1,6 @@
# List of files in the vdso, has to be asm only for now
-obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
+obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
# Build rules
diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S
new file mode 100644
index 00000000000..47afd08c90f
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/getcpu.S
@@ -0,0 +1,45 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+
+ .text
+/*
+ * Exact prototype of getcpu
+ *
+ * int __kernel_getcpu(unsigned *cpu, unsigned *node);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_getcpu)
+ .cfi_startproc
+ mfspr r5,SPRN_USPRG3
+ cmpdi cr0,r3,0
+ cmpdi cr1,r4,0
+ clrlwi r6,r5,16
+ rlwinm r7,r5,16,31-15,31-0
+ beq cr0,1f
+ stw r6,0(r3)
+1: beq cr1,2f
+ stw r7,0(r4)
+2: crclr cr0*4+so
+ li r3,0 /* always success */
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 0e615404e24..e6c1758f358 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -146,6 +146,7 @@ VERSION
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp_rt64;
+ __kernel_getcpu;
local: *;
};
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index cb87301ccd5..02b32216bbc 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -37,8 +37,6 @@
#include <asm/page.h>
#include <asm/hvcall.h>
-static struct bus_type vio_bus_type;
-
static struct vio_dev vio_bus_device = { /* fake "parent" device */
.name = "vio",
.type = "",
@@ -613,6 +611,7 @@ static u64 vio_dma_get_required_mask(struct device *dev)
struct dma_map_ops vio_dma_mapping_ops = {
.alloc = vio_dma_iommu_alloc_coherent,
.free = vio_dma_iommu_free_coherent,
+ .mmap = dma_direct_mmap_coherent,
.map_sg = vio_dma_iommu_map_sg,
.unmap_sg = vio_dma_iommu_unmap_sg,
.map_page = vio_dma_iommu_map_page,
@@ -625,7 +624,7 @@ struct dma_map_ops vio_dma_mapping_ops = {
* vio_cmo_set_dev_desired - Set desired entitlement for a device
*
* @viodev: struct vio_dev for device to alter
- * @new_desired: new desired entitlement level in bytes
+ * @desired: new desired entitlement level in bytes
*
* For use by devices to request a change to their entitlement at runtime or
* through sysfs. The desired entitlement level is changed and a balancing
@@ -1262,7 +1261,7 @@ static int vio_bus_remove(struct device *dev)
/**
* vio_register_driver: - Register a new vio driver
- * @drv: The vio_driver structure to be registered.
+ * @viodrv: The vio_driver structure to be registered.
*/
int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
const char *mod_name)
@@ -1282,7 +1281,7 @@ EXPORT_SYMBOL(__vio_register_driver);
/**
* vio_unregister_driver - Remove registration of vio driver.
- * @driver: The vio_driver struct to be removed form registration
+ * @viodrv: The vio_driver struct to be removed form registration
*/
void vio_unregister_driver(struct vio_driver *viodrv)
{
@@ -1296,8 +1295,7 @@ static void __devinit vio_dev_release(struct device *dev)
struct iommu_table *tbl = get_iommu_table_base(dev);
if (tbl)
- iommu_free_table(tbl, dev->of_node ?
- dev->of_node->full_name : dev_name(dev));
+ iommu_free_table(tbl, of_node_full_name(dev->of_node));
of_node_put(dev->of_node);
kfree(to_vio_dev(dev));
}
@@ -1397,21 +1395,27 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
viodev->name = of_node->name;
viodev->dev.of_node = of_node_get(of_node);
- if (firmware_has_feature(FW_FEATURE_CMO))
- vio_cmo_set_dma_ops(viodev);
- else
- set_dma_ops(&viodev->dev, &dma_iommu_ops);
- set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));
set_dev_node(&viodev->dev, of_node_to_nid(of_node));
/* init generic 'struct device' fields: */
viodev->dev.parent = &vio_bus_device.dev;
viodev->dev.bus = &vio_bus_type;
viodev->dev.release = vio_dev_release;
- /* needed to ensure proper operation of coherent allocations
- * later, in case driver doesn't set it explicitly */
- dma_set_mask(&viodev->dev, DMA_BIT_MASK(64));
- dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64));
+
+ if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) {
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_set_dma_ops(viodev);
+ else
+ set_dma_ops(&viodev->dev, &dma_iommu_ops);
+
+ set_iommu_table_base(&viodev->dev,
+ vio_build_iommu_table(viodev));
+
+ /* needed to ensure proper operation of coherent allocations
+ * later, in case driver doesn't set it explicitly */
+ dma_set_mask(&viodev->dev, DMA_BIT_MASK(64));
+ dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64));
+ }
/* register with generic device framework */
if (device_register(&viodev->dev)) {
@@ -1491,12 +1495,18 @@ static int __init vio_bus_init(void)
if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_bus_init();
+ return 0;
+}
+postcore_initcall(vio_bus_init);
+
+static int __init vio_device_init(void)
+{
vio_bus_scan_register_devices("vdevice");
vio_bus_scan_register_devices("ibm,platform-facilities");
return 0;
}
-__initcall(vio_bus_init);
+device_initcall(vio_device_init);
static ssize_t name_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1509,7 +1519,7 @@ static ssize_t devspec_show(struct device *dev,
{
struct device_node *of_node = dev->of_node;
- return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none");
+ return sprintf(buf, "%s\n", of_node_full_name(of_node));
}
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
@@ -1568,7 +1578,7 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
return 0;
}
-static struct bus_type vio_bus_type = {
+struct bus_type vio_bus_type = {
.name = "vio",
.dev_attrs = vio_dev_attrs,
.uevent = vio_hotplug,