aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/Kconfig.debug19
-rw-r--r--arch/x86/Makefile3
-rw-r--r--arch/x86/boot/compressed/cmdline.c4
-rw-r--r--arch/x86/boot/compressed/early_serial_console.c4
-rw-r--r--arch/x86/boot/compressed/eboot.c198
-rw-r--r--arch/x86/boot/compressed/head_32.S10
-rw-r--r--arch/x86/boot/compressed/head_64.S10
-rw-r--r--arch/x86/boot/compressed/misc.c31
-rw-r--r--arch/x86/boot/compressed/misc.h27
-rw-r--r--arch/x86/boot/header.S11
-rw-r--r--arch/x86/crypto/Makefile14
-rw-r--r--arch/x86/crypto/ablk_helper.c149
-rw-r--r--arch/x86/crypto/aes_glue.c2
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S6
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c110
-rw-r--r--arch/x86/crypto/camellia_glue.c355
-rw-r--r--arch/x86/crypto/glue_helper.c307
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S704
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c636
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c513
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S2
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c6
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S300
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c624
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c409
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/alternative.h74
-rw-r--r--arch/x86/include/asm/amd_nb.h21
-rw-r--r--arch/x86/include/asm/apic.h66
-rw-r--r--arch/x86/include/asm/bitops.h7
-rw-r--r--arch/x86/include/asm/bootparam.h1
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/crypto/ablk_helper.h31
-rw-r--r--arch/x86/include/asm/crypto/aes.h (renamed from arch/x86/include/asm/aes.h)0
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h115
-rw-r--r--arch/x86/include/asm/crypto/serpent-avx.h32
-rw-r--r--arch/x86/include/asm/crypto/serpent-sse2.h (renamed from arch/x86/include/asm/serpent.h)4
-rw-r--r--arch/x86/include/asm/crypto/twofish.h46
-rw-r--r--arch/x86/include/asm/emergency-restart.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h9
-rw-r--r--arch/x86/include/asm/floppy.h2
-rw-r--r--arch/x86/include/asm/hypervisor.h1
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/irq_vectors.h11
-rw-r--r--arch/x86/include/asm/kvm.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h35
-rw-r--r--arch/x86/include/asm/kvm_para.h7
-rw-r--r--arch/x86/include/asm/mce.h8
-rw-r--r--arch/x86/include/asm/msr.h46
-rw-r--r--arch/x86/include/asm/nmi.h20
-rw-r--r--arch/x86/include/asm/olpc.h19
-rw-r--r--arch/x86/include/asm/paravirt.h46
-rw-r--r--arch/x86/include/asm/paravirt_types.h5
-rw-r--r--arch/x86/include/asm/pci_x86.h15
-rw-r--r--arch/x86/include/asm/percpu.h17
-rw-r--r--arch/x86/include/asm/perf_event.h35
-rw-r--r--arch/x86/include/asm/pgtable-2level.h4
-rw-r--r--arch/x86/include/asm/pgtable-3level.h36
-rw-r--r--arch/x86/include/asm/pgtable_64.h8
-rw-r--r--arch/x86/include/asm/processor-flags.h2
-rw-r--r--arch/x86/include/asm/processor.h13
-rw-r--r--arch/x86/include/asm/realmode.h3
-rw-r--r--arch/x86/include/asm/reboot.h4
-rw-r--r--arch/x86/include/asm/smp.h21
-rw-r--r--arch/x86/include/asm/tlb.h9
-rw-r--r--arch/x86/include/asm/tlbflush.h49
-rw-r--r--arch/x86/include/asm/uaccess_64.h11
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/uprobes.h2
-rw-r--r--arch/x86/include/asm/uv/uv.h5
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h28
-rw-r--r--arch/x86/include/asm/vmx.h6
-rw-r--r--arch/x86/include/asm/x2apic.h18
-rw-r--r--arch/x86/include/asm/x86_init.h4
-rw-r--r--arch/x86/include/asm/xen/hypercall.h8
-rw-r--r--arch/x86/kernel/acpi/boot.c27
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/acpi/sleep.h2
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S4
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S4
-rw-r--r--arch/x86/kernel/alternative.c21
-rw-r--r--arch/x86/kernel/amd_nb.c11
-rw-r--r--arch/x86/kernel/apic/apic.c42
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c76
-rw-r--r--arch/x86/kernel/apic/apic_noop.c9
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c50
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c48
-rw-r--r--arch/x86/kernel/apic/es7000_32.c51
-rw-r--r--arch/x86/kernel/apic/io_apic.c354
-rw-r--r--arch/x86/kernel/apic/numaq_32.c30
-rw-r--r--arch/x86/kernel/apic/probe_32.c23
-rw-r--r--arch/x86/kernel/apic/probe_64.c11
-rw-r--r--arch/x86/kernel/apic/summit_32.c68
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c82
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c39
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c45
-rw-r--r--arch/x86/kernel/apm_32.c29
-rw-r--r--arch/x86/kernel/cpu/Makefile6
-rw-r--r--arch/x86/kernel/cpu/amd.c39
-rw-r--r--arch/x86/kernel/cpu/bugs.c20
-rw-r--r--arch/x86/kernel/cpu/common.c33
-rw-r--r--arch/x86/kernel/cpu/cpu.h9
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c3
-rw-r--r--arch/x86/kernel/cpu/intel.c176
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c75
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c286
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl25
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c6
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event.c196
-rw-r--r--arch/x86/kernel/cpu/perf_event.h46
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c103
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c226
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c19
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c2900
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h621
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/cpu/sched.c55
-rw-r--r--arch/x86/kernel/dumpstack.c5
-rw-r--r--arch/x86/kernel/dumpstack_32.c25
-rw-r--r--arch/x86/kernel/dumpstack_64.c21
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/entry_64.S38
-rw-r--r--arch/x86/kernel/irq.c5
-rw-r--r--arch/x86/kernel/irqinit.c73
-rw-r--r--arch/x86/kernel/kdebugfs.c6
-rw-r--r--arch/x86/kernel/kgdb.c8
-rw-r--r--arch/x86/kernel/kvm.c64
-rw-r--r--arch/x86/kernel/kvmclock.c5
-rw-r--r--arch/x86/kernel/microcode_core.c66
-rw-r--r--arch/x86/kernel/module.c34
-rw-r--r--arch/x86/kernel/nmi.c47
-rw-r--r--arch/x86/kernel/nmi_selftest.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-calgary_64.c34
-rw-r--r--arch/x86/kernel/pci-dma.c14
-rw-r--r--arch/x86/kernel/process.c34
-rw-r--r--arch/x86/kernel/process_64.c12
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c82
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/setup_percpu.c2
-rw-r--r--arch/x86/kernel/signal.c5
-rw-r--r--arch/x86/kernel/smpboot.c121
-rw-r--r--arch/x86/kernel/traps.c19
-rw-r--r--arch/x86/kernel/tsc.c50
-rw-r--r--arch/x86/kernel/uprobes.c3
-rw-r--r--arch/x86/kernel/vm86_32.c6
-rw-r--r--arch/x86/kernel/vsmp_64.c44
-rw-r--r--arch/x86/kernel/vsyscall_64.c56
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/kernel/x86_init.c2
-rw-r--r--arch/x86/kernel/xsave.c12
-rw-r--r--arch/x86/kvm/cpuid.c46
-rw-r--r--arch/x86/kvm/cpuid.h9
-rw-r--r--arch/x86/kvm/emulate.c273
-rw-r--r--arch/x86/kvm/i8259.c34
-rw-r--r--arch/x86/kvm/lapic.c194
-rw-r--r--arch/x86/kvm/lapic.h11
-rw-r--r--arch/x86/kvm/mmu.c362
-rw-r--r--arch/x86/kvm/mmutrace.h45
-rw-r--r--arch/x86/kvm/paging_tmpl.h3
-rw-r--r--arch/x86/kvm/pmu.c22
-rw-r--r--arch/x86/kvm/svm.c12
-rw-r--r--arch/x86/kvm/trace.h46
-rw-r--r--arch/x86/kvm/vmx.c209
-rw-r--r--arch/x86/kvm/x86.c127
-rw-r--r--arch/x86/lib/csum-wrappers_64.c2
-rw-r--r--arch/x86/lib/msr-reg-export.c4
-rw-r--r--arch/x86/lib/msr-reg.S10
-rw-r--r--arch/x86/lib/usercopy.c2
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/pageattr.c12
-rw-r--r--arch/x86/mm/srat.c15
-rw-r--r--arch/x86/mm/tlb.c401
-rw-r--r--arch/x86/net/bpf_jit_comp.c4
-rw-r--r--arch/x86/oprofile/op_model_amd.c4
-rw-r--r--arch/x86/pci/acpi.c109
-rw-r--r--arch/x86/pci/amd_bus.c7
-rw-r--r--arch/x86/pci/bus_numa.c22
-rw-r--r--arch/x86/pci/bus_numa.h3
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c372
-rw-r--r--arch/x86/pci/mmconfig_32.c30
-rw-r--r--arch/x86/pci/mmconfig_64.c52
-rw-r--r--arch/x86/pci/mrst.c2
-rw-r--r--arch/x86/platform/efi/efi.c30
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c13
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c16
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c1
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c7
-rw-r--r--arch/x86/platform/olpc/olpc.c190
-rw-r--r--arch/x86/platform/uv/tlb_uv.c459
-rw-r--r--arch/x86/platform/uv/uv_irq.c9
-rw-r--r--arch/x86/realmode/rm/Makefile2
-rw-r--r--arch/x86/realmode/rm/header.S4
-rw-r--r--arch/x86/realmode/rm/reboot.S (renamed from arch/x86/realmode/rm/reboot_32.S)30
-rw-r--r--arch/x86/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/um/asm/ptrace.h6
-rw-r--r--arch/x86/um/sys_call_table_32.c4
-rw-r--r--arch/x86/vdso/vdso32-setup.c6
-rw-r--r--arch/x86/xen/enlighten.c234
-rw-r--r--arch/x86/xen/mmu.c51
-rw-r--r--arch/x86/xen/p2m.c41
-rw-r--r--arch/x86/xen/setup.c26
-rw-r--r--arch/x86/xen/smp.c2
-rw-r--r--arch/x86/xen/suspend.c2
-rw-r--r--arch/x86/xen/xen-ops.h2
215 files changed, 11636 insertions, 4154 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c70684f859e..8ec3a1aa4ab 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -70,6 +70,7 @@ config X86
select HAVE_ARCH_JUMP_LABEL
select HAVE_TEXT_POKE_SMP
select HAVE_GENERIC_HARDIRQS
+ select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select SPARSE_IRQ
select GENERIC_FIND_FIRST_BIT
select GENERIC_IRQ_PROBE
@@ -84,6 +85,7 @@ config X86
select GENERIC_IOMAP
select DCACHE_WORD_ACCESS
select GENERIC_SMP_IDLE_THREAD
+ select ARCH_WANT_IPC_PARSE_VERSION if X86_32
select HAVE_ARCH_SECCOMP_FILTER
select BUILDTIME_EXTABLE_SORT
select GENERIC_CMOS_UPDATE
@@ -1525,7 +1527,7 @@ config SECCOMP
If unsure, say Y. Only embedded should say N here.
config CC_STACKPROTECTOR
- bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
+ bool "Enable -fstack-protector buffer overflow detection"
---help---
This option turns on the -fstack-protector GCC feature. This
feature puts, at the beginning of functions, a canary value on
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index e46c2147397..b322f124ee3 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -129,6 +129,25 @@ config DOUBLEFAULT
option saves about 4k and might cause you much additional grey
hair.
+config DEBUG_TLBFLUSH
+ bool "Set upper limit of TLB entries to flush one-by-one"
+ depends on DEBUG_KERNEL && (X86_64 || X86_INVLPG)
+ ---help---
+
+ X86-only for now.
+
+ This option allows the user to tune the amount of TLB entries the
+ kernel flushes one-by-one instead of doing a full TLB flush. In
+ certain situations, the former is cheaper. This is controlled by the
+ tlb_flushall_shift knob under /sys/kernel/debug/x86. If you set it
+ to -1, the code flushes the whole TLB unconditionally. Otherwise,
+ for positive values of it, the kernel will use single TLB entry
+ invalidating instructions according to the following formula:
+
+ flush_entries <= active_tlb_entries / 2^tlb_flushall_shift
+
+ If in doubt, say "N".
+
config IOMMU_DEBUG
bool "Enable IOMMU debugging"
depends on GART_IOMMU && DEBUG_KERNEL
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f252143455..b0c5276861e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -49,6 +49,9 @@ else
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
+ # Use -mpreferred-stack-boundary=3 if supported.
+ KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
+
# FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index cb62f786990..10f6b1178c6 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,5 +1,7 @@
#include "misc.h"
+#ifdef CONFIG_EARLY_PRINTK
+
static unsigned long fs;
static inline void set_fs(unsigned long seg)
{
@@ -19,3 +21,5 @@ int cmdline_find_option_bool(const char *option)
{
return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
}
+
+#endif
diff --git a/arch/x86/boot/compressed/early_serial_console.c b/arch/x86/boot/compressed/early_serial_console.c
index 261e81fb958..d3d003cb548 100644
--- a/arch/x86/boot/compressed/early_serial_console.c
+++ b/arch/x86/boot/compressed/early_serial_console.c
@@ -1,5 +1,9 @@
#include "misc.h"
+#ifdef CONFIG_EARLY_PRINTK
+
int early_serial_base;
#include "../early_serial_console.c"
+
+#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 4e85f5f8583..b3e0227df2c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -729,32 +729,68 @@ fail:
* need to create one ourselves (usually the bootloader would create
* one for us).
*/
-static efi_status_t make_boot_params(struct boot_params *boot_params,
- efi_loaded_image_t *image,
- void *handle)
+struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table)
{
- struct efi_info *efi = &boot_params->efi_info;
- struct apm_bios_info *bi = &boot_params->apm_bios_info;
- struct sys_desc_table *sdt = &boot_params->sys_desc_table;
- struct e820entry *e820_map = &boot_params->e820_map[0];
- struct e820entry *prev = NULL;
- struct setup_header *hdr = &boot_params->hdr;
- unsigned long size, key, desc_size, _size;
- efi_memory_desc_t *mem_map;
- void *options = image->load_options;
- u32 load_options_size = image->load_options_size / 2; /* ASCII */
+ struct boot_params *boot_params;
+ struct sys_desc_table *sdt;
+ struct apm_bios_info *bi;
+ struct setup_header *hdr;
+ struct efi_info *efi;
+ efi_loaded_image_t *image;
+ void *options;
+ u32 load_options_size;
+ efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
- __u32 desc_version;
unsigned long cmdline;
- u8 nr_entries;
u16 *s2;
u8 *s1;
int i;
+ sys_table = _table;
+
+ /* Check if we were booted by the EFI firmware */
+ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ return NULL;
+
+ status = efi_call_phys3(sys_table->boottime->handle_protocol,
+ handle, &proto, (void *)&image);
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
+ return NULL;
+ }
+
+ status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc lowmem for boot params\n");
+ return NULL;
+ }
+
+ memset(boot_params, 0x0, 0x4000);
+
+ hdr = &boot_params->hdr;
+ efi = &boot_params->efi_info;
+ bi = &boot_params->apm_bios_info;
+ sdt = &boot_params->sys_desc_table;
+
+ /* Copy the second sector to boot_params */
+ memcpy(&hdr->jump, image->image_base + 512, 512);
+
+ /*
+ * Fill out some of the header fields ourselves because the
+ * EFI firmware loader doesn't load the first sector.
+ */
+ hdr->root_flags = 1;
+ hdr->vid_mode = 0xffff;
+ hdr->boot_flag = 0xAA55;
+
+ hdr->code32_start = (__u64)(unsigned long)image->image_base;
+
hdr->type_of_loader = 0x21;
/* Convert unicode cmdline to ascii */
+ options = image->load_options;
+ load_options_size = image->load_options_size / 2; /* ASCII */
cmdline = 0;
s2 = (u16 *)options;
@@ -791,18 +827,36 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
hdr->ramdisk_image = 0;
hdr->ramdisk_size = 0;
- status = handle_ramdisks(image, hdr);
- if (status != EFI_SUCCESS)
- goto free_cmdline;
-
- setup_graphics(boot_params);
-
/* Clear APM BIOS info */
memset(bi, 0, sizeof(*bi));
memset(sdt, 0, sizeof(*sdt));
- memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
+ status = handle_ramdisks(image, hdr);
+ if (status != EFI_SUCCESS)
+ goto fail2;
+
+ return boot_params;
+fail2:
+ if (options_size)
+ low_free(options_size, hdr->cmd_line_ptr);
+fail:
+ low_free(0x4000, (unsigned long)boot_params);
+ return NULL;
+}
+
+static efi_status_t exit_boot(struct boot_params *boot_params,
+ void *handle)
+{
+ struct efi_info *efi = &boot_params->efi_info;
+ struct e820entry *e820_map = &boot_params->e820_map[0];
+ struct e820entry *prev = NULL;
+ unsigned long size, key, desc_size, _size;
+ efi_memory_desc_t *mem_map;
+ efi_status_t status;
+ __u32 desc_version;
+ u8 nr_entries;
+ int i;
size = sizeof(*mem_map) * 32;
@@ -811,7 +865,7 @@ again:
_size = size;
status = low_alloc(size, 1, (unsigned long *)&mem_map);
if (status != EFI_SUCCESS)
- goto free_cmdline;
+ return status;
status = efi_call_phys5(sys_table->boottime->get_memory_map, &size,
mem_map, &key, &desc_size, &desc_version);
@@ -823,6 +877,7 @@ again:
if (status != EFI_SUCCESS)
goto free_mem_map;
+ memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32));
efi->efi_systab = (unsigned long)sys_table;
efi->efi_memdesc_size = desc_size;
efi->efi_memdesc_version = desc_version;
@@ -906,61 +961,13 @@ again:
free_mem_map:
low_free(_size, (unsigned long)mem_map);
-free_cmdline:
- if (options_size)
- low_free(options_size, hdr->cmd_line_ptr);
-fail:
return status;
}
-/*
- * On success we return a pointer to a boot_params structure, and NULL
- * on failure.
- */
-struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
+static efi_status_t relocate_kernel(struct setup_header *hdr)
{
- struct boot_params *boot_params;
unsigned long start, nr_pages;
- struct desc_ptr *gdt, *idt;
- efi_loaded_image_t *image;
- struct setup_header *hdr;
efi_status_t status;
- efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
- struct desc_struct *desc;
-
- sys_table = _table;
-
- /* Check if we were booted by the EFI firmware */
- if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- goto fail;
-
- status = efi_call_phys3(sys_table->boottime->handle_protocol,
- handle, &proto, (void *)&image);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
- goto fail;
- }
-
- status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
- if (status != EFI_SUCCESS) {
- efi_printk("Failed to alloc lowmem for boot params\n");
- goto fail;
- }
-
- memset(boot_params, 0x0, 0x4000);
-
- hdr = &boot_params->hdr;
-
- /* Copy the second sector to boot_params */
- memcpy(&hdr->jump, image->image_base + 512, 512);
-
- /*
- * Fill out some of the header fields ourselves because the
- * EFI firmware loader doesn't load the first sector.
- */
- hdr->root_flags = 1;
- hdr->vid_mode = 0xffff;
- hdr->boot_flag = 0xAA55;
/*
* The EFI firmware loader could have placed the kernel image
@@ -978,16 +985,40 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
if (status != EFI_SUCCESS) {
status = low_alloc(hdr->init_size, hdr->kernel_alignment,
&start);
- if (status != EFI_SUCCESS) {
+ if (status != EFI_SUCCESS)
efi_printk("Failed to alloc mem for kernel\n");
- goto fail;
- }
}
+ if (status == EFI_SUCCESS)
+ memcpy((void *)start, (void *)(unsigned long)hdr->code32_start,
+ hdr->init_size);
+
+ hdr->pref_address = hdr->code32_start;
hdr->code32_start = (__u32)start;
- hdr->pref_address = (__u64)(unsigned long)image->image_base;
- memcpy((void *)start, image->image_base, image->image_size);
+ return status;
+}
+
+/*
+ * On success we return a pointer to a boot_params structure, and NULL
+ * on failure.
+ */
+struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
+ struct boot_params *boot_params)
+{
+ struct desc_ptr *gdt, *idt;
+ efi_loaded_image_t *image;
+ struct setup_header *hdr = &boot_params->hdr;
+ efi_status_t status;
+ struct desc_struct *desc;
+
+ sys_table = _table;
+
+ /* Check if we were booted by the EFI firmware */
+ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
+ goto fail;
+
+ setup_graphics(boot_params);
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*gdt),
@@ -1015,7 +1046,18 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
idt->size = 0;
idt->address = 0;
- status = make_boot_params(boot_params, image, handle);
+ /*
+ * If the kernel isn't already loaded at the preferred load
+ * address, relocate it.
+ */
+ if (hdr->pref_address != hdr->code32_start) {
+ status = relocate_kernel(hdr);
+
+ if (status != EFI_SUCCESS)
+ goto fail;
+ }
+
+ status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS)
goto fail;
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index c85e3ac99bb..aa4aaf1b238 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -42,6 +42,16 @@ ENTRY(startup_32)
*/
add $0x4, %esp
+ call make_boot_params
+ cmpl $0, %eax
+ je 1f
+ movl 0x4(%esp), %esi
+ movl (%esp), %ecx
+ pushl %eax
+ pushl %esi
+ pushl %ecx
+
+ .org 0x30,0x90
call efi_main
cmpl $0, %eax
movl %eax, %esi
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 87e03a13d8e..2c4b171eec3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -209,6 +209,16 @@ ENTRY(startup_64)
.org 0x210
mov %rcx, %rdi
mov %rdx, %rsi
+ pushq %rdi
+ pushq %rsi
+ call make_boot_params
+ cmpq $0,%rax
+ je 1f
+ mov %rax, %rdx
+ popq %rsi
+ popq %rdi
+
+ .org 0x230,0x90
call efi_main
movq %rax,%rsi
cmpq $0,%rax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7116dcba0c9..88f7ff6da40 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -108,8 +108,6 @@ static void error(char *m);
* This is set up by the setup-routine at boot-time
*/
struct boot_params *real_mode; /* Pointer to real-mode data */
-static int quiet;
-static int debug;
void *memset(void *s, int c, size_t n);
void *memcpy(void *dest, const void *src, size_t n);
@@ -170,15 +168,11 @@ static void serial_putchar(int ch)
outb(ch, early_serial_base + TXR);
}
-void __putstr(int error, const char *s)
+void __putstr(const char *s)
{
int x, y, pos;
char c;
-#ifndef CONFIG_X86_VERBOSE_BOOTUP
- if (!error)
- return;
-#endif
if (early_serial_base) {
const char *str = s;
while (*str) {
@@ -265,9 +259,9 @@ void *memcpy(void *dest, const void *src, size_t n)
static void error(char *x)
{
- __putstr(1, "\n\n");
- __putstr(1, x);
- __putstr(1, "\n\n -- System halted");
+ error_putstr("\n\n");
+ error_putstr(x);
+ error_putstr("\n\n -- System halted");
while (1)
asm("hlt");
@@ -294,8 +288,7 @@ static void parse_elf(void *output)
return;
}
- if (!quiet)
- putstr("Parsing ELF... ");
+ debug_putstr("Parsing ELF... ");
phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
if (!phdrs)
@@ -332,11 +325,6 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
{
real_mode = rmode;
- if (cmdline_find_option_bool("quiet"))
- quiet = 1;
- if (cmdline_find_option_bool("debug"))
- debug = 1;
-
if (real_mode->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
@@ -349,8 +337,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
cols = real_mode->screen_info.orig_video_cols;
console_init();
- if (debug)
- putstr("early console in decompress_kernel\n");
+ debug_putstr("early console in decompress_kernel\n");
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -369,11 +356,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
error("Wrong destination address");
#endif
- if (!quiet)
- putstr("\nDecompressing Linux... ");
+ debug_putstr("\nDecompressing Linux... ");
decompress(input_data, input_len, NULL, NULL, output, NULL, error);
parse_elf(output);
- if (!quiet)
- putstr("done.\nBooting the kernel.\n");
+ debug_putstr("done.\nBooting the kernel.\n");
return;
}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 3f19c81a620..0e6dc0ee0ee 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -24,9 +24,21 @@
/* misc.c */
extern struct boot_params *real_mode; /* Pointer to real-mode data */
-void __putstr(int error, const char *s);
-#define putstr(__x) __putstr(0, __x)
-#define puts(__x) __putstr(0, __x)
+void __putstr(const char *s);
+#define error_putstr(__x) __putstr(__x)
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+
+#define debug_putstr(__x) __putstr(__x)
+
+#else
+
+static inline void debug_putstr(const char *s)
+{ }
+
+#endif
+
+#ifdef CONFIG_EARLY_PRINTK
/* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize);
@@ -36,4 +48,13 @@ int cmdline_find_option_bool(const char *option);
extern int early_serial_base;
void console_init(void);
+#else
+
+/* early_serial_console.c */
+static const int early_serial_base;
+static inline void console_init(void)
+{ }
+
+#endif
+
#endif
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index efe5acfc79c..b4e15dd6786 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -283,7 +283,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x020a # header version number (>= 0x0105)
+ .word 0x020b # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -401,18 +401,13 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size: .long INIT_SIZE # kernel initialization size
+handover_offset: .long 0x30 # offset to the handover
+ # protocol entry point
# End of setup header #####################################################
.section ".entrytext", "ax"
start_of_setup:
-#ifdef SAFE_RESET_DISK_CONTROLLER
-# Reset the disk controller.
- movw $0x0000, %ax # Reset disk controller
- movb $0x80, %dl # All disks
- int $0x13
-#endif
-
# Force %es = %ds
movw %ds, %ax
movw %ax, %es
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e191ac048b5..e908e5de82d 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,9 @@
# Arch-specific CryptoAPI modules.
#
+obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
+obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
+
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
+obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
+obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
+twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
+serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c
new file mode 100644
index 00000000000..43282fe04a8
--- /dev/null
+++ b/arch/x86/crypto/ablk_helper.c
@@ -0,0 +1,149 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/crypto/ablk_helper.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+ struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+ int err;
+
+ crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+ & CRYPTO_TFM_REQ_MASK);
+ err = crypto_ablkcipher_setkey(child, key, key_len);
+ crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+ & CRYPTO_TFM_RES_MASK);
+ return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+ struct blkcipher_desc desc;
+
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+
+ return crypto_blkcipher_crt(desc.tfm)->encrypt(
+ &desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (!irq_fpu_usable()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+ return crypto_ablkcipher_encrypt(cryptd_req);
+ } else {
+ return __ablk_encrypt(req);
+ }
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+ struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+ struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (!irq_fpu_usable()) {
+ struct ablkcipher_request *cryptd_req =
+ ablkcipher_request_ctx(req);
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+ return crypto_ablkcipher_decrypt(cryptd_req);
+ } else {
+ struct blkcipher_desc desc;
+
+ desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+ desc.info = req->info;
+ desc.flags = 0;
+
+ return crypto_blkcipher_crt(desc.tfm)->decrypt(
+ &desc, req->dst, req->src, req->nbytes);
+ }
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+ struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+ struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct cryptd_ablkcipher *cryptd_tfm;
+
+ cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+
+ ctx->cryptd_tfm = cryptd_tfm;
+ tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+ crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+ char drv_name[CRYPTO_MAX_ALG_NAME];
+
+ snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+ crypto_tfm_alg_driver_name(tfm));
+
+ return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 8efcf42a9d7..59b37deb8c8 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <crypto/aes.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index be6d9e365a8..3470624d783 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2460,10 +2460,12 @@ ENTRY(aesni_cbc_dec)
pxor IN3, STATE4
movaps IN4, IV
#else
- pxor (INP), STATE2
- pxor 0x10(INP), STATE3
pxor IN1, STATE4
movaps IN2, IV
+ movups (INP), IN1
+ pxor IN1, STATE2
+ movups 0x10(INP), IN2
+ pxor IN2, STATE3
#endif
movups STATE1, (OUTP)
movups STATE2, 0x10(OUTP)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ac7f5cd019e..34fdcff4d2c 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -30,7 +30,8 @@
#include <crypto/ctr.h>
#include <asm/cpu_device_id.h>
#include <asm/i387.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
+#include <asm/crypto/ablk_helper.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <linux/workqueue.h>
@@ -52,10 +53,6 @@
#define HAS_XTS
#endif
-struct async_aes_ctx {
- struct cryptd_ablkcipher *cryptd_tfm;
-};
-
/* This data is stored at the end of the crypto_tfm struct.
* It's a type of per "session" data storage location.
* This needs to be 16 byte aligned.
@@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
}
#endif
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
- unsigned int key_len)
-{
- struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
- int err;
-
- crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
- & CRYPTO_TFM_REQ_MASK);
- err = crypto_ablkcipher_setkey(child, key, key_len);
- crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
- & CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_ablkcipher_encrypt(cryptd_req);
- } else {
- struct blkcipher_desc desc;
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
- return crypto_blkcipher_crt(desc.tfm)->encrypt(
- &desc, req->dst, req->src, req->nbytes);
- }
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_ablkcipher_decrypt(cryptd_req);
- } else {
- struct blkcipher_desc desc;
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
- return crypto_blkcipher_crt(desc.tfm)->decrypt(
- &desc, req->dst, req->src, req->nbytes);
- }
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
- struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
- struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- ctx->cryptd_tfm = cryptd_tfm;
- tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
- crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
- return 0;
-}
-
static int ablk_ecb_init(struct crypto_tfm *tfm)
{
return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
@@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
struct aesni_rfc4106_gcm_ctx *child_ctx =
aesni_rfc4106_gcm_ctx_get(cryptd_child);
- u8 *new_key_mem = NULL;
+ u8 *new_key_align, *new_key_mem = NULL;
if (key_len < 4) {
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
if (!new_key_mem)
return -ENOMEM;
- new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
- memcpy(new_key_mem, key, key_len);
- key = new_key_mem;
+ new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+ memcpy(new_key_align, key, key_len);
+ key = new_key_align;
}
if (!irq_fpu_usable())
@@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_aes_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 3306dc0b139..eeb2b3b743e 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -5,10 +5,6 @@
*
* Camellia parts based on code by:
* Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -34,9 +30,9 @@
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
-#include <crypto/b128ops.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
#define CAMELLIA_MIN_KEY_SIZE 16
#define CAMELLIA_MAX_KEY_SIZE 32
@@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
&tfm->crt_flags);
}
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
- void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
+static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
- /* Process two block batch */
- if (nbytes >= bsize * 2) {
- do {
- fn_2way(ctx, wdst, wsrc);
-
- wsrc += bsize * 2;
- wdst += bsize * 2;
- nbytes -= bsize * 2;
- } while (nbytes >= bsize * 2);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
-
- return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
-}
+ u128 iv = *src;
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
-}
+ camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 *iv = (u128 *)walk->iv;
-
- do {
- u128_xor(dst, src, iv);
- camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
- return nbytes;
+ u128_xor(&dst[1], &dst[1], &iv);
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
{
- struct blkcipher_walk walk;
- int err;
+ be128 ctrblk;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ if (dst != src)
+ *dst = *src;
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
+ u128_to_be128(&ctrblk, iv);
+ u128_inc(iv);
- return err;
+ camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
}
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv)
{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ivs[2 - 1];
- u128 last_iv;
+ be128 ctrblks[2];
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process two block batch */
- if (nbytes >= bsize * 2) {
- do {
- nbytes -= bsize * (2 - 1);
- src -= 2 - 1;
- dst -= 2 - 1;
-
- ivs[0] = src[0];
-
- camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
-
- u128_xor(dst + 1, dst + 1, ivs + 0);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * 2);
-
- if (nbytes < bsize)
- goto done;
+ if (dst != src) {
+ dst[0] = src[0];
+ dst[1] = src[1];
}
- /* Handle leftovers */
- for (;;) {
- camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
+ u128_to_be128(&ctrblks[0], iv);
+ u128_inc(iv);
+ u128_to_be128(&ctrblks[1], iv);
+ u128_inc(iv);
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- u128_xor(dst, dst, (u128 *)walk->iv);
- *(u128 *)walk->iv = last_iv;
-
- return nbytes;
+ camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+static const struct common_glue_ctx camellia_enc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 2,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+ } }
+};
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
+static const struct common_glue_ctx camellia_ctr = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 2,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+ } }
+};
- return err;
-}
+static const struct common_glue_ctx camellia_dec = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 2,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+ } }
+};
-static inline void u128_to_be128(be128 *dst, const u128 *src)
-{
- dst->a = cpu_to_be64(src->a);
- dst->b = cpu_to_be64(src->b);
-}
+static const struct common_glue_ctx camellia_dec_cbc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 2,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+ } }
+};
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- dst->a = be64_to_cpu(src->a);
- dst->b = be64_to_cpu(src->b);
+ return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
}
-static inline void u128_inc(u128 *i)
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- i->b++;
- if (!i->b)
- i->a++;
+ return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
}
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 keystream[CAMELLIA_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
- u128 ctrblk;
-
- memcpy(keystream, src, nbytes);
- camellia_enc_blk_xor(ctx, keystream, walk->iv);
- memcpy(dst, keystream, nbytes);
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
- u128_inc(&ctrblk);
- u128_to_be128((be128 *)walk->iv, &ctrblk);
+ return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
+ dst, src, nbytes);
}
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ctrblk;
- be128 ctrblocks[2];
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
- /* Process two block batch */
- if (nbytes >= bsize * 2) {
- do {
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- }
-
- /* create ctrblks for parallel encrypt */
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
- u128_to_be128(&ctrblocks[1], &ctrblk);
- u128_inc(&ctrblk);
-
- camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += 2;
- dst += 2;
- nbytes -= bsize * 2;
- } while (nbytes >= bsize * 2);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
-
- camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- u128_to_be128((be128 *)walk->iv, &ctrblk);
- return nbytes;
+ return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
+ nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
-
- while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
+ return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
}
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
new file mode 100644
index 00000000000..4854f0f31e4
--- /dev/null
+++ b/arch/x86/crypto/glue_helper.c
@@ -0,0 +1,307 @@
+/*
+ * Shared glue code for 128bit block ciphers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
+#include <crypto/scatterwalk.h>
+
+static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ const unsigned int bsize = 128 / 8;
+ unsigned int nbytes, i, func_bytes;
+ bool fpu_enabled = false;
+ int err;
+
+ err = blkcipher_walk_virt(desc, walk);
+
+ while ((nbytes = walk->nbytes)) {
+ u8 *wsrc = walk->src.virt.addr;
+ u8 *wdst = walk->dst.virt.addr;
+
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ desc, fpu_enabled, nbytes);
+
+ for (i = 0; i < gctx->num_funcs; i++) {
+ func_bytes = bsize * gctx->funcs[i].num_blocks;
+
+ /* Process multi-block batch */
+ if (nbytes >= func_bytes) {
+ do {
+ gctx->funcs[i].fn_u.ecb(ctx, wdst,
+ wsrc);
+
+ wsrc += func_bytes;
+ wdst += func_bytes;
+ nbytes -= func_bytes;
+ } while (nbytes >= func_bytes);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+ }
+
+done:
+ err = blkcipher_walk_done(desc, walk, nbytes);
+ }
+
+ glue_fpu_end(fpu_enabled);
+ return err;
+}
+
+int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return __glue_ecb_crypt_128bit(gctx, desc, &walk);
+}
+EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
+
+static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+ struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ const unsigned int bsize = 128 / 8;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 *iv = (u128 *)walk->iv;
+
+ do {
+ u128_xor(dst, src, iv);
+ fn(ctx, (u8 *)dst, (u8 *)dst);
+ iv = dst;
+
+ src += 1;
+ dst += 1;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+ u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+ return nbytes;
+}
+
+int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+
+static unsigned int
+__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ const unsigned int bsize = 128 / 8;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 last_iv;
+ unsigned int num_blocks, func_bytes;
+ unsigned int i;
+
+ /* Start of the last block. */
+ src += nbytes / bsize - 1;
+ dst += nbytes / bsize - 1;
+
+ last_iv = *src;
+
+ for (i = 0; i < gctx->num_funcs; i++) {
+ num_blocks = gctx->funcs[i].num_blocks;
+ func_bytes = bsize * num_blocks;
+
+ /* Process multi-block batch */
+ if (nbytes >= func_bytes) {
+ do {
+ nbytes -= func_bytes - bsize;
+ src -= num_blocks - 1;
+ dst -= num_blocks - 1;
+
+ gctx->funcs[i].fn_u.cbc(ctx, dst, src);
+
+ nbytes -= bsize;
+ if (nbytes < bsize)
+ goto done;
+
+ u128_xor(dst, dst, src - 1);
+ src -= 1;
+ dst -= 1;
+ } while (nbytes >= func_bytes);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+ }
+
+done:
+ u128_xor(dst, dst, (u128 *)walk->iv);
+ *(u128 *)walk->iv = last_iv;
+
+ return nbytes;
+}
+
+int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ const unsigned int bsize = 128 / 8;
+ bool fpu_enabled = false;
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ desc, fpu_enabled, nbytes);
+ nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ glue_fpu_end(fpu_enabled);
+ return err;
+}
+EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
+
+static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
+ struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ u8 *src = (u8 *)walk->src.virt.addr;
+ u8 *dst = (u8 *)walk->dst.virt.addr;
+ unsigned int nbytes = walk->nbytes;
+ u128 ctrblk;
+ u128 tmp;
+
+ be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+ memcpy(&tmp, src, nbytes);
+ fn_ctr(ctx, &tmp, &tmp, &ctrblk);
+ memcpy(dst, &tmp, nbytes);
+
+ u128_to_be128((be128 *)walk->iv, &ctrblk);
+}
+EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
+
+static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ const unsigned int bsize = 128 / 8;
+ void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 ctrblk;
+ unsigned int num_blocks, func_bytes;
+ unsigned int i;
+
+ be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+ /* Process multi-block batch */
+ for (i = 0; i < gctx->num_funcs; i++) {
+ num_blocks = gctx->funcs[i].num_blocks;
+ func_bytes = bsize * num_blocks;
+
+ if (nbytes >= func_bytes) {
+ do {
+ gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
+
+ src += num_blocks;
+ dst += num_blocks;
+ nbytes -= func_bytes;
+ } while (nbytes >= func_bytes);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+ }
+
+done:
+ u128_to_be128((be128 *)walk->iv, &ctrblk);
+ return nbytes;
+}
+
+int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ const unsigned int bsize = 128 / 8;
+ bool fpu_enabled = false;
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, bsize);
+
+ while ((nbytes = walk.nbytes) >= bsize) {
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ desc, fpu_enabled, nbytes);
+ nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ glue_fpu_end(fpu_enabled);
+
+ if (walk.nbytes) {
+ glue_ctr_crypt_final_128bit(
+ gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
new file mode 100644
index 00000000000..504106bf04a
--- /dev/null
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -0,0 +1,704 @@
+/*
+ * Serpent Cipher 8-way parallel algorithm (x86_64/AVX)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by
+ * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+.file "serpent-avx-x86_64-asm_64.S"
+.text
+
+#define CTX %rdi
+
+/**********************************************************************
+ 8-way AVX serpent
+ **********************************************************************/
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+#define RE1 %xmm4
+
+#define tp %xmm5
+
+#define RA2 %xmm6
+#define RB2 %xmm7
+#define RC2 %xmm8
+#define RD2 %xmm9
+#define RE2 %xmm10
+
+#define RNOT %xmm11
+
+#define RK0 %xmm12
+#define RK1 %xmm13
+#define RK2 %xmm14
+#define RK3 %xmm15
+
+
+#define S0_1(x0, x1, x2, x3, x4) \
+ vpor x0, x3, tp; \
+ vpxor x3, x0, x0; \
+ vpxor x2, x3, x4; \
+ vpxor RNOT, x4, x4; \
+ vpxor x1, tp, x3; \
+ vpand x0, x1, x1; \
+ vpxor x4, x1, x1; \
+ vpxor x0, x2, x2;
+#define S0_2(x0, x1, x2, x3, x4) \
+ vpxor x3, x0, x0; \
+ vpor x0, x4, x4; \
+ vpxor x2, x0, x0; \
+ vpand x1, x2, x2; \
+ vpxor x2, x3, x3; \
+ vpxor RNOT, x1, x1; \
+ vpxor x4, x2, x2; \
+ vpxor x2, x1, x1;
+
+#define S1_1(x0, x1, x2, x3, x4) \
+ vpxor x0, x1, tp; \
+ vpxor x3, x0, x0; \
+ vpxor RNOT, x3, x3; \
+ vpand tp, x1, x4; \
+ vpor tp, x0, x0; \
+ vpxor x2, x3, x3; \
+ vpxor x3, x0, x0; \
+ vpxor x3, tp, x1;
+#define S1_2(x0, x1, x2, x3, x4) \
+ vpxor x4, x3, x3; \
+ vpor x4, x1, x1; \
+ vpxor x2, x4, x4; \
+ vpand x0, x2, x2; \
+ vpxor x1, x2, x2; \
+ vpor x0, x1, x1; \
+ vpxor RNOT, x0, x0; \
+ vpxor x2, x0, x0; \
+ vpxor x1, x4, x4;
+
+#define S2_1(x0, x1, x2, x3, x4) \
+ vpxor RNOT, x3, x3; \
+ vpxor x0, x1, x1; \
+ vpand x2, x0, tp; \
+ vpxor x3, tp, tp; \
+ vpor x0, x3, x3; \
+ vpxor x1, x2, x2; \
+ vpxor x1, x3, x3; \
+ vpand tp, x1, x1;
+#define S2_2(x0, x1, x2, x3, x4) \
+ vpxor x2, tp, tp; \
+ vpand x3, x2, x2; \
+ vpor x1, x3, x3; \
+ vpxor RNOT, tp, tp; \
+ vpxor tp, x3, x3; \
+ vpxor tp, x0, x4; \
+ vpxor x2, tp, x0; \
+ vpor x2, x1, x1;
+
+#define S3_1(x0, x1, x2, x3, x4) \
+ vpxor x3, x1, tp; \
+ vpor x0, x3, x3; \
+ vpand x0, x1, x4; \
+ vpxor x2, x0, x0; \
+ vpxor tp, x2, x2; \
+ vpand x3, tp, x1; \
+ vpxor x3, x2, x2; \
+ vpor x4, x0, x0; \
+ vpxor x3, x4, x4;
+#define S3_2(x0, x1, x2, x3, x4) \
+ vpxor x0, x1, x1; \
+ vpand x3, x0, x0; \
+ vpand x4, x3, x3; \
+ vpxor x2, x3, x3; \
+ vpor x1, x4, x4; \
+ vpand x1, x2, x2; \
+ vpxor x3, x4, x4; \
+ vpxor x3, x0, x0; \
+ vpxor x2, x3, x3;
+
+#define S4_1(x0, x1, x2, x3, x4) \
+ vpand x0, x3, tp; \
+ vpxor x3, x0, x0; \
+ vpxor x2, tp, tp; \
+ vpor x3, x2, x2; \
+ vpxor x1, x0, x0; \
+ vpxor tp, x3, x4; \
+ vpor x0, x2, x2; \
+ vpxor x1, x2, x2;
+#define S4_2(x0, x1, x2, x3, x4) \
+ vpand x0, x1, x1; \
+ vpxor x4, x1, x1; \
+ vpand x2, x4, x4; \
+ vpxor tp, x2, x2; \
+ vpxor x0, x4, x4; \
+ vpor x1, tp, x3; \
+ vpxor RNOT, x1, x1; \
+ vpxor x0, x3, x3;
+
+#define S5_1(x0, x1, x2, x3, x4) \
+ vpor x0, x1, tp; \
+ vpxor tp, x2, x2; \
+ vpxor RNOT, x3, x3; \
+ vpxor x0, x1, x4; \
+ vpxor x2, x0, x0; \
+ vpand x4, tp, x1; \
+ vpor x3, x4, x4; \
+ vpxor x0, x4, x4;
+#define S5_2(x0, x1, x2, x3, x4) \
+ vpand x3, x0, x0; \
+ vpxor x3, x1, x1; \
+ vpxor x2, x3, x3; \
+ vpxor x1, x0, x0; \
+ vpand x4, x2, x2; \
+ vpxor x2, x1, x1; \
+ vpand x0, x2, x2; \
+ vpxor x2, x3, x3;
+
+#define S6_1(x0, x1, x2, x3, x4) \
+ vpxor x0, x3, x3; \
+ vpxor x2, x1, tp; \
+ vpxor x0, x2, x2; \
+ vpand x3, x0, x0; \
+ vpor x3, tp, tp; \
+ vpxor RNOT, x1, x4; \
+ vpxor tp, x0, x0; \
+ vpxor x2, tp, x1;
+#define S6_2(x0, x1, x2, x3, x4) \
+ vpxor x4, x3, x3; \
+ vpxor x0, x4, x4; \
+ vpand x0, x2, x2; \
+ vpxor x1, x4, x4; \
+ vpxor x3, x2, x2; \
+ vpand x1, x3, x3; \
+ vpxor x0, x3, x3; \
+ vpxor x2, x1, x1;
+
+#define S7_1(x0, x1, x2, x3, x4) \
+ vpxor RNOT, x1, tp; \
+ vpxor RNOT, x0, x0; \
+ vpand x2, tp, x1; \
+ vpxor x3, x1, x1; \
+ vpor tp, x3, x3; \
+ vpxor x2, tp, x4; \
+ vpxor x3, x2, x2; \
+ vpxor x0, x3, x3; \
+ vpor x1, x0, x0;
+#define S7_2(x0, x1, x2, x3, x4) \
+ vpand x0, x2, x2; \
+ vpxor x4, x0, x0; \
+ vpxor x3, x4, x4; \
+ vpand x0, x3, x3; \
+ vpxor x1, x4, x4; \
+ vpxor x4, x2, x2; \
+ vpxor x1, x3, x3; \
+ vpor x0, x4, x4; \
+ vpxor x1, x4, x4;
+
+#define SI0_1(x0, x1, x2, x3, x4) \
+ vpxor x0, x1, x1; \
+ vpor x1, x3, tp; \
+ vpxor x1, x3, x4; \
+ vpxor RNOT, x0, x0; \
+ vpxor tp, x2, x2; \
+ vpxor x0, tp, x3; \
+ vpand x1, x0, x0; \
+ vpxor x2, x0, x0;
+#define SI0_2(x0, x1, x2, x3, x4) \
+ vpand x3, x2, x2; \
+ vpxor x4, x3, x3; \
+ vpxor x3, x2, x2; \
+ vpxor x3, x1, x1; \
+ vpand x0, x3, x3; \
+ vpxor x0, x1, x1; \
+ vpxor x2, x0, x0; \
+ vpxor x3, x4, x4;
+
+#define SI1_1(x0, x1, x2, x3, x4) \
+ vpxor x3, x1, x1; \
+ vpxor x2, x0, tp; \
+ vpxor RNOT, x2, x2; \
+ vpor x1, x0, x4; \
+ vpxor x3, x4, x4; \
+ vpand x1, x3, x3; \
+ vpxor x2, x1, x1; \
+ vpand x4, x2, x2;
+#define SI1_2(x0, x1, x2, x3, x4) \
+ vpxor x1, x4, x4; \
+ vpor x3, x1, x1; \
+ vpxor tp, x3, x3; \
+ vpxor tp, x2, x2; \
+ vpor x4, tp, x0; \
+ vpxor x4, x2, x2; \
+ vpxor x0, x1, x1; \
+ vpxor x1, x4, x4;
+
+#define SI2_1(x0, x1, x2, x3, x4) \
+ vpxor x1, x2, x2; \
+ vpxor RNOT, x3, tp; \
+ vpor x2, tp, tp; \
+ vpxor x3, x2, x2; \
+ vpxor x0, x3, x4; \
+ vpxor x1, tp, x3; \
+ vpor x2, x1, x1; \
+ vpxor x0, x2, x2;
+#define SI2_2(x0, x1, x2, x3, x4) \
+ vpxor x4, x1, x1; \
+ vpor x3, x4, x4; \
+ vpxor x3, x2, x2; \
+ vpxor x2, x4, x4; \
+ vpand x1, x2, x2; \
+ vpxor x3, x2, x2; \
+ vpxor x4, x3, x3; \
+ vpxor x0, x4, x4;
+
+#define SI3_1(x0, x1, x2, x3, x4) \
+ vpxor x1, x2, x2; \
+ vpand x2, x1, tp; \
+ vpxor x0, tp, tp; \
+ vpor x1, x0, x0; \
+ vpxor x3, x1, x4; \
+ vpxor x3, x0, x0; \
+ vpor tp, x3, x3; \
+ vpxor x2, tp, x1;
+#define SI3_2(x0, x1, x2, x3, x4) \
+ vpxor x3, x1, x1; \
+ vpxor x2, x0, x0; \
+ vpxor x3, x2, x2; \
+ vpand x1, x3, x3; \
+ vpxor x0, x1, x1; \
+ vpand x2, x0, x0; \
+ vpxor x3, x4, x4; \
+ vpxor x0, x3, x3; \
+ vpxor x1, x0, x0;
+
+#define SI4_1(x0, x1, x2, x3, x4) \
+ vpxor x3, x2, x2; \
+ vpand x1, x0, tp; \
+ vpxor x2, tp, tp; \
+ vpor x3, x2, x2; \
+ vpxor RNOT, x0, x4; \
+ vpxor tp, x1, x1; \
+ vpxor x2, tp, x0; \
+ vpand x4, x2, x2;
+#define SI4_2(x0, x1, x2, x3, x4) \
+ vpxor x0, x2, x2; \
+ vpor x4, x0, x0; \
+ vpxor x3, x0, x0; \
+ vpand x2, x3, x3; \
+ vpxor x3, x4, x4; \
+ vpxor x1, x3, x3; \
+ vpand x0, x1, x1; \
+ vpxor x1, x4, x4; \
+ vpxor x3, x0, x0;
+
+#define SI5_1(x0, x1, x2, x3, x4) \
+ vpor x2, x1, tp; \
+ vpxor x1, x2, x2; \
+ vpxor x3, tp, tp; \
+ vpand x1, x3, x3; \
+ vpxor x3, x2, x2; \
+ vpor x0, x3, x3; \
+ vpxor RNOT, x0, x0; \
+ vpxor x2, x3, x3; \
+ vpor x0, x2, x2;
+#define SI5_2(x0, x1, x2, x3, x4) \
+ vpxor tp, x1, x4; \
+ vpxor x4, x2, x2; \
+ vpand x0, x4, x4; \
+ vpxor tp, x0, x0; \
+ vpxor x3, tp, x1; \
+ vpand x2, x0, x0; \
+ vpxor x3, x2, x2; \
+ vpxor x2, x0, x0; \
+ vpxor x4, x2, x2; \
+ vpxor x3, x4, x4;
+
+#define SI6_1(x0, x1, x2, x3, x4) \
+ vpxor x2, x0, x0; \
+ vpand x3, x0, tp; \
+ vpxor x3, x2, x2; \
+ vpxor x2, tp, tp; \
+ vpxor x1, x3, x3; \
+ vpor x0, x2, x2; \
+ vpxor x3, x2, x2; \
+ vpand tp, x3, x3;
+#define SI6_2(x0, x1, x2, x3, x4) \
+ vpxor RNOT, tp, tp; \
+ vpxor x1, x3, x3; \
+ vpand x2, x1, x1; \
+ vpxor tp, x0, x4; \
+ vpxor x4, x3, x3; \
+ vpxor x2, x4, x4; \
+ vpxor x1, tp, x0; \
+ vpxor x0, x2, x2;
+
+#define SI7_1(x0, x1, x2, x3, x4) \
+ vpand x0, x3, tp; \
+ vpxor x2, x0, x0; \
+ vpor x3, x2, x2; \
+ vpxor x1, x3, x4; \
+ vpxor RNOT, x0, x0; \
+ vpor tp, x1, x1; \
+ vpxor x0, x4, x4; \
+ vpand x2, x0, x0; \
+ vpxor x1, x0, x0;
+#define SI7_2(x0, x1, x2, x3, x4) \
+ vpand x2, x1, x1; \
+ vpxor x2, tp, x3; \
+ vpxor x3, x4, x4; \
+ vpand x3, x2, x2; \
+ vpor x0, x3, x3; \
+ vpxor x4, x1, x1; \
+ vpxor x4, x3, x3; \
+ vpand x0, x4, x4; \
+ vpxor x2, x4, x4;
+
+#define get_key(i, j, t) \
+ vbroadcastss (4*(i)+(j))*4(CTX), t;
+
+#define K2(x0, x1, x2, x3, x4, i) \
+ get_key(i, 0, RK0); \
+ get_key(i, 1, RK1); \
+ get_key(i, 2, RK2); \
+ get_key(i, 3, RK3); \
+ vpxor RK0, x0 ## 1, x0 ## 1; \
+ vpxor RK1, x1 ## 1, x1 ## 1; \
+ vpxor RK2, x2 ## 1, x2 ## 1; \
+ vpxor RK3, x3 ## 1, x3 ## 1; \
+ vpxor RK0, x0 ## 2, x0 ## 2; \
+ vpxor RK1, x1 ## 2, x1 ## 2; \
+ vpxor RK2, x2 ## 2, x2 ## 2; \
+ vpxor RK3, x3 ## 2, x3 ## 2;
+
+#define LK2(x0, x1, x2, x3, x4, i) \
+ vpslld $13, x0 ## 1, x4 ## 1; \
+ vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
+ vpor x4 ## 1, x0 ## 1, x0 ## 1; \
+ vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
+ vpslld $3, x2 ## 1, x4 ## 1; \
+ vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
+ vpor x4 ## 1, x2 ## 1, x2 ## 1; \
+ vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
+ vpslld $13, x0 ## 2, x4 ## 2; \
+ vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
+ vpor x4 ## 2, x0 ## 2, x0 ## 2; \
+ vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
+ vpslld $3, x2 ## 2, x4 ## 2; \
+ vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
+ vpor x4 ## 2, x2 ## 2, x2 ## 2; \
+ vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
+ vpslld $1, x1 ## 1, x4 ## 1; \
+ vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
+ vpor x4 ## 1, x1 ## 1, x1 ## 1; \
+ vpslld $3, x0 ## 1, x4 ## 1; \
+ vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
+ vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
+ get_key(i, 1, RK1); \
+ vpslld $1, x1 ## 2, x4 ## 2; \
+ vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
+ vpor x4 ## 2, x1 ## 2, x1 ## 2; \
+ vpslld $3, x0 ## 2, x4 ## 2; \
+ vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
+ vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
+ get_key(i, 3, RK3); \
+ vpslld $7, x3 ## 1, x4 ## 1; \
+ vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
+ vpor x4 ## 1, x3 ## 1, x3 ## 1; \
+ vpslld $7, x1 ## 1, x4 ## 1; \
+ vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
+ vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
+ vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
+ vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
+ get_key(i, 0, RK0); \
+ vpslld $7, x3 ## 2, x4 ## 2; \
+ vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
+ vpor x4 ## 2, x3 ## 2, x3 ## 2; \
+ vpslld $7, x1 ## 2, x4 ## 2; \
+ vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
+ vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
+ vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
+ vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
+ get_key(i, 2, RK2); \
+ vpxor RK1, x1 ## 1, x1 ## 1; \
+ vpxor RK3, x3 ## 1, x3 ## 1; \
+ vpslld $5, x0 ## 1, x4 ## 1; \
+ vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
+ vpor x4 ## 1, x0 ## 1, x0 ## 1; \
+ vpslld $22, x2 ## 1, x4 ## 1; \
+ vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
+ vpor x4 ## 1, x2 ## 1, x2 ## 1; \
+ vpxor RK0, x0 ## 1, x0 ## 1; \
+ vpxor RK2, x2 ## 1, x2 ## 1; \
+ vpxor RK1, x1 ## 2, x1 ## 2; \
+ vpxor RK3, x3 ## 2, x3 ## 2; \
+ vpslld $5, x0 ## 2, x4 ## 2; \
+ vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
+ vpor x4 ## 2, x0 ## 2, x0 ## 2; \
+ vpslld $22, x2 ## 2, x4 ## 2; \
+ vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
+ vpor x4 ## 2, x2 ## 2, x2 ## 2; \
+ vpxor RK0, x0 ## 2, x0 ## 2; \
+ vpxor RK2, x2 ## 2, x2 ## 2;
+
+#define KL2(x0, x1, x2, x3, x4, i) \
+ vpxor RK0, x0 ## 1, x0 ## 1; \
+ vpxor RK2, x2 ## 1, x2 ## 1; \
+ vpsrld $5, x0 ## 1, x4 ## 1; \
+ vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
+ vpor x4 ## 1, x0 ## 1, x0 ## 1; \
+ vpxor RK3, x3 ## 1, x3 ## 1; \
+ vpxor RK1, x1 ## 1, x1 ## 1; \
+ vpsrld $22, x2 ## 1, x4 ## 1; \
+ vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
+ vpor x4 ## 1, x2 ## 1, x2 ## 1; \
+ vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
+ vpxor RK0, x0 ## 2, x0 ## 2; \
+ vpxor RK2, x2 ## 2, x2 ## 2; \
+ vpsrld $5, x0 ## 2, x4 ## 2; \
+ vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
+ vpor x4 ## 2, x0 ## 2, x0 ## 2; \
+ vpxor RK3, x3 ## 2, x3 ## 2; \
+ vpxor RK1, x1 ## 2, x1 ## 2; \
+ vpsrld $22, x2 ## 2, x4 ## 2; \
+ vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
+ vpor x4 ## 2, x2 ## 2, x2 ## 2; \
+ vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
+ vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
+ vpslld $7, x1 ## 1, x4 ## 1; \
+ vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
+ vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
+ vpsrld $1, x1 ## 1, x4 ## 1; \
+ vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
+ vpor x4 ## 1, x1 ## 1, x1 ## 1; \
+ vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
+ vpslld $7, x1 ## 2, x4 ## 2; \
+ vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
+ vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
+ vpsrld $1, x1 ## 2, x4 ## 2; \
+ vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
+ vpor x4 ## 2, x1 ## 2, x1 ## 2; \
+ vpsrld $7, x3 ## 1, x4 ## 1; \
+ vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
+ vpor x4 ## 1, x3 ## 1, x3 ## 1; \
+ vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
+ vpslld $3, x0 ## 1, x4 ## 1; \
+ vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
+ vpsrld $7, x3 ## 2, x4 ## 2; \
+ vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
+ vpor x4 ## 2, x3 ## 2, x3 ## 2; \
+ vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
+ vpslld $3, x0 ## 2, x4 ## 2; \
+ vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
+ vpsrld $13, x0 ## 1, x4 ## 1; \
+ vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
+ vpor x4 ## 1, x0 ## 1, x0 ## 1; \
+ vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
+ vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
+ vpsrld $3, x2 ## 1, x4 ## 1; \
+ vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
+ vpor x4 ## 1, x2 ## 1, x2 ## 1; \
+ vpsrld $13, x0 ## 2, x4 ## 2; \
+ vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
+ vpor x4 ## 2, x0 ## 2, x0 ## 2; \
+ vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
+ vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
+ vpsrld $3, x2 ## 2, x4 ## 2; \
+ vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
+ vpor x4 ## 2, x2 ## 2, x2 ## 2;
+
+#define S(SBOX, x0, x1, x2, x3, x4) \
+ SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+ SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+ SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
+ SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
+
+#define SP(SBOX, x0, x1, x2, x3, x4, i) \
+ get_key(i, 0, RK0); \
+ SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+ get_key(i, 2, RK2); \
+ SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
+ get_key(i, 3, RK3); \
+ SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
+ get_key(i, 1, RK1); \
+ SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
+
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+ vpunpckldq x1, x0, t0; \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x3; \
+ \
+ vpunpcklqdq t1, t0, x0; \
+ vpunpckhqdq t1, t0, x1; \
+ vpunpcklqdq x3, t2, x2; \
+ vpunpckhqdq x3, t2, x3;
+
+#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
+ vmovdqu (0*4*4)(in), x0; \
+ vmovdqu (1*4*4)(in), x1; \
+ vmovdqu (2*4*4)(in), x2; \
+ vmovdqu (3*4*4)(in), x3; \
+ \
+ transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
+#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+ \
+ vmovdqu x0, (0*4*4)(out); \
+ vmovdqu x1, (1*4*4)(out); \
+ vmovdqu x2, (2*4*4)(out); \
+ vmovdqu x3, (3*4*4)(out);
+
+#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+ \
+ vpxor (0*4*4)(out), x0, x0; \
+ vmovdqu x0, (0*4*4)(out); \
+ vpxor (1*4*4)(out), x1, x1; \
+ vmovdqu x1, (1*4*4)(out); \
+ vpxor (2*4*4)(out), x2, x2; \
+ vmovdqu x2, (2*4*4)(out); \
+ vpxor (3*4*4)(out), x3, x3; \
+ vmovdqu x3, (3*4*4)(out);
+
+.align 8
+.global __serpent_enc_blk_8way_avx
+.type __serpent_enc_blk_8way_avx,@function;
+
+__serpent_enc_blk_8way_avx:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: bool, if true: xor output
+ */
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+
+ leaq (4*4*4)(%rdx), %rax;
+ read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+ K2(RA, RB, RC, RD, RE, 0);
+ S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
+ S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
+ S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
+ S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
+ S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
+ S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
+ S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
+ S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
+ S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
+ S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
+ S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
+ S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
+ S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
+ S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
+ S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
+ S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
+ S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
+ S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
+ S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
+ S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
+ S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
+ S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
+ S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
+ S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
+ S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
+ S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
+ S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
+ S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
+ S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
+ S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
+ S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
+ S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
+
+ leaq (4*4*4)(%rsi), %rax;
+
+ testb %cl, %cl;
+ jnz __enc_xor8;
+
+ write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+ ret;
+
+__enc_xor8:
+ xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+ ret;
+
+.align 8
+.global serpent_dec_blk_8way_avx
+.type serpent_dec_blk_8way_avx,@function;
+
+serpent_dec_blk_8way_avx:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ vpcmpeqd RNOT, RNOT, RNOT;
+
+ leaq (4*4*4)(%rdx), %rax;
+ read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+
+ K2(RA, RB, RC, RD, RE, 32);
+ SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
+ SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
+ SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
+ SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
+ SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
+ SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
+ SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
+ SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
+ SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
+ SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
+ SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
+ SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
+ SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
+ SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
+ SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
+ SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
+ SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
+ SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
+ SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
+ SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
+ SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
+ SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
+ SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
+ SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
+ SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
+ SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
+ SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
+ SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
+ SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
+ SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
+ SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
+ S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
+
+ leaq (4*4*4)(%rsi), %rax;
+ write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+ ret;
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
new file mode 100644
index 00000000000..b36bdac237e
--- /dev/null
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -0,0 +1,636 @@
+/*
+ * Glue Code for AVX assembler versions of Serpent Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * Glue code based on serpent_sse2_glue.c by:
+ * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/serpent.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/serpent-avx.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+ u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+ unsigned int j;
+
+ for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+ ivs[j] = src[j];
+
+ serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+ for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+ u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+ be128 ctrblk;
+
+ u128_to_be128(&ctrblk, iv);
+ u128_inc(iv);
+
+ __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+ u128_xor(dst, src, (u128 *)&ctrblk);
+}
+
+static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv)
+{
+ be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
+ unsigned int i;
+
+ for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
+ if (dst != src)
+ dst[i] = src[i];
+
+ u128_to_be128(&ctrblks[i], iv);
+ u128_inc(iv);
+ }
+
+ serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx serpent_enc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+ } }
+};
+
+static const struct common_glue_ctx serpent_ctr = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
+ } }
+};
+
+static const struct common_glue_ctx serpent_dec = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+ } }
+};
+
+static const struct common_glue_ctx serpent_dec_cbc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+ } }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
+ dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
+ nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+ return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
+ NULL, fpu_enabled, nbytes);
+}
+
+static inline void serpent_fpu_end(bool fpu_enabled)
+{
+ glue_fpu_end(fpu_enabled);
+}
+
+struct crypt_priv {
+ struct serpent_ctx *ctx;
+ bool fpu_enabled;
+};
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = SERPENT_BLOCK_SIZE;
+ struct crypt_priv *ctx = priv;
+ int i;
+
+ ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+
+ if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+ serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+ return;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ __serpent_encrypt(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = SERPENT_BLOCK_SIZE;
+ struct crypt_priv *ctx = priv;
+ int i;
+
+ ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+
+ if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+ serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+ return;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ __serpent_decrypt(ctx->ctx, srcdst, srcdst);
+}
+
+struct serpent_lrw_ctx {
+ struct lrw_table_ctx lrw_table;
+ struct serpent_ctx serpent_ctx;
+};
+
+static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+ int err;
+
+ err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
+ SERPENT_BLOCK_SIZE);
+ if (err)
+ return err;
+
+ return lrw_init_table(&ctx->lrw_table, key + keylen -
+ SERPENT_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[SERPENT_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->serpent_ctx,
+ .fpu_enabled = false,
+ };
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = lrw_crypt(desc, dst, src, nbytes, &req);
+ serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[SERPENT_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->serpent_ctx,
+ .fpu_enabled = false,
+ };
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = lrw_crypt(desc, dst, src, nbytes, &req);
+ serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ lrw_free_table(&ctx->lrw_table);
+}
+
+struct serpent_xts_ctx {
+ struct serpent_ctx tweak_ctx;
+ struct serpent_ctx crypt_ctx;
+};
+
+static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ u32 *flags = &tfm->crt_flags;
+ int err;
+
+ /* key consists of keys of equal size concatenated, therefore
+ * the length must be even
+ */
+ if (keylen % 2) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ /* first half of xts-key is for crypt */
+ err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
+ if (err)
+ return err;
+
+ /* second half of xts-key is for tweak */
+ return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[SERPENT_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->crypt_ctx,
+ .fpu_enabled = false,
+ };
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = xts_crypt(desc, dst, src, nbytes, &req);
+ serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[SERPENT_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->crypt_ctx,
+ .fpu_enabled = false,
+ };
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = xts_crypt(desc, dst, src, nbytes, &req);
+ serpent_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static struct crypto_alg serpent_algs[10] = { {
+ .cra_name = "__ecb-serpent-avx",
+ .cra_driver_name = "__driver-ecb-serpent-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__cbc-serpent-avx",
+ .cra_driver_name = "__driver-cbc-serpent-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__ctr-serpent-avx",
+ .cra_driver_name = "__driver-ctr-serpent-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "__lrw-serpent-avx",
+ .cra_driver_name = "__driver-lrw-serpent-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
+ .cra_exit = lrw_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = lrw_serpent_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__xts-serpent-avx",
+ .cra_driver_name = "__driver-xts-serpent-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
+ .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = xts_serpent_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ecb(serpent)",
+ .cra_driver_name = "ecb-serpent-avx",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(serpent)",
+ .cra_driver_name = "cbc-serpent-avx",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = __ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(serpent)",
+ .cra_driver_name = "ctr-serpent-avx",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_encrypt,
+ .geniv = "chainiv",
+ },
+ },
+}, {
+ .cra_name = "lrw(serpent)",
+ .cra_driver_name = "lrw-serpent-avx",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "xts(serpent)",
+ .cra_driver_name = "xts-serpent-avx",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
+ .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+} };
+
+static int __init serpent_init(void)
+{
+ u64 xcr0;
+
+ if (!cpu_has_avx || !cpu_has_osxsave) {
+ printk(KERN_INFO "AVX instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+ printk(KERN_INFO "AVX detected but unusable.\n");
+ return -ENODEV;
+ }
+
+ return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+}
+
+static void __exit serpent_exit(void)
+{
+ crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+}
+
+module_init(serpent_init);
+module_exit(serpent_exit);
+
+MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("serpent");
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 4b21be85e0a..d679c8675f4 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -41,358 +41,145 @@
#include <crypto/ctr.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/i387.h>
-#include <asm/serpent.h>
-#include <crypto/scatterwalk.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
-
-struct async_serpent_ctx {
- struct cryptd_ablkcipher *cryptd_tfm;
-};
+#include <asm/crypto/serpent-sse2.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
- if (fpu_enabled)
- return true;
-
- /* SSE2 is only used when chunk to be processed is large enough, so
- * do not enable FPU until it is necessary.
- */
- if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
- return false;
-
- kernel_fpu_begin();
- return true;
-}
-
-static inline void serpent_fpu_end(bool fpu_enabled)
+static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
- if (fpu_enabled)
- kernel_fpu_end();
-}
-
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- bool enc)
-{
- bool fpu_enabled = false;
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
- fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
-
- /* Process multi-block batch */
- if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
- do {
- if (enc)
- serpent_enc_blk_xway(ctx, wdst, wsrc);
- else
- serpent_dec_blk_xway(ctx, wdst, wsrc);
-
- wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
- wdst += bsize * SERPENT_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (enc)
- __serpent_encrypt(ctx, wdst, wsrc);
- else
- __serpent_decrypt(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
+ u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+ unsigned int j;
- serpent_fpu_end(fpu_enabled);
- return err;
-}
+ for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+ ivs[j] = src[j];
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
+ serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, true);
+ for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+ u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
}
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
{
- struct blkcipher_walk walk;
+ be128 ctrblk;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, false);
-}
+ u128_to_be128(&ctrblk, iv);
+ u128_inc(iv);
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 *iv = (u128 *)walk->iv;
-
- do {
- u128_xor(dst, src, iv);
- __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
- return nbytes;
+ __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+ u128_xor(dst, src, (u128 *)&ctrblk);
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv)
{
- struct blkcipher_walk walk;
- int err;
+ be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
+ unsigned int i;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
+ if (dst != src)
+ dst[i] = src[i];
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ u128_to_be128(&ctrblks[i], iv);
+ u128_inc(iv);
}
- return err;
+ serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
}
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
- u128 last_iv;
- int i;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process multi-block batch */
- if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
- do {
- nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
- src -= SERPENT_PARALLEL_BLOCKS - 1;
- dst -= SERPENT_PARALLEL_BLOCKS - 1;
-
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
- ivs[i] = src[i];
-
- serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
- u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
+static const struct common_glue_ctx serpent_enc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- for (;;) {
- __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+ } }
+};
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- }
+static const struct common_glue_ctx serpent_ctr = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
+ } }
+};
-done:
- u128_xor(dst, dst, (u128 *)walk->iv);
- *(u128 *)walk->iv = last_iv;
+static const struct common_glue_ctx serpent_dec = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+ } }
+};
- return nbytes;
-}
+static const struct common_glue_ctx serpent_dec_cbc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = SERPENT_PARALLEL_BLOCKS,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+ } }
+};
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk.nbytes)) {
- fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- serpent_fpu_end(fpu_enabled);
- return err;
+ return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
}
-static inline void u128_to_be128(be128 *dst, const u128 *src)
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- dst->a = cpu_to_be64(src->a);
- dst->b = cpu_to_be64(src->b);
+ return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
}
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- dst->a = be64_to_cpu(src->a);
- dst->b = be64_to_cpu(src->b);
+ return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
+ dst, src, nbytes);
}
-static inline void u128_inc(u128 *i)
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- i->b++;
- if (!i->b)
- i->a++;
+ return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
+ nbytes);
}
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 *ctrblk = walk->iv;
- u8 keystream[SERPENT_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- __serpent_encrypt(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
-
- crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
+ return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
}
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
- struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ctrblk;
- be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
- int i;
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
- /* Process multi-block batch */
- if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
- do {
- /* create ctrblks for parallel encrypt */
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- u128_to_be128(&ctrblocks[i], &ctrblk);
- u128_inc(&ctrblk);
- }
-
- serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += SERPENT_PARALLEL_BLOCKS;
- dst += SERPENT_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
-
- __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
- u128_xor(dst, dst, (u128 *)ctrblocks);
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- u128_to_be128((be128 *)walk->iv, &ctrblk);
- return nbytes;
+ return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
+ NULL, fpu_enabled, nbytes);
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static inline void serpent_fpu_end(bool fpu_enabled)
{
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
- fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- serpent_fpu_end(fpu_enabled);
-
- if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
+ glue_fpu_end(fpu_enabled);
}
struct crypt_priv {
@@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ret;
}
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
- unsigned int key_len)
-{
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
- int err;
-
- crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
- crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
- & CRYPTO_TFM_REQ_MASK);
- err = crypto_ablkcipher_setkey(child, key, key_len);
- crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
- & CRYPTO_TFM_RES_MASK);
- return err;
-}
-
-static int __ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- struct blkcipher_desc desc;
-
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
-
- return crypto_blkcipher_crt(desc.tfm)->encrypt(
- &desc, req->dst, req->src, req->nbytes);
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
-
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
- return crypto_ablkcipher_encrypt(cryptd_req);
- } else {
- return __ablk_encrypt(req);
- }
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
- struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
- struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
- if (!irq_fpu_usable()) {
- struct ablkcipher_request *cryptd_req =
- ablkcipher_request_ctx(req);
-
- memcpy(cryptd_req, req, sizeof(*req));
- ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-
- return crypto_ablkcipher_decrypt(cryptd_req);
- } else {
- struct blkcipher_desc desc;
-
- desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
- desc.info = req->info;
- desc.flags = 0;
-
- return crypto_blkcipher_crt(desc.tfm)->decrypt(
- &desc, req->dst, req->src, req->nbytes);
- }
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
- struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
-
- cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init(struct crypto_tfm *tfm)
-{
- struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
- struct cryptd_ablkcipher *cryptd_tfm;
- char drv_name[CRYPTO_MAX_ALG_NAME];
-
- snprintf(drv_name, sizeof(drv_name), "__driver-%s",
- crypto_tfm_alg_driver_name(tfm));
-
- cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- ctx->cryptd_tfm = cryptd_tfm;
- tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
- crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
- return 0;
-}
-
static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ecb-serpent-sse2",
.cra_driver_name = "__driver-ecb-serpent-sse2",
@@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
@@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_serpent_ctx),
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index b2c2f57d70e..49d6987a73d 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -468,7 +468,7 @@ W_PRECALC_SSSE3
*/
SHA1_VECTOR_ASM sha1_transform_ssse3
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
.macro W_PRECALC_AVX
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index f916499d0ab..4a11a9d7245 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -35,7 +35,7 @@
asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
unsigned int rounds);
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
unsigned int rounds);
#endif
@@ -184,7 +184,7 @@ static struct shash_alg alg = {
}
};
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
u64 xcr0;
@@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void)
if (cpu_has_ssse3)
sha1_transform_asm = sha1_transform_ssse3;
-#ifdef SHA1_ENABLE_AVX_SUPPORT
+#ifdef CONFIG_AS_AVX
/* allow AVX to override SSSE3, it's a little faster */
if (avx_usable())
sha1_transform_asm = sha1_transform_avx;
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
new file mode 100644
index 00000000000..35f45574390
--- /dev/null
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -0,0 +1,300 @@
+/*
+ * Twofish Cipher 8-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+.file "twofish-avx-x86_64-asm_64.S"
+.text
+
+/* structure of crypto context */
+#define s0 0
+#define s1 1024
+#define s2 2048
+#define s3 3072
+#define w 4096
+#define k 4128
+
+/**********************************************************************
+ 8-way AVX twofish
+ **********************************************************************/
+#define CTX %rdi
+
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+
+#define RA2 %xmm4
+#define RB2 %xmm5
+#define RC2 %xmm6
+#define RD2 %xmm7
+
+#define RX %xmm8
+#define RY %xmm9
+
+#define RK1 %xmm10
+#define RK2 %xmm11
+
+#define RID1 %rax
+#define RID1b %al
+#define RID2 %rbx
+#define RID2b %bl
+
+#define RGI1 %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2 %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RGS1 %r8
+#define RGS1d %r8d
+#define RGS2 %r9
+#define RGS2d %r9d
+#define RGS3 %r10
+#define RGS3d %r10d
+
+
+#define lookup_32bit(t0, t1, t2, t3, src, dst) \
+ movb src ## bl, RID1b; \
+ movb src ## bh, RID2b; \
+ movl t0(CTX, RID1, 4), dst ## d; \
+ xorl t1(CTX, RID2, 4), dst ## d; \
+ shrq $16, src; \
+ movb src ## bl, RID1b; \
+ movb src ## bh, RID2b; \
+ xorl t2(CTX, RID1, 4), dst ## d; \
+ xorl t3(CTX, RID2, 4), dst ## d;
+
+#define G(a, x, t0, t1, t2, t3) \
+ vmovq a, RGI1; \
+ vpsrldq $8, a, x; \
+ vmovq x, RGI2; \
+ \
+ lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
+ shrq $16, RGI1; \
+ lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
+ shlq $32, RGS2; \
+ orq RGS1, RGS2; \
+ \
+ lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
+ shrq $16, RGI2; \
+ lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
+ shlq $32, RGS3; \
+ orq RGS1, RGS3; \
+ \
+ vmovq RGS2, x; \
+ vpinsrq $1, RGS3, x, x;
+
+#define encround(a, b, c, d, x, y) \
+ G(a, x, s0, s1, s2, s3); \
+ G(b, y, s1, s2, s3, s0); \
+ vpaddd x, y, x; \
+ vpaddd y, x, y; \
+ vpaddd x, RK1, x; \
+ vpaddd y, RK2, y; \
+ vpxor x, c, c; \
+ vpsrld $1, c, x; \
+ vpslld $(32 - 1), c, c; \
+ vpor c, x, c; \
+ vpslld $1, d, x; \
+ vpsrld $(32 - 1), d, d; \
+ vpor d, x, d; \
+ vpxor d, y, d;
+
+#define decround(a, b, c, d, x, y) \
+ G(a, x, s0, s1, s2, s3); \
+ G(b, y, s1, s2, s3, s0); \
+ vpaddd x, y, x; \
+ vpaddd y, x, y; \
+ vpaddd y, RK2, y; \
+ vpxor d, y, d; \
+ vpsrld $1, d, y; \
+ vpslld $(32 - 1), d, d; \
+ vpor d, y, d; \
+ vpslld $1, c, y; \
+ vpsrld $(32 - 1), c, c; \
+ vpor c, y, c; \
+ vpaddd x, RK1, x; \
+ vpxor x, c, c;
+
+#define encrypt_round(n, a, b, c, d) \
+ vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
+ vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
+ encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
+ encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
+
+#define decrypt_round(n, a, b, c, d) \
+ vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
+ vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
+ decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
+ decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
+
+#define encrypt_cycle(n) \
+ encrypt_round((2*n), RA, RB, RC, RD); \
+ encrypt_round(((2*n) + 1), RC, RD, RA, RB);
+
+#define decrypt_cycle(n) \
+ decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
+ decrypt_round((2*n), RA, RB, RC, RD);
+
+
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+ vpunpckldq x1, x0, t0; \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x3; \
+ \
+ vpunpcklqdq t1, t0, x0; \
+ vpunpckhqdq t1, t0, x1; \
+ vpunpcklqdq x3, t2, x2; \
+ vpunpckhqdq x3, t2, x3;
+
+#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \
+ vpxor (0*4*4)(in), wkey, x0; \
+ vpxor (1*4*4)(in), wkey, x1; \
+ vpxor (2*4*4)(in), wkey, x2; \
+ vpxor (3*4*4)(in), wkey, x3; \
+ \
+ transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
+#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
+ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+ \
+ vpxor x0, wkey, x0; \
+ vmovdqu x0, (0*4*4)(out); \
+ vpxor x1, wkey, x1; \
+ vmovdqu x1, (1*4*4)(out); \
+ vpxor x2, wkey, x2; \
+ vmovdqu x2, (2*4*4)(out); \
+ vpxor x3, wkey, x3; \
+ vmovdqu x3, (3*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
+ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+ \
+ vpxor x0, wkey, x0; \
+ vpxor (0*4*4)(out), x0, x0; \
+ vmovdqu x0, (0*4*4)(out); \
+ vpxor x1, wkey, x1; \
+ vpxor (1*4*4)(out), x1, x1; \
+ vmovdqu x1, (1*4*4)(out); \
+ vpxor x2, wkey, x2; \
+ vpxor (2*4*4)(out), x2, x2; \
+ vmovdqu x2, (2*4*4)(out); \
+ vpxor x3, wkey, x3; \
+ vpxor (3*4*4)(out), x3, x3; \
+ vmovdqu x3, (3*4*4)(out);
+
+.align 8
+.global __twofish_enc_blk_8way
+.type __twofish_enc_blk_8way,@function;
+
+__twofish_enc_blk_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: bool, if true: xor output
+ */
+
+ pushq %rbx;
+ pushq %rcx;
+
+ vmovdqu w(CTX), RK1;
+
+ leaq (4*4*4)(%rdx), %rax;
+ inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
+ inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+
+ xorq RID1, RID1;
+ xorq RID2, RID2;
+
+ encrypt_cycle(0);
+ encrypt_cycle(1);
+ encrypt_cycle(2);
+ encrypt_cycle(3);
+ encrypt_cycle(4);
+ encrypt_cycle(5);
+ encrypt_cycle(6);
+ encrypt_cycle(7);
+
+ vmovdqu (w+4*4)(CTX), RK1;
+
+ popq %rcx;
+ popq %rbx;
+
+ leaq (4*4*4)(%rsi), %rax;
+
+ testb %cl, %cl;
+ jnz __enc_xor8;
+
+ outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
+ outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+
+ ret;
+
+__enc_xor8:
+ outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
+ outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+
+ ret;
+
+.align 8
+.global twofish_dec_blk_8way
+.type twofish_dec_blk_8way,@function;
+
+twofish_dec_blk_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ pushq %rbx;
+
+ vmovdqu (w+4*4)(CTX), RK1;
+
+ leaq (4*4*4)(%rdx), %rax;
+ inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
+ inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+
+ xorq RID1, RID1;
+ xorq RID2, RID2;
+
+ decrypt_cycle(7);
+ decrypt_cycle(6);
+ decrypt_cycle(5);
+ decrypt_cycle(4);
+ decrypt_cycle(3);
+ decrypt_cycle(2);
+ decrypt_cycle(1);
+ decrypt_cycle(0);
+
+ vmovdqu (w)(CTX), RK1;
+
+ popq %rbx;
+
+ leaq (4*4*4)(%rsi), %rax;
+ outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
+ outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+
+ ret;
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
new file mode 100644
index 00000000000..782b67ddaf6
--- /dev/null
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -0,0 +1,624 @@
+/*
+ * Glue Code for AVX assembler version of Twofish Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/twofish.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/twofish.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+#include <crypto/scatterwalk.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+#define TWOFISH_PARALLEL_BLOCKS 8
+
+static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __twofish_enc_blk_3way(ctx, dst, src, false);
+}
+
+/* 8-way parallel cipher functions */
+asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __twofish_enc_blk_8way(ctx, dst, src, false);
+}
+
+static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __twofish_enc_blk_8way(ctx, dst, src, true);
+}
+
+static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ twofish_dec_blk_8way(ctx, dst, src);
+}
+
+static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+ u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1];
+ unsigned int j;
+
+ for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
+ ivs[j] = src[j];
+
+ twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+ for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
+ u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
+static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv)
+{
+ be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
+ unsigned int i;
+
+ for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) {
+ if (dst != src)
+ dst[i] = src[i];
+
+ u128_to_be128(&ctrblks[i], iv);
+ u128_inc(iv);
+ }
+
+ twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx twofish_enc = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) }
+ }, {
+ .num_blocks = 3,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+ } }
+};
+
+static const struct common_glue_ctx twofish_ctr = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) }
+ }, {
+ .num_blocks = 3,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
+ } }
+};
+
+static const struct common_glue_ctx twofish_dec = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) }
+ }, {
+ .num_blocks = 3,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+ } }
+};
+
+static const struct common_glue_ctx twofish_dec_cbc = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = TWOFISH_PARALLEL_BLOCKS,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) }
+ }, {
+ .num_blocks = 3,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+ } }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
+ dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
+ nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+ return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL,
+ fpu_enabled, nbytes);
+}
+
+static inline void twofish_fpu_end(bool fpu_enabled)
+{
+ glue_fpu_end(fpu_enabled);
+}
+
+struct crypt_priv {
+ struct twofish_ctx *ctx;
+ bool fpu_enabled;
+};
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = TF_BLOCK_SIZE;
+ struct crypt_priv *ctx = priv;
+ int i;
+
+ ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
+
+ if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
+ twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+ return;
+ }
+
+ for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+ twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+
+ nbytes %= bsize * 3;
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ twofish_enc_blk(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = TF_BLOCK_SIZE;
+ struct crypt_priv *ctx = priv;
+ int i;
+
+ ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
+
+ if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
+ twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+ return;
+ }
+
+ for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+ twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
+
+ nbytes %= bsize * 3;
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ twofish_dec_blk(ctx->ctx, srcdst, srcdst);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[TWOFISH_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->twofish_ctx,
+ .fpu_enabled = false,
+ };
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = lrw_crypt(desc, dst, src, nbytes, &req);
+ twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[TWOFISH_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->twofish_ctx,
+ .fpu_enabled = false,
+ };
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = lrw_crypt(desc, dst, src, nbytes, &req);
+ twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[TWOFISH_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->crypt_ctx,
+ .fpu_enabled = false,
+ };
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = xts_crypt(desc, dst, src, nbytes, &req);
+ twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[TWOFISH_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->crypt_ctx,
+ .fpu_enabled = false,
+ };
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = xts_crypt(desc, dst, src, nbytes, &req);
+ twofish_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static struct crypto_alg twofish_algs[10] = { {
+ .cra_name = "__ecb-twofish-avx",
+ .cra_driver_name = "__driver-ecb-twofish-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__cbc-twofish-avx",
+ .cra_driver_name = "__driver-cbc-twofish-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__ctr-twofish-avx",
+ .cra_driver_name = "__driver-ctr-twofish-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "__lrw-twofish-avx",
+ .cra_driver_name = "__driver-lrw-twofish-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list),
+ .cra_exit = lrw_twofish_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE +
+ TF_BLOCK_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE +
+ TF_BLOCK_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = lrw_twofish_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__xts-twofish-avx",
+ .cra_driver_name = "__driver-xts-twofish-avx",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE * 2,
+ .max_keysize = TF_MAX_KEY_SIZE * 2,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = xts_twofish_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ecb(twofish)",
+ .cra_driver_name = "ecb-twofish-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(twofish)",
+ .cra_driver_name = "cbc-twofish-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = __ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(twofish)",
+ .cra_driver_name = "ctr-twofish-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_encrypt,
+ .geniv = "chainiv",
+ },
+ },
+}, {
+ .cra_name = "lrw(twofish)",
+ .cra_driver_name = "lrw-twofish-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE +
+ TF_BLOCK_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE +
+ TF_BLOCK_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "xts(twofish)",
+ .cra_driver_name = "xts-twofish-avx",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list),
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE * 2,
+ .max_keysize = TF_MAX_KEY_SIZE * 2,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+} };
+
+static int __init twofish_init(void)
+{
+ u64 xcr0;
+
+ if (!cpu_has_avx || !cpu_has_osxsave) {
+ printk(KERN_INFO "AVX instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+ printk(KERN_INFO "AVX detected but unusable.\n");
+ return -ENODEV;
+ }
+
+ return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+}
+
+static void __exit twofish_exit(void)
+{
+ crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+}
+
+module_init(twofish_init);
+module_exit(twofish_exit);
+
+MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("twofish");
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 922ab24cce3..15f9347316c 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -3,11 +3,6 @@
*
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -33,20 +28,13 @@
#include <crypto/algapi.h>
#include <crypto/twofish.h>
#include <crypto/b128ops.h>
+#include <asm/crypto/twofish.h>
+#include <asm/crypto/glue_helper.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
-/* regular block cipher functions from twofish_x86_64 module */
-asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-
-/* 3-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
+EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
+EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
@@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
__twofish_enc_blk_3way(ctx, dst, src, true);
}
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
- void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
- /* Process three block batch */
- if (nbytes >= bsize * 3) {
- do {
- fn_3way(ctx, wdst, wsrc);
-
- wsrc += bsize * 3;
- wdst += bsize * 3;
- nbytes -= bsize * 3;
- } while (nbytes >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
-
- return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
{
- struct blkcipher_walk walk;
+ u128 ivs[2];
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way);
-}
+ ivs[0] = src[0];
+ ivs[1] = src[1];
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
+ twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
+ u128_xor(&dst[1], &dst[1], &ivs[0]);
+ u128_xor(&dst[2], &dst[2], &ivs[1]);
}
+EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 *iv = (u128 *)walk->iv;
-
- do {
- u128_xor(dst, src, iv);
- twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
- return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
{
- struct blkcipher_walk walk;
- int err;
+ be128 ctrblk;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ if (dst != src)
+ *dst = *src;
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
+ u128_to_be128(&ctrblk, iv);
+ u128_inc(iv);
- return err;
+ twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+ u128_xor(dst, dst, (u128 *)&ctrblk);
}
+EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv)
{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ivs[3 - 1];
- u128 last_iv;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process three block batch */
- if (nbytes >= bsize * 3) {
- do {
- nbytes -= bsize * (3 - 1);
- src -= 3 - 1;
- dst -= 3 - 1;
-
- ivs[0] = src[0];
- ivs[1] = src[1];
-
- twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
-
- u128_xor(dst + 1, dst + 1, ivs + 0);
- u128_xor(dst + 2, dst + 2, ivs + 1);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- for (;;) {
- twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
+ be128 ctrblks[3];
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
+ if (dst != src) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
}
-done:
- u128_xor(dst, dst, (u128 *)walk->iv);
- *(u128 *)walk->iv = last_iv;
+ u128_to_be128(&ctrblks[0], iv);
+ u128_inc(iv);
+ u128_to_be128(&ctrblks[1], iv);
+ u128_inc(iv);
+ u128_to_be128(&ctrblks[2], iv);
+ u128_inc(iv);
- return nbytes;
+ twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
+EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
+
+static const struct common_glue_ctx twofish_enc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 3,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
+ } }
+};
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+static const struct common_glue_ctx twofish_ctr = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 3,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) }
+ } }
+};
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
+static const struct common_glue_ctx twofish_dec = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 3,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
+ } }
+};
- return err;
-}
+static const struct common_glue_ctx twofish_dec_cbc = {
+ .num_funcs = 2,
+ .fpu_blocks_limit = -1,
+
+ .funcs = { {
+ .num_blocks = 3,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
+ } }
+};
-static inline void u128_to_be128(be128 *dst, const u128 *src)
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- dst->a = cpu_to_be64(src->a);
- dst->b = cpu_to_be64(src->b);
+ return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
}
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- dst->a = be64_to_cpu(src->a);
- dst->b = be64_to_cpu(src->b);
+ return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
}
-static inline void u128_inc(u128 *i)
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- i->b++;
- if (!i->b)
- i->a++;
+ return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
+ dst, src, nbytes);
}
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 *ctrblk = walk->iv;
- u8 keystream[TF_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- twofish_enc_blk(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
-
- crypto_inc(ctrblk, TF_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = TF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ctrblk;
- be128 ctrblocks[3];
-
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
- /* Process three block batch */
- if (nbytes >= bsize * 3) {
- do {
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- dst[2] = src[2];
- }
-
- /* create ctrblks for parallel encrypt */
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
- u128_to_be128(&ctrblocks[1], &ctrblk);
- u128_inc(&ctrblk);
- u128_to_be128(&ctrblocks[2], &ctrblk);
- u128_inc(&ctrblk);
-
- twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += 3;
- dst += 3;
- nbytes -= bsize * 3;
- } while (nbytes >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- u128_to_be128(&ctrblocks[0], &ctrblk);
- u128_inc(&ctrblk);
-
- twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
- u128_xor(dst, dst, (u128 *)ctrblocks);
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- u128_to_be128((be128 *)walk->iv, &ctrblk);
- return nbytes;
+ return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
+ nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
-
- while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
+ return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
}
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
@@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
twofish_dec_blk(ctx, srcdst, srcdst);
}
-struct twofish_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct twofish_ctx twofish_ctx;
-};
-
-static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
{
struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
int err;
@@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
}
+EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
@@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return lrw_crypt(desc, dst, src, nbytes, &req);
}
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
+void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
{
struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
+EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
-struct twofish_xts_ctx {
- struct twofish_ctx tweak_ctx;
- struct twofish_ctx crypt_ctx;
-};
-
-static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
{
struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
@@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
flags);
}
+EXPORT_SYMBOL_GPL(xts_twofish_setkey);
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
@@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { {
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
- .cra_exit = lrw_exit_tfm,
+ .cra_exit = lrw_twofish_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index daeca56211e..673ac9b63d6 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -38,7 +38,7 @@
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
int err = 0;
- bool ia32 = is_ia32_task();
+ bool ia32 = test_thread_flag(TIF_IA32);
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 49331bedc15..70780689599 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
+#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
+
+#define b_replacement(number) "663"#number
+#define e_replacement(number) "664"#number
+
+#define alt_slen "662b-661b"
+#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+
+#define ALTINSTR_ENTRY(feature, number) \
+ " .long 661b - .\n" /* label */ \
+ " .long " b_replacement(number)"f - .\n" /* new instruction */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
+ " .byte " alt_slen "\n" /* source len */ \
+ " .byte " alt_rlen(number) "\n" /* replacement len */
+
+#define DISCARD_ENTRY(number) /* rlen <= slen */ \
+ " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
+ b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature) \
- \
- "661:\n\t" oldinstr "\n662:\n" \
- ".section .altinstructions,\"a\"\n" \
- " .long 661b - .\n" /* label */ \
- " .long 663f - .\n" /* new instruction */ \
- " .word " __stringify(feature) "\n" /* feature bit */ \
- " .byte 662b-661b\n" /* sourcelen */ \
- " .byte 664f-663f\n" /* replacementlen */ \
- ".previous\n" \
- ".section .discard,\"aw\",@progbits\n" \
- " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \
- ".previous\n" \
- ".section .altinstr_replacement, \"ax\"\n" \
- "663:\n\t" newinstr "\n664:\n" /* replacement */ \
- ".previous"
+ OLDINSTR(oldinstr) \
+ ".section .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feature, 1) \
+ ".previous\n" \
+ ".section .discard,\"aw\",@progbits\n" \
+ DISCARD_ENTRY(1) \
+ ".previous\n" \
+ ".section .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+ ".previous"
+
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+ OLDINSTR(oldinstr) \
+ ".section .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feature1, 1) \
+ ALTINSTR_ENTRY(feature2, 2) \
+ ".previous\n" \
+ ".section .discard,\"aw\",@progbits\n" \
+ DISCARD_ENTRY(1) \
+ DISCARD_ENTRY(2) \
+ ".previous\n" \
+ ".section .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
+ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+ ".previous"
/*
* This must be included *after* the definition of ALTERNATIVE due to
@@ -140,6 +171,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
/*
+ * Like alternative_call, but there are two features and respective functions.
+ * If CPU has feature2, function2 is used.
+ * Otherwise, if CPU has feature1, function1 is used.
+ * Otherwise, old function is used.
+ */
+#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
+ output, input...) \
+ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
+ "call %P[new2]", feature2) \
+ : output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+ [new2] "i" (newfunc2), ## input)
+
+/*
* use this macro(s) if you need more than one output parameter
* in alternative_io
*/
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 49ad773f4b9..b3341e9cd8f 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -26,10 +26,31 @@ struct amd_l3_cache {
u8 subcaches[4];
};
+struct threshold_block {
+ unsigned int block;
+ unsigned int bank;
+ unsigned int cpu;
+ u32 address;
+ u16 interrupt_enable;
+ bool interrupt_capable;
+ u16 threshold_limit;
+ struct kobject kobj;
+ struct list_head miscj;
+};
+
+struct threshold_bank {
+ struct kobject *kobj;
+ struct threshold_block *blocks;
+
+ /* initialized to the number of CPUs on the node sharing this bank */
+ atomic_t cpus;
+};
+
struct amd_northbridge {
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
+ struct threshold_bank *bank4;
};
struct amd_northbridge_info {
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eaff4790ed9..f34261296ff 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,8 @@ struct apic {
unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
unsigned long (*check_apicid_present)(int apicid);
- void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
+ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask);
void (*init_apic_ldr)(void);
void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
@@ -331,9 +332,9 @@ struct apic {
unsigned long (*set_apic_id)(unsigned int id);
unsigned long apic_id_mask;
- unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
- unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
- const struct cpumask *andmask);
+ int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+ const struct cpumask *andmask,
+ unsigned int *apicid);
/* ipi */
void (*send_IPI_mask)(const struct cpumask *mask, int vector);
@@ -464,6 +465,8 @@ static inline u32 safe_apic_wait_icr_idle(void)
return apic->safe_wait_icr_idle();
}
+extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v));
+
#else /* CONFIG_X86_LOCAL_APIC */
static inline u32 apic_read(u32 reg) { return 0; }
@@ -473,6 +476,7 @@ static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
+static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {}
#endif /* CONFIG_X86_LOCAL_APIC */
@@ -537,7 +541,12 @@ static inline const struct cpumask *default_target_cpus(void)
#endif
}
-DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+static inline const struct cpumask *online_target_cpus(void)
+{
+ return cpu_online_mask;
+}
+
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
static inline unsigned int read_apic_id(void)
@@ -586,21 +595,50 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
#endif
-static inline unsigned int
-default_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask,
+ unsigned int *apicid)
{
- return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+ unsigned long cpu_mask = cpumask_bits(cpumask)[0] &
+ cpumask_bits(andmask)[0] &
+ cpumask_bits(cpu_online_mask)[0] &
+ APIC_ALL_CPUS;
+
+ if (likely(cpu_mask)) {
+ *apicid = (unsigned int)cpu_mask;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
}
-static inline unsigned int
+extern int
default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
+ const struct cpumask *andmask,
+ unsigned int *apicid);
+
+static inline void
+flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask)
{
- unsigned long mask1 = cpumask_bits(cpumask)[0];
- unsigned long mask2 = cpumask_bits(andmask)[0];
- unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+ * priority interrupt delivery mode.
+ *
+ * In particular there was a hyperthreading cpu observed to
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
- return (unsigned int)(mask1 & mask2 & mask3);
+static inline void
+default_vector_allocation_domain(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask)
+{
+ cpumask_copy(retmask, cpumask_of(cpu));
}
static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index a6983b27722..72f5009deb5 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -264,6 +264,13 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
* This operation is non-atomic and can be reordered.
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
+ *
+ * Note: the operation is performed atomically with respect to
+ * the local CPU, but not other CPUs. Portable code should not
+ * rely on this behaviour.
+ * KVM relies on this behaviour on x86 for modifying memory that is also
+ * accessed from a hypervisor on the same CPU if running in a VM: don't change
+ * this without also updating arch/x86/kernel/kvm.c
*/
static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
{
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index eb45aa6b1f2..2ad874cb661 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -66,6 +66,7 @@ struct setup_header {
__u64 setup_data;
__u64 pref_address;
__u32 init_size;
+ __u32 handover_offset;
} __attribute__((packed));
struct sys_desc_table {
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 340ee49961a..6b7ee5ff682 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -176,7 +176,7 @@
#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
/* Virtualization flags: Linux defined, word 8 */
@@ -207,6 +207,8 @@
#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */
+#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
diff --git a/arch/x86/include/asm/crypto/ablk_helper.h b/arch/x86/include/asm/crypto/ablk_helper.h
new file mode 100644
index 00000000000..4f93df50c23
--- /dev/null
+++ b/arch/x86/include/asm/crypto/ablk_helper.h
@@ -0,0 +1,31 @@
+/*
+ * Shared async block cipher helpers
+ */
+
+#ifndef _CRYPTO_ABLK_HELPER_H
+#define _CRYPTO_ABLK_HELPER_H
+
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <crypto/cryptd.h>
+
+struct async_helper_ctx {
+ struct cryptd_ablkcipher *cryptd_tfm;
+};
+
+extern int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int key_len);
+
+extern int __ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_encrypt(struct ablkcipher_request *req);
+
+extern int ablk_decrypt(struct ablkcipher_request *req);
+
+extern void ablk_exit(struct crypto_tfm *tfm);
+
+extern int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name);
+
+extern int ablk_init(struct crypto_tfm *tfm);
+
+#endif /* _CRYPTO_ABLK_HELPER_H */
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/crypto/aes.h
index 80545a1cbe3..80545a1cbe3 100644
--- a/arch/x86/include/asm/aes.h
+++ b/arch/x86/include/asm/crypto/aes.h
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
new file mode 100644
index 00000000000..3e408bddc96
--- /dev/null
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -0,0 +1,115 @@
+/*
+ * Shared glue code for 128bit block ciphers
+ */
+
+#ifndef _CRYPTO_GLUE_HELPER_H
+#define _CRYPTO_GLUE_HELPER_H
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <asm/i387.h>
+#include <crypto/b128ops.h>
+
+typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
+typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv);
+
+#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
+#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
+#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
+
+struct common_glue_func_entry {
+ unsigned int num_blocks; /* number of blocks that @fn will process */
+ union {
+ common_glue_func_t ecb;
+ common_glue_cbc_func_t cbc;
+ common_glue_ctr_func_t ctr;
+ } fn_u;
+};
+
+struct common_glue_ctx {
+ unsigned int num_funcs;
+ int fpu_blocks_limit; /* -1 means fpu not needed at all */
+
+ /*
+ * First funcs entry must have largest num_blocks and last funcs entry
+ * must have num_blocks == 1!
+ */
+ struct common_glue_func_entry funcs[];
+};
+
+static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
+ struct blkcipher_desc *desc,
+ bool fpu_enabled, unsigned int nbytes)
+{
+ if (likely(fpu_blocks_limit < 0))
+ return false;
+
+ if (fpu_enabled)
+ return true;
+
+ /*
+ * Vector-registers are only used when chunk to be processed is large
+ * enough, so do not enable FPU until it is necessary.
+ */
+ if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+ return false;
+
+ if (desc) {
+ /* prevent sleeping if FPU is in use */
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ }
+
+ kernel_fpu_begin();
+ return true;
+}
+
+static inline void glue_fpu_end(bool fpu_enabled)
+{
+ if (fpu_enabled)
+ kernel_fpu_end();
+}
+
+static inline void u128_to_be128(be128 *dst, const u128 *src)
+{
+ dst->a = cpu_to_be64(src->a);
+ dst->b = cpu_to_be64(src->b);
+}
+
+static inline void be128_to_u128(u128 *dst, const be128 *src)
+{
+ dst->a = be64_to_cpu(src->a);
+ dst->b = be64_to_cpu(src->b);
+}
+
+static inline void u128_inc(u128 *i)
+{
+ i->b++;
+ if (!i->b)
+ i->a++;
+}
+
+extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes);
+
+extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src,
+ unsigned int nbytes);
+
+extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src,
+ unsigned int nbytes);
+
+extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+ struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes);
+
+#endif /* _CRYPTO_GLUE_HELPER_H */
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
new file mode 100644
index 00000000000..432deedd294
--- /dev/null
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -0,0 +1,32 @@
+#ifndef ASM_X86_SERPENT_AVX_H
+#define ASM_X86_SERPENT_AVX_H
+
+#include <linux/crypto.h>
+#include <crypto/serpent.h>
+
+#define SERPENT_PARALLEL_BLOCKS 8
+
+asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __serpent_enc_blk_8way_avx(ctx, dst, src, false);
+}
+
+static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __serpent_enc_blk_8way_avx(ctx, dst, src, true);
+}
+
+static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ serpent_dec_blk_8way_avx(ctx, dst, src);
+}
+
+#endif
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/crypto/serpent-sse2.h
index d3ef63fe0c8..e6e77dffbda 100644
--- a/arch/x86/include/asm/serpent.h
+++ b/arch/x86/include/asm/crypto/serpent-sse2.h
@@ -1,5 +1,5 @@
-#ifndef ASM_X86_SERPENT_H
-#define ASM_X86_SERPENT_H
+#ifndef ASM_X86_SERPENT_SSE2_H
+#define ASM_X86_SERPENT_SSE2_H
#include <linux/crypto.h>
#include <crypto/serpent.h>
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
new file mode 100644
index 00000000000..9d2c514bd5f
--- /dev/null
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -0,0 +1,46 @@
+#ifndef ASM_X86_TWOFISH_H
+#define ASM_X86_TWOFISH_H
+
+#include <linux/crypto.h>
+#include <crypto/twofish.h>
+#include <crypto/lrw.h>
+#include <crypto/b128ops.h>
+
+struct twofish_lrw_ctx {
+ struct lrw_table_ctx lrw_table;
+ struct twofish_ctx twofish_ctx;
+};
+
+struct twofish_xts_ctx {
+ struct twofish_ctx tweak_ctx;
+ struct twofish_ctx crypt_ctx;
+};
+
+/* regular block cipher functions from twofish_x86_64 module */
+asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+/* 3-way parallel cipher functions */
+asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+/* helpers from twofish_x86_64-3way module */
+extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
+extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv);
+extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
+ u128 *iv);
+
+extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen);
+
+extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
+
+extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen);
+
+#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index cc70c1c78ca..75ce3f47d20 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -4,9 +4,7 @@
enum reboot_type {
BOOT_TRIPLE = 't',
BOOT_KBD = 'k',
-#ifdef CONFIG_X86_32
BOOT_BIOS = 'b',
-#endif
BOOT_ACPI = 'a',
BOOT_EFI = 'e',
BOOT_CF9 = 'p',
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 0baa628e330..40afa0005c6 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -15,15 +15,6 @@ BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
-
-.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
- 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-.if NUM_INVALIDATE_TLB_VECTORS > \idx
-BUILD_INTERRUPT3(invalidate_interrupt\idx,
- (INVALIDATE_TLB_VECTOR_START)+\idx,
- smp_invalidate_interrupt)
-.endif
-.endr
#endif
BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index dbe82a5c5ea..d3d74698dce 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
virtual_dma_residue += virtual_dma_count;
virtual_dma_count = 0;
#ifdef TRACE_FLPY_INT
- printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
+ printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
virtual_dma_count, virtual_dma_residue, calls, bytes,
dma_wait);
calls = 0;
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 7a15153c675..b518c750993 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -49,6 +49,7 @@ extern const struct hypervisor_x86 *x86_hyper;
extern const struct hypervisor_x86 x86_hyper_vmware;
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_kvm;
static inline bool hypervisor_x2apic_available(void)
{
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index dffc38ee625..345c99cef15 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -5,7 +5,6 @@ extern struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_pass_through;
-extern int iommu_group_mf;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4b4448761e8..1508e518c7e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -119,17 +119,6 @@
*/
#define LOCAL_TIMER_VECTOR 0xef
-/* up to 32 vectors used for spreading out TLB flushes: */
-#if NR_CPUS <= 32
-# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS)
-#else
-# define NUM_INVALIDATE_TLB_VECTORS (32)
-#endif
-
-#define INVALIDATE_TLB_VECTOR_END (0xee)
-#define INVALIDATE_TLB_VECTOR_START \
- (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
-
#define NR_VECTORS 256
#define FPU_IRQ 13
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index e7d1c194d27..246617efd67 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -12,6 +12,7 @@
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 1ac46c22dd5..c764f43b71c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -192,8 +192,8 @@ struct x86_emulate_ops {
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
- bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+ void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
+ u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -280,9 +280,9 @@ struct x86_emulate_ctxt {
u8 modrm_seg;
bool rip_relative;
unsigned long _eip;
+ struct operand memop;
/* Fields above regs are cleared together. */
unsigned long regs[NR_VCPU_REGS];
- struct operand memop;
struct operand *memopp;
struct fetch_cache fetch;
struct read_cache io_read;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2709a..09155d64cf7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -48,12 +48,13 @@
#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
+#define CR3_PCID_ENABLED_RESERVED_BITS 0xFFFFFF0000000000ULL
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
#define CR4_RESERVED_BITS \
(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
- | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
@@ -175,6 +176,13 @@ enum {
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
+/*
+ * The following bit is set with PV-EOI, unset on EOI.
+ * We detect PV-EOI changes by guest by comparing
+ * this bit with PV-EOI in guest memory.
+ * See the implementation in apic_update_pv_eoi.
+ */
+#define KVM_APIC_PV_EOI_PENDING 1
/*
* We don't want allocation failures within the mmu code, so we preallocate
@@ -313,8 +321,8 @@ struct kvm_pmu {
u64 counter_bitmask[2];
u64 global_ctrl_mask;
u8 version;
- struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC];
- struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED];
+ struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
+ struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
u64 reprogram_pmi;
};
@@ -484,6 +492,11 @@ struct kvm_vcpu_arch {
u64 length;
u64 status;
} osvw;
+
+ struct {
+ u64 msr_val;
+ struct gfn_to_hva_cache data;
+ } pv_eoi;
};
struct kvm_lpage_info {
@@ -661,6 +674,7 @@ struct kvm_x86_ops {
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
+ bool (*invpcid_supported)(void);
void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -802,7 +816,20 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
-int kvm_pic_set_irq(void *opaque, int irq, int level);
+static inline int __kvm_irq_line_state(unsigned long *irq_state,
+ int irq_source_id, int level)
+{
+ /* Logical OR for level trig interrupt */
+ if (level)
+ __set_bit(irq_source_id, irq_state);
+ else
+ __clear_bit(irq_source_id, irq_state);
+
+ return !!(*irq_state);
+}
+
+int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
+void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 63ab1661d00..2f7712e08b1 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -22,6 +22,7 @@
#define KVM_FEATURE_CLOCKSOURCE2 3
#define KVM_FEATURE_ASYNC_PF 4
#define KVM_FEATURE_STEAL_TIME 5
+#define KVM_FEATURE_PV_EOI 6
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -37,6 +38,7 @@
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
+#define MSR_KVM_PV_EOI_EN 0x4b564d04
struct kvm_steal_time {
__u64 steal;
@@ -89,6 +91,11 @@ struct kvm_vcpu_pv_apf_data {
__u32 enabled;
};
+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
#ifdef __KERNEL__
#include <asm/processor.h>
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 441520e4174..a3ac52b29cb 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -33,6 +33,14 @@
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+#define MCACOD 0xffff /* MCA Error Code */
+
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
/* MCi_MISC register defines */
#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95274c..813ed103f45 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -115,8 +115,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
extern unsigned long long native_read_tsc(void);
-extern int native_rdmsr_safe_regs(u32 regs[8]);
-extern int native_wrmsr_safe_regs(u32 regs[8]);
+extern int rdmsr_safe_regs(u32 regs[8]);
+extern int wrmsr_safe_regs(u32 regs[8]);
static __always_inline unsigned long long __native_read_tsc(void)
{
@@ -187,43 +187,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
return err;
}
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
-{
- u32 gprs[8] = { 0 };
- int err;
-
- gprs[1] = msr;
- gprs[7] = 0x9c5a203a;
-
- err = native_rdmsr_safe_regs(gprs);
-
- *p = gprs[0] | ((u64)gprs[2] << 32);
-
- return err;
-}
-
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
-{
- u32 gprs[8] = { 0 };
-
- gprs[0] = (u32)val;
- gprs[1] = msr;
- gprs[2] = val >> 32;
- gprs[7] = 0x9c5a203a;
-
- return native_wrmsr_safe_regs(gprs);
-}
-
-static inline int rdmsr_safe_regs(u32 regs[8])
-{
- return native_rdmsr_safe_regs(regs);
-}
-
-static inline int wrmsr_safe_regs(u32 regs[8])
-{
- return native_wrmsr_safe_regs(regs);
-}
-
#define rdtscl(low) \
((low) = (u32)__native_read_tsc())
@@ -237,6 +200,8 @@ do { \
(high) = (u32)(_l >> 32); \
} while (0)
+#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+
#define rdtscp(low, high, aux) \
do { \
unsigned long long _val = native_read_tscp(&(aux)); \
@@ -248,8 +213,7 @@ do { \
#endif /* !CONFIG_PARAVIRT */
-
-#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \
+#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \
(u32)((val) >> 32))
#define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2))
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index dc580c42851..c0fa356e90d 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -44,28 +44,14 @@ struct nmiaction {
const char *name;
};
-#define register_nmi_handler(t, fn, fg, n) \
+#define register_nmi_handler(t, fn, fg, n, init...) \
({ \
- static struct nmiaction fn##_na = { \
+ static struct nmiaction init fn##_na = { \
.handler = (fn), \
.name = (n), \
.flags = (fg), \
}; \
- __register_nmi_handler((t), &fn##_na); \
-})
-
-/*
- * For special handlers that register/unregister in the
- * init section only. This should be considered rare.
- */
-#define register_nmi_handler_initonly(t, fn, fg, n) \
-({ \
- static struct nmiaction fn##_na __initdata = { \
- .handler = (fn), \
- .name = (n), \
- .flags = (fg), \
- }; \
- __register_nmi_handler((t), &fn##_na); \
+ __register_nmi_handler((t), &fn##_na); \
})
int __register_nmi_handler(unsigned int, struct nmiaction *);
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h
index 87bdbca72f9..72f9adf6eca 100644
--- a/arch/x86/include/asm/olpc.h
+++ b/arch/x86/include/asm/olpc.h
@@ -100,25 +100,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value);
extern int pci_olpc_init(void);
-/* EC related functions */
-
-extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
- unsigned char *outbuf, size_t outlen);
-
-/* EC commands */
-
-#define EC_FIRMWARE_REV 0x08
-#define EC_WRITE_SCI_MASK 0x1b
-#define EC_WAKE_UP_WLAN 0x24
-#define EC_WLAN_LEAVE_RESET 0x25
-#define EC_READ_EB_MODE 0x2a
-#define EC_SET_SCI_INHIBIT 0x32
-#define EC_SET_SCI_INHIBIT_RELEASE 0x34
-#define EC_WLAN_ENTER_RESET 0x35
-#define EC_WRITE_EXT_SCI_MASK 0x38
-#define EC_SCI_QUERY 0x84
-#define EC_EXT_SCI_QUERY 0x85
-
/* SCI source values */
#define EC_SCI_SRC_EMPTY 0x00
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf5270..a0facf3908d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -128,21 +128,11 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err)
return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}
-static inline int paravirt_rdmsr_regs(u32 *regs)
-{
- return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs);
-}
-
static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}
-static inline int paravirt_wrmsr_regs(u32 *regs)
-{
- return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs);
-}
-
/* These should all do BUG_ON(_err), but our headers are too tangled. */
#define rdmsr(msr, val1, val2) \
do { \
@@ -176,9 +166,6 @@ do { \
_err; \
})
-#define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs)
-#define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs)
-
static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
int err;
@@ -186,32 +173,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
*p = paravirt_read_msr(msr, &err);
return err;
}
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
-{
- u32 gprs[8] = { 0 };
- int err;
-
- gprs[1] = msr;
- gprs[7] = 0x9c5a203a;
-
- err = paravirt_rdmsr_regs(gprs);
-
- *p = gprs[0] | ((u64)gprs[2] << 32);
-
- return err;
-}
-
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
-{
- u32 gprs[8] = { 0 };
-
- gprs[0] = (u32)val;
- gprs[1] = msr;
- gprs[2] = val >> 32;
- gprs[7] = 0x9c5a203a;
-
- return paravirt_wrmsr_regs(gprs);
-}
static inline u64 paravirt_read_tsc(void)
{
@@ -252,6 +213,8 @@ do { \
high = _l >> 32; \
} while (0)
+#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
+
static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
{
return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
@@ -397,9 +360,10 @@ static inline void __flush_tlb_single(unsigned long addr)
static inline void flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
- unsigned long va)
+ unsigned long start,
+ unsigned long end)
{
- PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
+ PVOP_VCALL4(pv_mmu_ops.flush_tlb_others, cpumask, mm, start, end);
}
static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4987e..142236ed83a 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -153,9 +153,7 @@ struct pv_cpu_ops {
/* MSR, PMC and TSR operations.
err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
u64 (*read_msr)(unsigned int msr, int *err);
- int (*rdmsr_regs)(u32 *regs);
int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
- int (*wrmsr_regs)(u32 *regs);
u64 (*read_tsc)(void);
u64 (*read_pmc)(int counter);
@@ -250,7 +248,8 @@ struct pv_mmu_ops {
void (*flush_tlb_single)(unsigned long addr);
void (*flush_tlb_others)(const struct cpumask *cpus,
struct mm_struct *mm,
- unsigned long va);
+ unsigned long start,
+ unsigned long end);
/* Hooks for allocating and freeing a pagetable top-level */
int (*pgd_alloc)(struct mm_struct *mm);
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index b3a53174602..73e8eeff22e 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -7,9 +7,13 @@
#undef DEBUG
#ifdef DEBUG
-#define DBG(x...) printk(x)
+#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
-#define DBG(x...)
+#define DBG(fmt, ...) \
+do { \
+ if (0) \
+ printk(fmt, ##__VA_ARGS__); \
+} while (0)
#endif
#define PCI_PROBE_BIOS 0x0001
@@ -100,6 +104,7 @@ struct pci_raw_ops {
extern const struct pci_raw_ops *raw_pci_ops;
extern const struct pci_raw_ops *raw_pci_ext_ops;
+extern const struct pci_raw_ops pci_mmcfg;
extern const struct pci_raw_ops pci_direct_conf1;
extern bool port_cf9_safe;
@@ -135,6 +140,12 @@ struct pci_mmcfg_region {
extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
+extern int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
+extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
+extern int __devinit pci_mmconfig_insert(struct device *dev,
+ u16 seg, u8 start,
+ u8 end, phys_addr_t addr);
+extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
extern struct list_head pci_mmcfg_list;
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d9b8e3f7f42..1104afaba52 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -551,6 +551,12 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
{ [0 ... NR_CPUS-1] = _initvalue }; \
__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
+#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \
+ DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue; \
+ __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \
+ { [0 ... NR_CPUS-1] = _initvalue }; \
+ __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
+
#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
@@ -559,6 +565,11 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+ DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
+ extern __typeof__(_type) *_name##_early_ptr; \
+ extern __typeof__(_type) _name##_early_map[]
+
#define early_per_cpu_ptr(_name) (_name##_early_ptr)
#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
#define early_per_cpu(_name, _cpu) \
@@ -570,12 +581,18 @@ DECLARE_PER_CPU(unsigned long, this_cpu_off);
#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
DEFINE_PER_CPU(_type, _name) = _initvalue
+#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \
+ DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue
+
#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
#define DECLARE_EARLY_PER_CPU(_type, _name) \
DECLARE_PER_CPU(_type, _name)
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+ DECLARE_PER_CPU_READ_MOSTLY(_type, _name)
+
#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
#define early_per_cpu_ptr(_name) NULL
/* no early_per_cpu_map() */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 588f52ea810..cb4e43bce98 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -5,11 +5,10 @@
* Performance event hw details:
*/
-#define X86_PMC_MAX_GENERIC 32
-#define X86_PMC_MAX_FIXED 3
+#define INTEL_PMC_MAX_GENERIC 32
+#define INTEL_PMC_MAX_FIXED 3
+#define INTEL_PMC_IDX_FIXED 32
-#define X86_PMC_IDX_GENERIC 0
-#define X86_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
#define MSR_ARCH_PERFMON_PERFCTR0 0xc1
@@ -48,8 +47,7 @@
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
#define AMD64_NUM_COUNTERS 4
-#define AMD64_NUM_COUNTERS_F15H 6
-#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H
+#define AMD64_NUM_COUNTERS_CORE 6
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
@@ -121,16 +119,16 @@ struct x86_pmu_capability {
/* Instr_Retired.Any: */
#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
+#define INTEL_PMC_IDX_FIXED_INSTRUCTIONS (INTEL_PMC_IDX_FIXED + 0)
/* CPU_CLK_Unhalted.Core: */
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
+#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
/* CPU_CLK_Unhalted.Ref: */
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
-#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
-#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
+#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
+#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
/*
* We model BTS tracing as another fixed-mode PMC.
@@ -139,7 +137,7 @@ struct x86_pmu_capability {
* values are used by actual fixed events and higher values are used
* to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
*/
-#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
/*
* IBS cpuid feature detection
@@ -198,11 +196,16 @@ static inline u32 get_ibs_caps(void) { return 0; }
extern void perf_events_lapic_init(void);
/*
- * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
- * This flag is otherwise unused and ABI specified to be 0, so nobody should
- * care what we do with it.
+ * Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
+ * unused and ABI specified to be 0, so nobody should care what we do with
+ * them.
+ *
+ * EXACT - the IP points to the exact instruction that triggered the
+ * event (HW bugs exempt).
+ * VM - original X86_VM_MASK; see set_linear_ip().
*/
#define PERF_EFLAGS_EXACT (1UL << 3)
+#define PERF_EFLAGS_VM (1UL << 5)
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
@@ -234,8 +237,9 @@ struct perf_guest_switch_msr {
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
+extern void perf_check_microcode(void);
#else
-static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
+static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
*nr = 0;
return NULL;
@@ -247,6 +251,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
}
static inline void perf_events_lapic_init(void) { }
+static inline void perf_check_microcode(void) { }
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 98391db840c..f2b489cf160 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -2,9 +2,9 @@
#define _ASM_X86_PGTABLE_2LEVEL_H
#define pte_ERROR(e) \
- printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
+ pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low)
#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+ pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e))
/*
* Certain architectures need to do special things when PTEs
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 43876f16caf..4cc9f2b7cdc 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -9,13 +9,13 @@
*/
#define pte_ERROR(e) \
- printk("%s:%d: bad pte %p(%08lx%08lx).\n", \
+ pr_err("%s:%d: bad pte %p(%08lx%08lx)\n", \
__FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low)
#define pmd_ERROR(e) \
- printk("%s:%d: bad pmd %p(%016Lx).\n", \
+ pr_err("%s:%d: bad pmd %p(%016Lx)\n", \
__FILE__, __LINE__, &(e), pmd_val(e))
#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %p(%016Lx).\n", \
+ pr_err("%s:%d: bad pgd %p(%016Lx)\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
/* Rules for using set_pte: the pte being assigned *must* be
@@ -47,16 +47,26 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
* they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
* operations.
*
- * Without THP if the mmap_sem is hold for reading, the
- * pmd can only transition from null to not null while pmd_read_atomic runs.
- * So there's no need of literally reading it atomically.
+ * Without THP if the mmap_sem is hold for reading, the pmd can only
+ * transition from null to not null while pmd_read_atomic runs. So
+ * we can always return atomic pmd values with this function.
*
* With THP if the mmap_sem is hold for reading, the pmd can become
- * THP or null or point to a pte (and in turn become "stable") at any
- * time under pmd_read_atomic, so it's mandatory to read it atomically
- * with cmpxchg8b.
+ * trans_huge or none or point to a pte (and in turn become "stable")
+ * at any time under pmd_read_atomic. We could read it really
+ * atomically here with a atomic64_read for the THP enabled case (and
+ * it would be a whole lot simpler), but to avoid using cmpxchg8b we
+ * only return an atomic pmdval if the low part of the pmdval is later
+ * found stable (i.e. pointing to a pte). And we're returning a none
+ * pmdval if the low part of the pmd is none. In some cases the high
+ * and low part of the pmdval returned may not be consistent if THP is
+ * enabled (the low part may point to previously mapped hugepage,
+ * while the high part may point to a more recently mapped hugepage),
+ * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
+ * of the pmd to be read atomically to decide if the pmd is unstable
+ * or not, with the only exception of when the low part of the pmd is
+ * zero in which case we return a none pmd.
*/
-#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
pmdval_t ret;
@@ -74,12 +84,6 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
return (pmd_t) { ret };
}
-#else /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
- return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 975f709e09a..8251be02301 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[];
extern void paging_init(void);
#define pte_ERROR(e) \
- printk("%s:%d: bad pte %p(%016lx).\n", \
+ pr_err("%s:%d: bad pte %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pte_val(e))
#define pmd_ERROR(e) \
- printk("%s:%d: bad pmd %p(%016lx).\n", \
+ pr_err("%s:%d: bad pmd %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pmd_val(e))
#define pud_ERROR(e) \
- printk("%s:%d: bad pud %p(%016lx).\n", \
+ pr_err("%s:%d: bad pud %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pud_val(e))
#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %p(%016lx).\n", \
+ pr_err("%s:%d: bad pgd %p(%016lx)\n", \
__FILE__, __LINE__, &(e), pgd_val(e))
struct mm_struct;
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index f8ab3eaad12..aea1d1d848c 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -44,6 +44,7 @@
*/
#define X86_CR3_PWT 0x00000008 /* Page Write Through */
#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
+#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
/*
* Intel CPU features in CR4
@@ -61,6 +62,7 @@
#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
+#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 39bc5777211..d048cad9bca 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -61,6 +61,19 @@ static inline void *current_text_addr(void)
# define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
+enum tlb_infos {
+ ENTRIES,
+ NR_INFO
+};
+
+extern u16 __read_mostly tlb_lli_4k[NR_INFO];
+extern u16 __read_mostly tlb_lli_2m[NR_INFO];
+extern u16 __read_mostly tlb_lli_4m[NR_INFO];
+extern u16 __read_mostly tlb_lld_4k[NR_INFO];
+extern u16 __read_mostly tlb_lld_2m[NR_INFO];
+extern u16 __read_mostly tlb_lld_4m[NR_INFO];
+extern s8 __read_mostly tlb_flushall_shift;
+
/*
* CPU type and hardware bug flags. Kept separately for each CPU.
* Members of this structure are referenced in head.S, so think twice
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fce3f4ae5bd..fe1ec5bcd84 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -21,8 +21,9 @@ struct real_mode_header {
u32 wakeup_header;
#endif
/* APM/BIOS reboot */
-#ifdef CONFIG_X86_32
u32 machine_real_restart_asm;
+#ifdef CONFIG_X86_64
+ u32 machine_real_restart_seg;
#endif
};
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 92f297069e8..a82c4f1b4d8 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -18,8 +18,8 @@ extern struct machine_ops machine_ops;
void native_machine_crash_shutdown(struct pt_regs *regs);
void native_machine_shutdown(void);
-void machine_real_restart(unsigned int type);
-/* These must match dispatch_table in reboot_32.S */
+void __noreturn machine_real_restart(unsigned int type);
+/* These must match dispatch in arch/x86/realmore/rm/reboot.S */
#define MRR_BIOS 0
#define MRR_APM 1
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f48394513c3..4f19a152603 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -31,12 +31,12 @@ static inline bool cpu_has_ht_siblings(void)
return has_siblings;
}
-DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
-DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
/* cpus sharing the last level cache: */
-DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
-DECLARE_PER_CPU(u16, cpu_llc_id);
-DECLARE_PER_CPU(int, cpu_number);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
+DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
static inline struct cpumask *cpu_sibling_mask(int cpu)
{
@@ -53,10 +53,10 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
return per_cpu(cpu_llc_shared_map, cpu);
}
-DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
-DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
-DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
#endif
/* Static state in head.S used to set up a CPU */
@@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
-/* We don't mark CPUs online until __cpu_up(), so we need another measure */
-static inline int num_booting_cpus(void)
-{
- return cpumask_weight(cpu_callout_mask);
-}
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
static inline int wbinvd_on_all_cpus(void)
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 829215fef9e..4fef20773b8 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -4,7 +4,14 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+
+#define tlb_flush(tlb) \
+{ \
+ if (tlb->fullmm == 0) \
+ flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \
+ else \
+ flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \
+}
#include <asm-generic/tlb.h>
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 36a1a2ab87d..74a44333545 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -73,14 +73,10 @@ static inline void __flush_tlb_one(unsigned long addr)
* - flush_tlb_page(vma, vmaddr) flushes one page
* - flush_tlb_range(vma, start, end) flushes a range of pages
* - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_others(cpumask, mm, va) flushes TLBs on other cpus
+ * - flush_tlb_others(cpumask, mm, start, end) flushes TLBs on other cpus
*
* ..but the i386 has somewhat limited tlb flushing capabilities,
* and page-granular flushes are available only on i486 and up.
- *
- * x86-64 can only flush individual pages or full VMs. For a range flush
- * we always do the full VM. Might be worth trying if for a small
- * range a few INVLPGs in a row are a win.
*/
#ifndef CONFIG_SMP
@@ -109,9 +105,17 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
__flush_tlb();
}
+static inline void flush_tlb_mm_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end, unsigned long vmflag)
+{
+ if (mm == current->active_mm)
+ __flush_tlb();
+}
+
static inline void native_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
- unsigned long va)
+ unsigned long start,
+ unsigned long end)
{
}
@@ -119,27 +123,35 @@ static inline void reset_lazy_tlbstate(void)
{
}
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ flush_tlb_all();
+}
+
#else /* SMP */
#include <asm/smp.h>
#define local_flush_tlb() __flush_tlb()
+#define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+
+#define flush_tlb_range(vma, start, end) \
+ flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
extern void flush_tlb_all(void);
extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag);
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
#define flush_tlb() flush_tlb_current_task()
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
- flush_tlb_mm(vma->vm_mm);
-}
-
void native_flush_tlb_others(const struct cpumask *cpumask,
- struct mm_struct *mm, unsigned long va);
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end);
#define TLBSTATE_OK 1
#define TLBSTATE_LAZY 2
@@ -159,13 +171,8 @@ static inline void reset_lazy_tlbstate(void)
#endif /* SMP */
#ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va)
+#define flush_tlb_others(mask, mm, start, end) \
+ native_flush_tlb_others(mask, mm, start, end)
#endif
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
- flush_tlb_all();
-}
-
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 8e796fbbf9c..d8def8b3dba 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -17,6 +17,8 @@
/* Handles exceptions in both to and from, but doesn't do access_ok */
__must_check unsigned long
+copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
+__must_check unsigned long
copy_user_generic_string(void *to, const void *from, unsigned len);
__must_check unsigned long
copy_user_generic_unrolled(void *to, const void *from, unsigned len);
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len)
{
unsigned ret;
- alternative_call(copy_user_generic_unrolled,
+ /*
+ * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
+ * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
+ * Otherwise, use copy_user_generic_unrolled.
+ */
+ alternative_call_2(copy_user_generic_unrolled,
copy_user_generic_string,
X86_FEATURE_REP_GOOD,
+ copy_user_enhanced_fast_string,
+ X86_FEATURE_ERMS,
ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
"=d" (len)),
"1" (to), "2" (from), "3" (len)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 4437001d8e3..0d9776e9e2d 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -15,7 +15,6 @@
# ifdef CONFIG_X86_32
# include <asm/unistd_32.h>
-# define __ARCH_WANT_IPC_PARSE_VERSION
# define __ARCH_WANT_STAT64
# define __ARCH_WANT_SYS_IPC
# define __ARCH_WANT_SYS_OLD_MMAP
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 1e9bed14f7a..f3971bbcd1d 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -48,7 +48,7 @@ struct arch_uprobe_task {
#endif
};
-extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 3bb9491b765..b47c2a82ff1 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -15,7 +15,8 @@ extern void uv_nmi_init(void);
extern void uv_system_init(void);
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
- unsigned long va,
+ unsigned long start,
+ unsigned end,
unsigned int cpu);
#else /* X86_UV */
@@ -26,7 +27,7 @@ static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline const struct cpumask *
uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
- unsigned long va, unsigned int cpu)
+ unsigned long start, unsigned long end, unsigned int cpu)
{ return cpumask; }
#endif /* X86_UV */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 6149b476d9d..a06983cdc12 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -140,6 +140,9 @@
#define IPI_RESET_LIMIT 1
/* after this # consecutive successes, bump up the throttle if it was lowered */
#define COMPLETE_THRESHOLD 5
+/* after this # of giveups (fall back to kernel IPI's) disable the use of
+ the BAU for a period of time */
+#define GIVEUP_LIMIT 100
#define UV_LB_SUBNODEID 0x10
@@ -166,7 +169,6 @@
#define FLUSH_RETRY_TIMEOUT 2
#define FLUSH_GIVEUP 3
#define FLUSH_COMPLETE 4
-#define FLUSH_RETRY_BUSYBUG 5
/*
* tuning the action when the numalink network is extremely delayed
@@ -175,7 +177,7 @@
microseconds */
#define CONGESTED_REPS 10 /* long delays averaged over
this many broadcasts */
-#define CONGESTED_PERIOD 30 /* time for the bau to be
+#define DISABLED_PERIOD 10 /* time for the bau to be
disabled, in seconds */
/* see msg_type: */
#define MSG_NOOP 0
@@ -520,6 +522,12 @@ struct ptc_stats {
unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
+ unsigned long s_overipilimit; /* over the ipi reset limit */
+ unsigned long s_giveuplimit; /* disables, over giveup limit*/
+ unsigned long s_enters; /* entries to the driver */
+ unsigned long s_ipifordisabled; /* fall back to IPI; disabled */
+ unsigned long s_plugged; /* plugged by h/w bug*/
+ unsigned long s_congested; /* giveup on long wait */
/* destination statistics */
unsigned long d_alltlb; /* times all tlb's on this
cpu were flushed */
@@ -586,8 +594,8 @@ struct bau_control {
int timeout_tries;
int ipi_attempts;
int conseccompletes;
- int baudisabled;
- int set_bau_off;
+ short nobau;
+ short baudisabled;
short cpu;
short osnode;
short uvhub_cpu;
@@ -596,14 +604,16 @@ struct bau_control {
short cpus_in_socket;
short cpus_in_uvhub;
short partition_base_pnode;
- short using_desc; /* an index, like uvhub_cpu */
- unsigned int inuse_map;
+ short busy; /* all were busy (war) */
unsigned short message_number;
unsigned short uvhub_quiesce;
short socket_acknowledge_count[DEST_Q_SIZE];
cycles_t send_message;
+ cycles_t period_end;
+ cycles_t period_time;
spinlock_t uvhub_lock;
spinlock_t queue_lock;
+ spinlock_t disable_lock;
/* tunables */
int max_concurr;
int max_concurr_const;
@@ -614,9 +624,9 @@ struct bau_control {
int complete_threshold;
int cong_response_us;
int cong_reps;
- int cong_period;
- unsigned long clocks_per_100_usec;
- cycles_t period_time;
+ cycles_t disabled_period;
+ int period_giveups;
+ int giveup_limit;
long period_requests;
struct hub_and_pnode *thp;
};
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 31f180c21ce..74fcb963595 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -60,6 +60,7 @@
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -281,6 +282,7 @@ enum vmcs_field {
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_INVPCID 58
/*
* Interruption-information format
@@ -404,6 +406,7 @@ enum vmcs_field {
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
+#define VMX_EPT_AD_BIT (1ull << 21)
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
@@ -415,11 +418,14 @@ enum vmcs_field {
#define VMX_EPT_MAX_GAW 0x4
#define VMX_EPT_MT_EPTE_SHIFT 3
#define VMX_EPT_GAW_EPTP_SHIFT 3
+#define VMX_EPT_AD_ENABLE_BIT (1ull << 6)
#define VMX_EPT_DEFAULT_MT 0x6ull
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
#define VMX_EPT_IPAT_BIT (1ull << 6)
+#define VMX_EPT_ACCESS_BIT (1ull << 8)
+#define VMX_EPT_DIRTY_BIT (1ull << 9)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 92e54abf89e..f90f0a587c6 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -9,15 +9,6 @@
#include <asm/ipi.h>
#include <linux/cpumask.h>
-/*
- * Need to use more than cpu 0, because we need more vectors
- * when MSI-X are used.
- */
-static const struct cpumask *x2apic_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
static int x2apic_apic_id_valid(int apicid)
{
return 1;
@@ -28,15 +19,6 @@ static int x2apic_apic_id_registered(void)
return 1;
}
-/*
- * For now each logical cpu is in its own vector allocation domain.
- */
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
static void
__x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
{
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c090af10ac7..38155f66714 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -156,7 +156,6 @@ struct x86_cpuinit_ops {
/**
* struct x86_platform_ops - platform specific runtime functions
* @calibrate_tsc: calibrate TSC
- * @wallclock_init: init the wallclock device
* @get_wallclock: get time from HW clock like RTC etc.
* @set_wallclock: set time back to HW clock
* @is_untracked_pat_range exclude from PAT logic
@@ -164,10 +163,10 @@ struct x86_cpuinit_ops {
* @i8042_detect pre-detect if i8042 controller exists
* @save_sched_clock_state: save state for sched_clock() on suspend
* @restore_sched_clock_state: restore state for sched_clock() on resume
+ * @apic_post_init: adjust apic if neeeded
*/
struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void);
- void (*wallclock_init)(void);
unsigned long (*get_wallclock)(void);
int (*set_wallclock)(unsigned long nowtime);
void (*iommu_shutdown)(void);
@@ -177,6 +176,7 @@ struct x86_platform_ops {
int (*i8042_detect)(void);
void (*save_sched_clock_state)(void);
void (*restore_sched_clock_state)(void);
+ void (*apic_post_init)(void);
};
struct pci_dev;
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 5728852fb90..59c226d120c 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -48,6 +48,7 @@
#include <xen/interface/sched.h>
#include <xen/interface/physdev.h>
#include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>
/*
* The hypercall asms have to meet several constraints:
@@ -302,6 +303,13 @@ HYPERVISOR_set_timer_op(u64 timeout)
}
static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+ mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+ return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
{
platform_op->interface_version = XENPF_INTERFACE_VERSION;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8afb6931981..b2297e58c6e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -422,12 +422,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
return 0;
}
- if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+ if (intsrc->source_irq == 0) {
if (acpi_skip_timer_override) {
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ printk(PREFIX "BIOS IRQ0 override ignored.\n");
return 0;
}
- if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+
+ if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity
+ && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
}
@@ -1334,17 +1336,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
}
/*
- * Force ignoring BIOS IRQ0 pin2 override
+ * Force ignoring BIOS IRQ0 override
*/
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
- /*
- * The ati_ixp4x0_rev() early PCI quirk should have set
- * the acpi_skip_timer_override flag already:
- */
if (!acpi_skip_timer_override) {
- WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
+ pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",
d->ident);
acpi_skip_timer_override = 1;
}
@@ -1438,7 +1435,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
* is enabled. This input is incorrectly designated the
* ISA IRQ 0 via an interrupt source override even though
* it is wired to the output of the master 8259A and INTIN0
- * is not connected at all. Force ignoring BIOS IRQ0 pin2
+ * is not connected at all. Force ignoring BIOS IRQ0
* override in that cases.
*/
{
@@ -1473,6 +1470,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
},
},
+ {
+ .callback = dmi_ignore_irq0_timer_override,
+ .ident = "FUJITSU SIEMENS",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
+ },
+ },
{}
};
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 95bf99de905..1b8e5a03d94 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,10 +25,6 @@ unsigned long acpi_realmode_flags;
static char temp_stack[4096];
#endif
-asmlinkage void acpi_enter_s3(void)
-{
- acpi_enter_sleep_state(3, wake_sleep_flags);
-}
/**
* acpi_suspend_lowlevel - save kernel state
*
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 5653a5791ec..67f59f8c695 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -2,7 +2,6 @@
* Variables and functions used by the code in sleep.c
*/
-#include <linux/linkage.h>
#include <asm/realmode.h>
extern unsigned long saved_video_mode;
@@ -11,7 +10,6 @@ extern long saved_magic;
extern int wakeup_pmode_return;
extern u8 wake_sleep_flags;
-extern asmlinkage void acpi_enter_s3(void);
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index 72610839f03..13ab720573e 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -74,7 +74,9 @@ restore_registers:
ENTRY(do_suspend_lowlevel)
call save_processor_state
call save_registers
- call acpi_enter_s3
+ pushl $3
+ call acpi_enter_sleep_state
+ addl $4, %esp
# In case of S3 failure, we'll emerge here. Jump
# to ret_point to recover
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 014d1d28c39..8ea5164cbd0 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -71,7 +71,9 @@ ENTRY(do_suspend_lowlevel)
movq %rsi, saved_rsi
addq $8, %rsp
- call acpi_enter_s3
+ movl $3, %edi
+ xorl %eax, %eax
+ call acpi_enter_sleep_state
/* in case something went wrong, restore the machine status and go on */
jmp resume_point
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 1f84794f075..afb7ff79a29 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "SMP alternatives: " fmt
+
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
@@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str)
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
-#define DPRINTK(fmt, args...) if (debug_alternative) \
- printk(KERN_DEBUG fmt, args)
+#define DPRINTK(fmt, ...) \
+do { \
+ if (debug_alternative) \
+ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+} while (0)
/*
* Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
@@ -219,7 +224,7 @@ void __init arch_init_ideal_nops(void)
ideal_nops = intel_nops;
#endif
}
-
+ break;
default:
#ifdef CONFIG_X86_64
ideal_nops = k8_nops;
@@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp)
* If this still occurs then you should see a hang
* or crash shortly after this line:
*/
- printk("lockdep: fixing up alternatives.\n");
+ pr_info("lockdep: fixing up alternatives\n");
#endif
if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
@@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp)
if (smp == smp_mode) {
/* nothing */
} else if (smp) {
- printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
+ pr_info("switching to SMP code\n");
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
list_for_each_entry(mod, &smp_alt_modules, next)
alternatives_smp_lock(mod->locks, mod->locks_end,
mod->text, mod->text_end);
} else {
- printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+ pr_info("switching to UP code\n");
set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
list_for_each_entry(mod, &smp_alt_modules, next)
@@ -546,7 +551,7 @@ void __init alternative_instructions(void)
#ifdef CONFIG_SMP
if (smp_alt_once) {
if (1 == num_possible_cpus()) {
- printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+ pr_info("switching to UP code\n");
set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
@@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data)
struct text_poke_param *p;
int i;
- if (atomic_dec_and_test(&stop_machine_first)) {
+ if (atomic_xchg(&stop_machine_first, 0)) {
for (i = 0; i < tpp->nparams; i++) {
p = &tpp->params[i];
text_poke(p->addr, p->opcode, p->len);
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index be16854591c..aadf3359e2a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -2,6 +2,9 @@
* Shared support code for AMD K8 northbridges and derivates.
* Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/init.h>
@@ -16,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
{}
};
EXPORT_SYMBOL(amd_nb_misc_ids);
@@ -258,7 +262,7 @@ void amd_flush_garts(void)
}
spin_unlock_irqrestore(&gart_lock, flags);
if (!flushed)
- printk("nothing to flush?\n");
+ pr_notice("nothing to flush?\n");
}
EXPORT_SYMBOL_GPL(amd_flush_garts);
@@ -269,11 +273,10 @@ static __init int init_amd_nbs(void)
err = amd_cache_northbridges();
if (err < 0)
- printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+ pr_notice("Cannot enumerate AMD northbridges\n");
if (amd_cache_gart() < 0)
- printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
- "GART support disabled.\n");
+ pr_notice("Cannot initialize GART flush words, GART support disabled\n");
return err;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 39a222e094a..24deb308232 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -75,8 +75,8 @@ physid_mask_t phys_cpu_present_map;
/*
* Map cpu index to physical APIC ID
*/
-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
@@ -88,7 +88,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
* used for the mapping. This is where the behaviors of x86_64 and 32
* actually diverge. Let's keep it ugly for now.
*/
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
/*
* Knob to control our willingness to enable the local APIC.
@@ -2123,6 +2123,42 @@ void default_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
+int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask,
+ unsigned int *apicid)
+{
+ unsigned int cpu;
+
+ for_each_cpu_and(cpu, cpumask, andmask) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask))
+ break;
+ }
+
+ if (likely(cpu < nr_cpu_ids)) {
+ *apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Override the generic EOI implementation with an optimized version.
+ * Only called during early boot when only one CPU is active and with
+ * interrupts disabled, so we know this does not race with actual APIC driver
+ * use.
+ */
+void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
+{
+ struct apic **drv;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ /* Should happen once for each apic */
+ WARN_ON((*drv)->eoi_write == eoi_write);
+ (*drv)->eoi_write = eoi_write;
+ }
+}
+
/*
* Power management
*/
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 0e881c46e8c..00c77cf78e9 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 1;
}
-static const struct cpumask *flat_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_clear(retmask);
- cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
/*
* Set up the logical destination ID.
*
@@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
}
static void
- flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
{
unsigned long mask = cpumask_bits(cpumask)[0];
int cpu = smp_processor_id();
@@ -186,7 +167,7 @@ static struct apic apic_flat = {
.irq_delivery_mode = dest_LowestPrio,
.irq_dest_mode = 1, /* logical */
- .target_cpus = flat_target_cpus,
+ .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
@@ -210,8 +191,7 @@ static struct apic apic_flat = {
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFu << 24,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.send_IPI_mask = flat_send_IPI_mask,
.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
@@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-static const struct cpumask *physflat_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
-static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
{
default_send_IPI_mask_sequence_phys(cpumask, vector);
@@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector)
physflat_send_IPI_mask(cpu_online_mask, vector);
}
-static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = cpumask_first(cpumask);
- if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
- else
- return BAD_APICID;
-}
-
-static unsigned int
-physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
- return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
static int physflat_probe(void)
{
if (apic == &apic_physflat || num_possible_cpus() > 8)
@@ -345,13 +282,13 @@ static struct apic apic_physflat = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = physflat_target_cpus,
+ .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
- .vector_allocation_domain = physflat_vector_allocation_domain,
+ .vector_allocation_domain = default_vector_allocation_domain,
/* not needed, but shouldn't hurt: */
.init_apic_ldr = flat_init_apic_ldr,
@@ -370,8 +307,7 @@ static struct apic apic_physflat = {
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFu << 24,
- .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
.send_IPI_mask = physflat_send_IPI_mask,
.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index a6e4c6e06c0..e145f28b409 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit)
return physid_isset(bit, phys_cpu_present_map);
}
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask)
{
if (cpu != 0)
pr_warning("APIC: Vector allocated for non-BSP cpu\n");
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
+ cpumask_copy(retmask, cpumask_of(cpu));
}
static u32 noop_apic_read(u32 reg)
@@ -159,8 +159,7 @@ struct apic apic_noop = {
.set_apic_id = NULL,
.apic_id_mask = 0x0F << 24,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.send_IPI_mask = noop_send_IPI_mask,
.send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 6ec6d5d297c..bc552cff257 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
return initial_apic_id >> index_msb;
}
-static const struct cpumask *numachip_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
-static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
union numachip_csr_g3_ext_irq_gen int_gen;
@@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector)
__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}
-static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- cpu = cpumask_first(cpumask);
- if (likely((unsigned)cpu < nr_cpu_ids))
- return per_cpu(x86_cpu_to_apicid, cpu);
-
- return BAD_APICID;
-}
-
-static unsigned int
-numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
- return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
static int __init numachip_probe(void)
{
return apic == &apic_numachip;
@@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = numachip_target_cpus,
+ .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
- .vector_allocation_domain = numachip_vector_allocation_domain,
+ .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = flat_init_apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = {
.set_apic_id = set_apic_id,
.apic_id_mask = 0xffU << 24,
- .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
.send_IPI_mask = numachip_send_IPI_mask,
.send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 31fbdbfbf96..d50e3640d5a 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void)
return 1;
}
-static const struct cpumask *bigsmp_target_cpus(void)
-{
-#ifdef CONFIG_SMP
- return cpu_online_mask;
-#else
- return cpumask_of(0);
-#endif
-}
-
static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
{
return 0;
@@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
return 1;
}
-/* As we are using single CPU as destination, pick only one CPU here */
-static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- int cpu = cpumask_first(cpumask);
-
- if (cpu < nr_cpu_ids)
- return cpu_physical_id(cpu);
- return BAD_APICID;
-}
-
-static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- return cpu_physical_id(cpu);
- }
- return BAD_APICID;
-}
-
static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
{
return cpuid_apic >> index_msb;
@@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
{ } /* NULL entry stops DMI scanning */
};
-static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
static int probe_bigsmp(void)
{
if (def_to_bigsmp)
@@ -205,13 +164,13 @@ static struct apic apic_bigsmp = {
/* phys delivery to target CPU: */
.irq_dest_mode = 0,
- .target_cpus = bigsmp_target_cpus,
+ .target_cpus = default_target_cpus,
.disable_esr = 1,
.dest_logical = 0,
.check_apicid_used = bigsmp_check_apicid_used,
.check_apicid_present = bigsmp_check_apicid_present,
- .vector_allocation_domain = bigsmp_vector_allocation_domain,
+ .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = bigsmp_init_apic_ldr,
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
@@ -229,8 +188,7 @@ static struct apic apic_bigsmp = {
.set_apic_id = NULL,
.apic_id_mask = 0xFF << 24,
- .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
.send_IPI_mask = bigsmp_send_IPI_mask,
.send_IPI_mask_allbutself = NULL,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index db4ab1be3c7..0874799a98c 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void)
WARN(1, "Command failed, status = %x\n", mip_status);
}
-static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_clear(retmask);
- cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
-
static void es7000_wait_for_init_deassert(atomic_t *deassert)
{
while (!atomic_read(deassert))
@@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
return 1;
}
-static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
{
unsigned int round = 0;
- int cpu, uninitialized_var(apicid);
+ unsigned int cpu, uninitialized_var(apicid);
/*
* The cpus in the mask must all be on the apic cluster.
*/
- for_each_cpu(cpu, cpumask) {
+ for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
WARN(1, "Not a valid mask!");
- return BAD_APICID;
+ return -EINVAL;
}
- apicid = new_apicid;
+ apicid |= new_apicid;
round++;
}
- return apicid;
+ if (!round)
+ return -EINVAL;
+ *dest_id = apicid;
+ return 0;
}
-static unsigned int
+static int
es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask)
+ const struct cpumask *andmask,
+ unsigned int *apicid)
{
- int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
+ *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return apicid;
+ return 0;
cpumask_and(cpumask, inmask, andmask);
- cpumask_and(cpumask, cpumask, cpu_online_mask);
- apicid = es7000_cpu_mask_to_apicid(cpumask);
+ es7000_cpu_mask_to_apicid(cpumask, apicid);
free_cpumask_var(cpumask);
- return apicid;
+ return 0;
}
static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = {
.check_apicid_used = es7000_check_apicid_used,
.check_apicid_present = es7000_check_apicid_present,
- .vector_allocation_domain = es7000_vector_allocation_domain,
+ .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = es7000_init_apic_ldr_cluster,
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
@@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = {
.set_apic_id = NULL,
.apic_id_mask = 0xFF << 24,
- .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
.send_IPI_mask = es7000_send_IPI_mask,
@@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = {
.check_apicid_used = es7000_check_apicid_used,
.check_apicid_present = es7000_check_apicid_present,
- .vector_allocation_domain = es7000_vector_allocation_domain,
+ .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = es7000_init_apic_ldr,
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
@@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = {
.set_apic_id = NULL,
.apic_id_mask = 0xFF << 24,
- .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
.send_IPI_mask = es7000_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5f0ff597437..a6c64aaddf9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
entry = alloc_irq_pin_list(node);
if (!entry) {
- printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
- node, apic, pin);
+ pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
+ node, apic, pin);
return -ENOMEM;
}
entry->apic = apic;
@@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
ioapic_mask_entry(apic, pin);
entry = ioapic_read_entry(apic, pin);
if (entry.irr)
- printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+ pr_err("Unable to reset IRR for apic: %d, pin :%d\n",
mpc_ioapic_id(apic), pin);
}
@@ -895,7 +895,7 @@ static int irq_polarity(int idx)
}
case 2: /* reserved */
{
- printk(KERN_WARNING "broken BIOS!!\n");
+ pr_warn("broken BIOS!!\n");
polarity = 1;
break;
}
@@ -906,7 +906,7 @@ static int irq_polarity(int idx)
}
default: /* invalid */
{
- printk(KERN_WARNING "broken BIOS!!\n");
+ pr_warn("broken BIOS!!\n");
polarity = 1;
break;
}
@@ -948,7 +948,7 @@ static int irq_trigger(int idx)
}
default:
{
- printk(KERN_WARNING "broken BIOS!!\n");
+ pr_warn("broken BIOS!!\n");
trigger = 1;
break;
}
@@ -962,7 +962,7 @@ static int irq_trigger(int idx)
}
case 2: /* reserved */
{
- printk(KERN_WARNING "broken BIOS!!\n");
+ pr_warn("broken BIOS!!\n");
trigger = 1;
break;
}
@@ -973,7 +973,7 @@ static int irq_trigger(int idx)
}
default: /* invalid */
{
- printk(KERN_WARNING "broken BIOS!!\n");
+ pr_warn("broken BIOS!!\n");
trigger = 0;
break;
}
@@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin)
* Debugging check, we are in big trouble if this message pops up!
*/
if (mp_irqs[idx].dstirq != pin)
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+ pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
if (test_bit(bus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
@@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
- static int current_offset = VECTOR_OFFSET_START % 8;
- unsigned int old_vector;
+ static int current_offset = VECTOR_OFFSET_START % 16;
int cpu, err;
cpumask_var_t tmp_mask;
@@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
return -ENOMEM;
- old_vector = cfg->vector;
- if (old_vector) {
- cpumask_and(tmp_mask, mask, cpu_online_mask);
- cpumask_and(tmp_mask, cfg->domain, tmp_mask);
- if (!cpumask_empty(tmp_mask)) {
- free_cpumask_var(tmp_mask);
- return 0;
- }
- }
-
/* Only try and allocate irqs on cpus that are present */
err = -ENOSPC;
- for_each_cpu_and(cpu, mask, cpu_online_mask) {
- int new_cpu;
- int vector, offset;
+ cpumask_clear(cfg->old_domain);
+ cpu = cpumask_first_and(mask, cpu_online_mask);
+ while (cpu < nr_cpu_ids) {
+ int new_cpu, vector, offset;
- apic->vector_allocation_domain(cpu, tmp_mask);
+ apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+ if (cpumask_subset(tmp_mask, cfg->domain)) {
+ err = 0;
+ if (cpumask_equal(tmp_mask, cfg->domain))
+ break;
+ /*
+ * New cpumask using the vector is a proper subset of
+ * the current in use mask. So cleanup the vector
+ * allocation for the members that are not used anymore.
+ */
+ cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+ cfg->move_in_progress = 1;
+ cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+ break;
+ }
vector = current_vector;
offset = current_offset;
next:
- vector += 8;
+ vector += 16;
if (vector >= first_system_vector) {
- /* If out of vectors on large boxen, must share them. */
- offset = (offset + 1) % 8;
+ offset = (offset + 1) % 16;
vector = FIRST_EXTERNAL_VECTOR + offset;
}
- if (unlikely(current_vector == vector))
+
+ if (unlikely(current_vector == vector)) {
+ cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+ cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+ cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
continue;
+ }
if (test_bit(vector, used_vectors))
goto next;
@@ -1162,7 +1171,7 @@ next:
/* Found one! */
current_vector = vector;
current_offset = offset;
- if (old_vector) {
+ if (cfg->vector) {
cfg->move_in_progress = 1;
cpumask_copy(cfg->old_domain, cfg->domain);
}
@@ -1195,7 +1204,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
BUG_ON(!cfg->vector);
vector = cfg->vector;
- for_each_cpu(cpu, cfg->domain)
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = 0;
@@ -1203,7 +1212,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
if (likely(!cfg->move_in_progress))
return;
- for_each_cpu(cpu, cfg->old_domain) {
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
vector++) {
if (per_cpu(vector_irq, cpu)[vector] != irq)
@@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
if (!IO_APIC_IRQ(irq))
return;
- /*
- * For legacy irqs, cfg->domain starts with cpu 0 for legacy
- * controllers like 8259. Now that IO-APIC can handle this irq, update
- * the cfg->domain.
- */
- if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
- apic->vector_allocation_domain(0, cfg->domain);
if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
+ &dest)) {
+ pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
+ mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
+ __clear_irq_vector(irq, cfg);
+
+ return;
+ }
apic_printk(APIC_VERBOSE,KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
cfg->vector, irq, attr->trigger, attr->polarity, dest);
if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
- pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
__clear_irq_vector(irq, cfg);
@@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
* Set up the timer pin, possibly with the 8259A-master behind.
*/
static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
- unsigned int pin, int vector)
+ unsigned int pin, int vector)
{
struct IO_APIC_route_entry entry;
+ unsigned int dest;
if (irq_remapping_enabled)
return;
@@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
* We use logical delivery to get the timer IRQ
* to the first CPU.
*/
+ if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
+ apic->target_cpus(), &dest)))
+ dest = BAD_APICID;
+
entry.dest_mode = apic->irq_dest_mode;
entry.mask = 0; /* don't mask IRQ for edge */
- entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
+ entry.dest = dest;
entry.delivery_mode = apic->irq_delivery_mode;
entry.polarity = 0;
entry.trigger = 0;
@@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
reg_03.raw = io_apic_read(ioapic_idx, 3);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- printk("\n");
printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
@@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
i,
ir_entry->index
);
- printk("%1d %1d %1d %1d %1d "
+ pr_cont("%1d %1d %1d %1d %1d "
"%1d %1d %X %02X\n",
ir_entry->format,
ir_entry->mask,
@@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
i,
entry.dest
);
- printk("%1d %1d %1d %1d %1d "
+ pr_cont("%1d %1d %1d %1d %1d "
"%1d %1d %02X\n",
entry.mask,
entry.trigger,
@@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void)
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
for_each_irq_pin(entry, cfg->irq_2_pin)
- printk("-> %d:%d", entry->apic, entry->pin);
- printk("\n");
+ pr_cont("-> %d:%d", entry->apic, entry->pin);
+ pr_cont("\n");
}
printk(KERN_INFO ".................................... done.\n");
@@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base)
printk(KERN_DEBUG);
for (i = 0; i < 8; i++)
- printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+ pr_cont("%08x", apic_read(base + i*0x10));
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
__apicdebuginit(void) print_local_APIC(void *dummy)
@@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
}
}
- printk("\n");
+ pr_cont("\n");
}
__apicdebuginit(void) print_local_APICs(int maxcpu)
@@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
reg_00.raw = io_apic_read(ioapic_idx, 0);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
- printk("could not set ID!\n");
+ pr_cont("could not set ID!\n");
else
apic_printk(APIC_VERBOSE, " ok.\n");
}
@@ -2210,71 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg)
cfg->move_in_progress = 0;
}
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
- u8 vector = cfg->vector;
-
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
-
- apic = entry->apic;
- pin = entry->pin;
- /*
- * With interrupt-remapping, destination information comes
- * from interrupt-remapping table entry.
- */
- if (!irq_remapped(cfg))
- io_apic_write(apic, 0x11 + pin*2, dest);
- reg = io_apic_read(apic, 0x10 + pin*2);
- reg &= ~IO_APIC_REDIR_VECTOR_MASK;
- reg |= vector;
- io_apic_modify(apic, 0x10 + pin*2, reg);
- }
-}
-
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- unsigned int *dest_id)
-{
- struct irq_cfg *cfg = data->chip_data;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return -1;
-
- if (assign_irq_vector(data->irq, data->chip_data, mask))
- return -1;
-
- cpumask_copy(data->affinity, mask);
-
- *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
- return 0;
-}
-
-static int
-ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
-{
- unsigned int dest, irq = data->irq;
- unsigned long flags;
- int ret;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- ret = __ioapic_set_affinity(data, mask, &dest);
- if (!ret) {
- /* Only the high 8 bits are valid. */
- dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, data->chip_data);
- }
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
- return ret;
-}
-
asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
@@ -2362,6 +2310,87 @@ void irq_force_complete_move(int irq)
static inline void irq_complete_move(struct irq_cfg *cfg) { }
#endif
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
+
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ unsigned int reg;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(cfg))
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+ reg |= vector;
+ io_apic_modify(apic, 0x10 + pin*2, reg);
+ }
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
+ * leaves data->affinity untouched.
+ */
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ unsigned int *dest_id)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int irq = data->irq;
+ int err;
+
+ if (!config_enabled(CONFIG_SMP))
+ return -1;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return -EINVAL;
+
+ err = assign_irq_vector(irq, cfg, mask);
+ if (err)
+ return err;
+
+ err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+ if (err) {
+ if (assign_irq_vector(irq, cfg, data->affinity))
+ pr_err("Failed to recover vector for irq %d\n", irq);
+ return err;
+ }
+
+ cpumask_copy(data->affinity, mask);
+
+ return 0;
+}
+
+static int
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
+{
+ unsigned int dest, irq = data->irq;
+ unsigned long flags;
+ int ret;
+
+ if (!config_enabled(CONFIG_SMP))
+ return -1;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ ret = __ioapic_set_affinity(data, mask, &dest);
+ if (!ret) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, data->chip_data);
+ ret = IRQ_SET_MASK_OK_NOCOPY;
+ }
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return ret;
+}
+
static void ack_apic_edge(struct irq_data *data)
{
irq_complete_move(data->chip_data);
@@ -2541,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
chip->irq_ack = ir_ack_apic_edge;
chip->irq_eoi = ir_ack_apic_level;
-#ifdef CONFIG_SMP
chip->irq_set_affinity = set_remapped_irq_affinity;
-#endif
}
#endif /* CONFIG_IRQ_REMAP */
@@ -2554,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_unmask = unmask_ioapic_irq,
.irq_ack = ack_apic_edge,
.irq_eoi = ack_apic_level,
-#ifdef CONFIG_SMP
.irq_set_affinity = ioapic_set_affinity,
-#endif
.irq_retrigger = ioapic_retrigger_irq,
};
@@ -3038,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
if (err)
return err;
- dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+ err = apic->cpu_mask_to_apicid_and(cfg->domain,
+ apic->target_cpus(), &dest);
+ if (err)
+ return err;
if (irq_remapped(cfg)) {
compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
@@ -3072,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
return err;
}
-#ifdef CONFIG_SMP
static int
msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
@@ -3092,9 +3119,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
__write_msi_msg(data->msi_desc, &msg);
- return 0;
+ return IRQ_SET_MASK_OK_NOCOPY;
}
-#endif /* CONFIG_SMP */
/*
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
@@ -3105,9 +3131,7 @@ static struct irq_chip msi_chip = {
.irq_unmask = unmask_msi_irq,
.irq_mask = mask_msi_irq,
.irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
.irq_set_affinity = msi_set_affinity,
-#endif
.irq_retrigger = ioapic_retrigger_irq,
};
@@ -3192,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq)
}
#ifdef CONFIG_DMAR_TABLE
-#ifdef CONFIG_SMP
static int
dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
@@ -3214,19 +3237,15 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
dmar_msi_write(irq, &msg);
- return 0;
+ return IRQ_SET_MASK_OK_NOCOPY;
}
-#endif /* CONFIG_SMP */
-
static struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI",
.irq_unmask = dmar_msi_unmask,
.irq_mask = dmar_msi_mask,
.irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
.irq_set_affinity = dmar_msi_set_affinity,
-#endif
.irq_retrigger = ioapic_retrigger_irq,
};
@@ -3247,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq)
#ifdef CONFIG_HPET_TIMER
-#ifdef CONFIG_SMP
static int hpet_msi_set_affinity(struct irq_data *data,
const struct cpumask *mask, bool force)
{
@@ -3267,19 +3285,15 @@ static int hpet_msi_set_affinity(struct irq_data *data,
hpet_msi_write(data->handler_data, &msg);
- return 0;
+ return IRQ_SET_MASK_OK_NOCOPY;
}
-#endif /* CONFIG_SMP */
-
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
.irq_unmask = hpet_msi_unmask,
.irq_mask = hpet_msi_mask,
.irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
.irq_set_affinity = hpet_msi_set_affinity,
-#endif
.irq_retrigger = ioapic_retrigger_irq,
};
@@ -3314,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
*/
#ifdef CONFIG_HT_IRQ
-#ifdef CONFIG_SMP
-
static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
struct ht_irq_msg msg;
@@ -3340,25 +3352,23 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
return -1;
target_ht_irq(data->irq, dest, cfg->vector);
- return 0;
+ return IRQ_SET_MASK_OK_NOCOPY;
}
-#endif
-
static struct irq_chip ht_irq_chip = {
.name = "PCI-HT",
.irq_mask = mask_ht_irq,
.irq_unmask = unmask_ht_irq,
.irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
.irq_set_affinity = ht_set_affinity,
-#endif
.irq_retrigger = ioapic_retrigger_irq,
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
struct irq_cfg *cfg;
+ struct ht_irq_msg msg;
+ unsigned dest;
int err;
if (disable_apic)
@@ -3366,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (!err) {
- struct ht_irq_msg msg;
- unsigned dest;
+ if (err)
+ return err;
+
+ err = apic->cpu_mask_to_apicid_and(cfg->domain,
+ apic->target_cpus(), &dest);
+ if (err)
+ return err;
- dest = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus());
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+ msg.address_lo =
+ HT_IRQ_LOW_BASE |
+ HT_IRQ_LOW_DEST_ID(dest) |
+ HT_IRQ_LOW_VECTOR(cfg->vector) |
+ ((apic->irq_dest_mode == 0) ?
+ HT_IRQ_LOW_DM_PHYSICAL :
+ HT_IRQ_LOW_DM_LOGICAL) |
+ HT_IRQ_LOW_RQEOI_EDGE |
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
+ HT_IRQ_LOW_MT_FIXED :
+ HT_IRQ_LOW_MT_ARBITRATED) |
+ HT_IRQ_LOW_IRQ_MASKED;
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(dest) |
- HT_IRQ_LOW_VECTOR(cfg->vector) |
- ((apic->irq_dest_mode == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((apic->irq_delivery_mode != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
+ write_ht_irq_msg(irq, &msg);
- write_ht_irq_msg(irq, &msg);
+ irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+ handle_edge_irq, "edge");
- irq_set_chip_and_handler_name(irq, &ht_irq_chip,
- handle_edge_irq, "edge");
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
- dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
- }
- return err;
+ return 0;
}
#endif /* CONFIG_HT_IRQ */
@@ -3563,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)
/* Sanity check */
if (reg_00.bits.ID != apic_id) {
- printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+ pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
+ ioapic);
return -1;
}
}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index f00a68cca37..d661ee95cab 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid)
* We use physical apicids here, not logical, so just return the default
* physical broadcast to stop people from breaking us
*/
-static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- return 0x0F;
-}
-
-static inline unsigned int
+static int
numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
+ const struct cpumask *andmask,
+ unsigned int *apicid)
{
- return 0x0F;
+ *apicid = 0x0F;
+ return 0;
}
/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
@@ -441,20 +438,6 @@ static int probe_numaq(void)
return found_numaq;
}
-static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_clear(retmask);
- cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
static void numaq_setup_portio_remap(void)
{
int num_quads = num_online_nodes();
@@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = {
.check_apicid_used = numaq_check_apicid_used,
.check_apicid_present = numaq_check_apicid_present,
- .vector_allocation_domain = numaq_vector_allocation_domain,
+ .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = numaq_init_apic_ldr,
.ioapic_phys_id_map = numaq_ioapic_phys_id_map,
@@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = {
.set_apic_id = NULL,
.apic_id_mask = 0x0F << 24,
- .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
.send_IPI_mask = numaq_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1b291da09e6..eb35ef9ee63 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void)
#endif
}
-static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- /*
- * Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_clear(retmask);
- cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
/* should be called last. */
static int probe_default(void)
{
@@ -105,7 +90,7 @@ static struct apic apic_default = {
.check_apicid_used = default_check_apicid_used,
.check_apicid_present = default_check_apicid_present,
- .vector_allocation_domain = default_vector_allocation_domain,
+ .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = default_init_apic_ldr,
.ioapic_phys_id_map = default_ioapic_phys_id_map,
@@ -123,8 +108,7 @@ static struct apic apic_default = {
.set_apic_id = NULL,
.apic_id_mask = 0x0F << 24,
- .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.send_IPI_mask = default_send_IPI_mask_logical,
.send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
@@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void)
if (apic->setup_apic_routing)
apic->setup_apic_routing();
+
+ if (x86_platform.apic_post_init)
+ x86_platform.apic_post_init();
}
void __init generic_apic_probe(void)
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 3fe98669892..1793dba7a74 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -23,11 +23,6 @@
#include <asm/ipi.h>
#include <asm/setup.h>
-static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
-{
- return hard_smp_processor_id() >> index_msb;
-}
-
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
@@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void)
}
}
- if (is_vsmp_box()) {
- /* need to update phys_pkg_id */
- apic->phys_pkg_id = apicid_phys_pkg_id;
- }
+ if (x86_platform.apic_post_init)
+ x86_platform.apic_post_init();
}
/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 659897c0075..77c95c0e1bf 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -26,6 +26,8 @@
*
*/
+#define pr_fmt(fmt) "summit: %s: " fmt, __func__
+
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/io.h>
@@ -235,8 +237,8 @@ static int summit_apic_id_registered(void)
static void summit_setup_apic_routing(void)
{
- printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
- nr_ioapics);
+ pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n",
+ nr_ioapics);
}
static int summit_cpu_present_to_apicid(int mps_cpu)
@@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid)
return 1;
}
-static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
{
unsigned int round = 0;
- int cpu, apicid = 0;
+ unsigned int cpu, apicid = 0;
/*
* The cpus in the mask must all be on the apic cluster.
*/
- for_each_cpu(cpu, cpumask) {
+ for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
- printk("%s: Not a valid mask!\n", __func__);
- return BAD_APICID;
+ pr_err("Not a valid mask!\n");
+ return -EINVAL;
}
apicid |= new_apicid;
round++;
}
- return apicid;
+ if (!round)
+ return -EINVAL;
+ *dest_id = apicid;
+ return 0;
}
-static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
- const struct cpumask *andmask)
+static int
+summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+ const struct cpumask *andmask,
+ unsigned int *apicid)
{
- int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
+ *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
- return apicid;
+ return 0;
cpumask_and(cpumask, inmask, andmask);
- cpumask_and(cpumask, cpumask, cpu_online_mask);
- apicid = summit_cpu_mask_to_apicid(cpumask);
+ summit_cpu_mask_to_apicid(cpumask, apicid);
free_cpumask_var(cpumask);
- return apicid;
+ return 0;
}
/*
@@ -320,20 +327,6 @@ static int probe_summit(void)
return 0;
}
-static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- /* Careful. Some cpus do not strictly honor the set of cpus
- * specified in the interrupt destination when using lowest
- * priority interrupt delivery mode.
- *
- * In particular there was a hyperthreading cpu observed to
- * deliver interrupts to the wrong hyperthread when only one
- * hyperthread was specified in the interrupt desitination.
- */
- cpumask_clear(retmask);
- cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
#ifdef CONFIG_X86_SUMMIT_NUMA
static struct rio_table_hdr *rio_table_hdr;
static struct scal_detail *scal_devs[MAX_NUMNODES];
@@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
}
}
if (i == rio_table_hdr->num_rio_dev) {
- printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
+ pr_err("Couldn't find owner Cyclone for Winnipeg!\n");
return last_bus;
}
@@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
}
}
if (i == rio_table_hdr->num_scal_dev) {
- printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
+ pr_err("Couldn't find owner Twister for Cyclone!\n");
return last_bus;
}
@@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
num_buses = 9;
break;
default:
- printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
+ pr_info("Unsupported Winnipeg type!\n");
return last_bus;
}
@@ -411,13 +404,15 @@ static int build_detail_arrays(void)
int i, scal_detail_size, rio_detail_size;
if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
- printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+ pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n",
+ MAX_NUMNODES, rio_table_hdr->num_scal_dev);
return 0;
}
switch (rio_table_hdr->version) {
default:
- printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
+ pr_warn("Invalid Rio Grande Table Version: %d\n",
+ rio_table_hdr->version);
return 0;
case 2:
scal_detail_size = 11;
@@ -462,7 +457,7 @@ void setup_summit(void)
offset = *((unsigned short *)(ptr + offset));
}
if (!rio_table_hdr) {
- printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
+ pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");
return;
}
@@ -509,7 +504,7 @@ static struct apic apic_summit = {
.check_apicid_used = summit_check_apicid_used,
.check_apicid_present = summit_check_apicid_present,
- .vector_allocation_domain = summit_vector_allocation_domain,
+ .vector_allocation_domain = flat_vector_allocation_domain,
.init_apic_ldr = summit_init_apic_ldr,
.ioapic_phys_id_map = summit_ioapic_phys_id_map,
@@ -527,7 +522,6 @@ static struct apic apic_summit = {
.set_apic_id = NULL,
.apic_id_mask = 0xFF << 24,
- .cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
.send_IPI_mask = summit_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index ff35cff0e1a..c88baa4ff0e 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
}
static void
- x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
{
__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
}
@@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector)
__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
}
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+ const struct cpumask *andmask,
+ unsigned int *apicid)
{
- /*
- * We're using fixed IRQ delivery, can only return one logical APIC ID.
- * May as well be the first.
- */
- int cpu = cpumask_first(cpumask);
+ u32 dest = 0;
+ u16 cluster;
+ int i;
- if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_logical_apicid, cpu);
- else
- return BAD_APICID;
-}
+ for_each_cpu_and(i, cpumask, andmask) {
+ if (!cpumask_test_cpu(i, cpu_online_mask))
+ continue;
+ dest = per_cpu(x86_cpu_to_logical_apicid, i);
+ cluster = x2apic_cluster(i);
+ break;
+ }
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
+ if (!dest)
+ return -EINVAL;
- /*
- * We're using fixed IRQ delivery, can only return one logical APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
+ for_each_cpu_and(i, cpumask, andmask) {
+ if (!cpumask_test_cpu(i, cpu_online_mask))
+ continue;
+ if (cluster != x2apic_cluster(i))
+ continue;
+ dest |= per_cpu(x86_cpu_to_logical_apicid, i);
}
- return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ *apicid = dest;
+
+ return 0;
}
static void init_x2apic_ldr(void)
@@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void)
return 0;
}
+static const struct cpumask *x2apic_cluster_target_cpus(void)
+{
+ return cpu_all_mask;
+}
+
+/*
+ * Each x2apic cluster is an allocation domain.
+ */
+static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask)
+{
+ /*
+ * To minimize vector pressure, default case of boot, device bringup
+ * etc will use a single cpu for the interrupt destination.
+ *
+ * On explicit migration requests coming from irqbalance etc,
+ * interrupts will be routed to the x2apic cluster (cluster-id
+ * derived from the first cpu in the mask) members specified
+ * in the mask.
+ */
+ if (mask == x2apic_cluster_target_cpus())
+ cpumask_copy(retmask, cpumask_of(cpu));
+ else
+ cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
+}
+
static struct apic apic_x2apic_cluster = {
.name = "cluster x2apic",
@@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = {
.irq_delivery_mode = dest_LowestPrio,
.irq_dest_mode = 1, /* logical */
- .target_cpus = x2apic_target_cpus,
+ .target_cpus = x2apic_cluster_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
- .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .vector_allocation_domain = cluster_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = {
.set_apic_id = x2apic_set_apic_id,
.apic_id_mask = 0xFFFFFFFFu,
- .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
.send_IPI_mask = x2apic_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index c17e982db27..e03a1e180e8 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector)
__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
}
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- int cpu = cpumask_first(cpumask);
-
- if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu);
- else
- return BAD_APICID;
-}
-
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
-{
- int cpu;
-
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- for_each_cpu_and(cpu, cpumask, andmask) {
- if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
- }
-
- return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
static void init_x2apic_ldr(void)
{
}
@@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = x2apic_target_cpus,
+ .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = 0,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
- .vector_allocation_domain = x2apic_vector_allocation_domain,
+ .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = init_x2apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = {
.set_apic_id = x2apic_set_apic_id,
.apic_id_mask = 0xFFFFFFFFu,
- .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
- .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
.send_IPI_mask = x2apic_send_IPI_mask,
.send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c6d03f7a440..8cfade9510a 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
-static const struct cpumask *uv_target_cpus(void)
-{
- return cpu_online_mask;
-}
-
-static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
- cpumask_clear(retmask);
- cpumask_set_cpu(cpu, retmask);
-}
-
static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
#ifdef CONFIG_SMP
@@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void)
{
}
-static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
- /*
- * We're using fixed IRQ delivery, can only return one phys APIC ID.
- * May as well be the first.
- */
- int cpu = cpumask_first(cpumask);
-
- if ((unsigned)cpu < nr_cpu_ids)
- return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
- else
- return BAD_APICID;
-}
-
-static unsigned int
+static int
uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
- const struct cpumask *andmask)
+ const struct cpumask *andmask,
+ unsigned int *apicid)
{
- int cpu;
+ int unsigned cpu;
/*
* We're using fixed IRQ delivery, can only return one phys APIC ID.
@@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
if (cpumask_test_cpu(cpu, cpu_online_mask))
break;
}
- return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+
+ if (likely(cpu < nr_cpu_ids)) {
+ *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+ return 0;
+ }
+
+ return -EINVAL;
}
static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = {
.irq_delivery_mode = dest_Fixed,
.irq_dest_mode = 0, /* physical */
- .target_cpus = uv_target_cpus,
+ .target_cpus = online_target_cpus,
.disable_esr = 0,
.dest_logical = APIC_DEST_LOGICAL,
.check_apicid_used = NULL,
.check_apicid_present = NULL,
- .vector_allocation_domain = uv_vector_allocation_domain,
+ .vector_allocation_domain = default_vector_allocation_domain,
.init_apic_ldr = uv_init_apic_ldr,
.ioapic_phys_id_map = NULL,
@@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = {
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFFFFFFFu,
- .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
.send_IPI_mask = uv_send_IPI_mask,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 07b0c0db466..d65464e4350 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -201,6 +201,8 @@
* http://www.microsoft.com/whdc/archive/amp_12.mspx]
*/
+#define pr_fmt(fmt) "apm: " fmt
+
#include <linux/module.h>
#include <linux/poll.h>
@@ -485,11 +487,11 @@ static void apm_error(char *str, int err)
if (error_table[i].key == err)
break;
if (i < ERROR_COUNT)
- printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
+ pr_notice("%s: %s\n", str, error_table[i].msg);
else if (err < 0)
- printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err);
+ pr_notice("%s: linux error code %i\n", str, err);
else
- printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
+ pr_notice("%s: unknown error code %#2.2x\n",
str, err);
}
@@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
static int notified;
if (notified++ == 0)
- printk(KERN_ERR "apm: an event queue overflowed\n");
+ pr_err("an event queue overflowed\n");
if (++as->event_tail >= APM_MAX_EVENTS)
as->event_tail = 0;
}
@@ -1447,7 +1449,7 @@ static void apm_mainloop(void)
static int check_apm_user(struct apm_user *as, const char *func)
{
if (as == NULL || as->magic != APM_BIOS_MAGIC) {
- printk(KERN_ERR "apm: %s passed bad filp\n", func);
+ pr_err("%s passed bad filp\n", func);
return 1;
}
return 0;
@@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp)
as1 = as1->next)
;
if (as1 == NULL)
- printk(KERN_ERR "apm: filp not in user list\n");
+ pr_err("filp not in user list\n");
else
as1->next = as->next;
}
@@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp)
struct apm_user *as;
as = kmalloc(sizeof(*as), GFP_KERNEL);
- if (as == NULL) {
- printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
- sizeof(*as));
+ if (as == NULL)
return -ENOMEM;
- }
+
as->magic = APM_BIOS_MAGIC;
as->event_tail = as->event_head = 0;
as->suspends_pending = as->standbys_pending = 0;
@@ -2313,16 +2313,16 @@ static int __init apm_init(void)
}
if (apm_info.disabled) {
- printk(KERN_NOTICE "apm: disabled on user request.\n");
+ pr_notice("disabled on user request.\n");
return -ENODEV;
}
if ((num_online_cpus() > 1) && !power_off && !smp) {
- printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
+ pr_notice("disabled - APM is not SMP safe.\n");
apm_info.disabled = 1;
return -ENODEV;
}
if (!acpi_disabled) {
- printk(KERN_NOTICE "apm: overridden by ACPI.\n");
+ pr_notice("overridden by ACPI.\n");
apm_info.disabled = 1;
return -ENODEV;
}
@@ -2356,8 +2356,7 @@ static int __init apm_init(void)
kapmd_task = kthread_create(apm, NULL, "kapmd");
if (IS_ERR(kapmd_task)) {
- printk(KERN_ERR "apm: disabled - Unable to start kernel "
- "thread.\n");
+ pr_err("disabled - Unable to start kernel thread\n");
err = PTR_ERR(kapmd_task);
kapmd_task = NULL;
remove_proc_entry("apm", NULL);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6ab6aa2fdfd..d30a6a9a012 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += proc.o capflags.o powerflags.o common.o
-obj-y += vmware.o hypervisor.o sched.o mshyperv.o
+obj-y += vmware.o hypervisor.o mshyperv.o
obj-y += rdrand.o
obj-y += match.o
@@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
endif
obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 146bb6218ee..9d92e19039f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -19,6 +19,39 @@
#include "cpu.h"
+static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
+{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ u32 gprs[8] = { 0 };
+ int err;
+
+ WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__);
+
+ gprs[1] = msr;
+ gprs[7] = 0x9c5a203a;
+
+ err = rdmsr_safe_regs(gprs);
+
+ *p = gprs[0] | ((u64)gprs[2] << 32);
+
+ return err;
+}
+
+static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
+{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ u32 gprs[8] = { 0 };
+
+ WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__);
+
+ gprs[0] = (u32)val;
+ gprs[1] = msr;
+ gprs[2] = val >> 32;
+ gprs[7] = 0x9c5a203a;
+
+ return wrmsr_safe_regs(gprs);
+}
+
#ifdef CONFIG_X86_32
/*
* B step AMD K6 before B 9730xxxx have hardware bugs that can cause
@@ -586,9 +619,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
!cpu_has(c, X86_FEATURE_TOPOEXT)) {
u64 val;
- if (!rdmsrl_amd_safe(0xc0011005, &val)) {
+ if (!rdmsrl_safe(0xc0011005, &val)) {
val |= 1ULL << 54;
- wrmsrl_amd_safe(0xc0011005, val);
+ wrmsrl_safe(0xc0011005, val);
rdmsrl(0xc0011005, val);
if (val & (1ULL << 54)) {
set_cpu_cap(c, X86_FEATURE_TOPOEXT);
@@ -679,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
if (err == 0) {
mask |= (1 << 10);
- checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+ wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
}
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 46674fbb62b..c97bb7b5a9f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -55,8 +55,8 @@ static void __init check_fpu(void)
if (!boot_cpu_data.hard_math) {
#ifndef CONFIG_MATH_EMULATION
- printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
- printk(KERN_EMERG "Giving up.\n");
+ pr_emerg("No coprocessor found and no math emulation present\n");
+ pr_emerg("Giving up\n");
for (;;) ;
#endif
return;
@@ -86,7 +86,7 @@ static void __init check_fpu(void)
boot_cpu_data.fdiv_bug = fdiv_bug;
if (boot_cpu_data.fdiv_bug)
- printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
+ pr_warn("Hmm, FPU with FDIV bug\n");
}
static void __init check_hlt(void)
@@ -94,16 +94,16 @@ static void __init check_hlt(void)
if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
return;
- printk(KERN_INFO "Checking 'hlt' instruction... ");
+ pr_info("Checking 'hlt' instruction... ");
if (!boot_cpu_data.hlt_works_ok) {
- printk("disabled\n");
+ pr_cont("disabled\n");
return;
}
halt();
halt();
halt();
halt();
- printk(KERN_CONT "OK.\n");
+ pr_cont("OK\n");
}
/*
@@ -116,7 +116,7 @@ static void __init check_popad(void)
#ifndef CONFIG_X86_POPAD_OK
int res, inp = (int) &res;
- printk(KERN_INFO "Checking for popad bug... ");
+ pr_info("Checking for popad bug... ");
__asm__ __volatile__(
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
: "=&a" (res)
@@ -127,9 +127,9 @@ static void __init check_popad(void)
* CPU hard. Too bad.
*/
if (res != 12345678)
- printk(KERN_CONT "Buggy.\n");
+ pr_cont("Buggy\n");
else
- printk(KERN_CONT "OK.\n");
+ pr_cont("OK\n");
#endif
}
@@ -161,7 +161,7 @@ void __init check_bugs(void)
{
identify_boot_cpu();
#ifndef CONFIG_SMP
- printk(KERN_INFO "CPU: ");
+ pr_info("CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
check_config();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6b9333b429b..46d8786d655 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -452,6 +452,35 @@ void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
c->x86_cache_size = l2size;
}
+u16 __read_mostly tlb_lli_4k[NR_INFO];
+u16 __read_mostly tlb_lli_2m[NR_INFO];
+u16 __read_mostly tlb_lli_4m[NR_INFO];
+u16 __read_mostly tlb_lld_4k[NR_INFO];
+u16 __read_mostly tlb_lld_2m[NR_INFO];
+u16 __read_mostly tlb_lld_4m[NR_INFO];
+
+/*
+ * tlb_flushall_shift shows the balance point in replacing cr3 write
+ * with multiple 'invlpg'. It will do this replacement when
+ * flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
+ * If tlb_flushall_shift is -1, means the replacement will be disabled.
+ */
+s8 __read_mostly tlb_flushall_shift = -1;
+
+void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
+{
+ if (this_cpu->c_detect_tlb)
+ this_cpu->c_detect_tlb(c);
+
+ printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
+ "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
+ "tlb_flushall_shift is 0x%x\n",
+ tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
+ tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
+ tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
+ tlb_flushall_shift);
+}
+
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
@@ -911,6 +940,8 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
+ if (boot_cpu_data.cpuid_level >= 2)
+ cpu_detect_tlb(&boot_cpu_data);
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -947,7 +978,7 @@ static void __cpuinit __print_cpu_msr(void)
index_max = msr_range_array[i].max;
for (index = index_min; index < index_max; index++) {
- if (rdmsrl_amd_safe(index, &val))
+ if (rdmsrl_safe(index, &val))
continue;
printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 8bacc7826fb..4041c24ae7d 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -20,10 +20,19 @@ struct cpu_dev {
void (*c_bsp_init)(struct cpuinfo_x86 *);
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
+ void (*c_detect_tlb)(struct cpuinfo_x86 *);
unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
int c_x86_vendor;
};
+struct _tlb_table {
+ unsigned char descriptor;
+ char tlb_type;
+ unsigned int entries;
+ /* unsigned int ways; */
+ char info[128];
+};
+
#define cpu_dev_register(cpu_devX) \
static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
__attribute__((__section__(".x86_cpu_dev.init"))) = \
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 755f64fb074..a8f8fa9769d 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -37,6 +37,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#endif
&x86_hyper_vmware,
&x86_hyper_ms_hyperv,
+#ifdef CONFIG_KVM_GUEST
+ &x86_hyper_kvm,
+#endif
};
const struct hypervisor_x86 *x86_hyper;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3e6ff6cbf42..0a4ce2980a5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -491,6 +491,181 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
}
#endif
+#define TLB_INST_4K 0x01
+#define TLB_INST_4M 0x02
+#define TLB_INST_2M_4M 0x03
+
+#define TLB_INST_ALL 0x05
+#define TLB_INST_1G 0x06
+
+#define TLB_DATA_4K 0x11
+#define TLB_DATA_4M 0x12
+#define TLB_DATA_2M_4M 0x13
+#define TLB_DATA_4K_4M 0x14
+
+#define TLB_DATA_1G 0x16
+
+#define TLB_DATA0_4K 0x21
+#define TLB_DATA0_4M 0x22
+#define TLB_DATA0_2M_4M 0x23
+
+#define STLB_4K 0x41
+
+static const struct _tlb_table intel_tlb_table[] __cpuinitconst = {
+ { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
+ { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
+ { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
+ { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
+ { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
+ { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
+ { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" },
+ { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+ { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+ { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
+ { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" },
+ { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" },
+ { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" },
+ { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" },
+ { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" },
+ { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" },
+ { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
+ { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
+ { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" },
+ { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" },
+ { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" },
+ { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" },
+ { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" },
+ { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
+ { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
+ { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
+ { 0x00, 0, 0 }
+};
+
+static void __cpuinit intel_tlb_lookup(const unsigned char desc)
+{
+ unsigned char k;
+ if (desc == 0)
+ return;
+
+ /* look up this descriptor in the table */
+ for (k = 0; intel_tlb_table[k].descriptor != desc && \
+ intel_tlb_table[k].descriptor != 0; k++)
+ ;
+
+ if (intel_tlb_table[k].tlb_type == 0)
+ return;
+
+ switch (intel_tlb_table[k].tlb_type) {
+ case STLB_4K:
+ if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+ if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_INST_ALL:
+ if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+ if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
+ if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_INST_4K:
+ if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_INST_4M:
+ if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_INST_2M_4M:
+ if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries;
+ if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_DATA_4K:
+ case TLB_DATA0_4K:
+ if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_DATA_4M:
+ case TLB_DATA0_4M:
+ if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_DATA_2M_4M:
+ case TLB_DATA0_2M_4M:
+ if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries;
+ if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ case TLB_DATA_4K_4M:
+ if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries;
+ if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
+ tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
+ break;
+ }
+}
+
+static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_invlpg) {
+ tlb_flushall_shift = -1;
+ return;
+ }
+ switch ((c->x86 << 8) + c->x86_model) {
+ case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+ case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+ case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+ case 0x61d: /* six-core 45 nm xeon "Dunnington" */
+ tlb_flushall_shift = -1;
+ break;
+ case 0x61a: /* 45 nm nehalem, "Bloomfield" */
+ case 0x61e: /* 45 nm nehalem, "Lynnfield" */
+ case 0x625: /* 32 nm nehalem, "Clarkdale" */
+ case 0x62c: /* 32 nm nehalem, "Gulftown" */
+ case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
+ case 0x62f: /* 32 nm Xeon E7 */
+ tlb_flushall_shift = 6;
+ break;
+ case 0x62a: /* SandyBridge */
+ case 0x62d: /* SandyBridge, "Romely-EP" */
+ tlb_flushall_shift = 5;
+ break;
+ case 0x63a: /* Ivybridge */
+ tlb_flushall_shift = 1;
+ break;
+ default:
+ tlb_flushall_shift = 6;
+ }
+}
+
+static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
+{
+ int i, j, n;
+ unsigned int regs[4];
+ unsigned char *desc = (unsigned char *)regs;
+ /* Number of times to iterate */
+ n = cpuid_eax(2) & 0xFF;
+
+ for (i = 0 ; i < n ; i++) {
+ cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+ /* If bit 31 is set, this is an unknown format */
+ for (j = 0 ; j < 3 ; j++)
+ if (regs[j] & (1 << 31))
+ regs[j] = 0;
+
+ /* Byte 0 is level count, not a descriptor */
+ for (j = 1 ; j < 16 ; j++)
+ intel_tlb_lookup(desc[j]);
+ }
+ intel_tlb_flushall_shift_set(c);
+}
+
static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
@@ -546,6 +721,7 @@ static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
},
.c_size_cache = intel_size_cache,
#endif
+ .c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
.c_init = init_intel,
.c_x86_vendor = X86_VENDOR_INTEL,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 413c2ced887..13017626f9a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -55,13 +55,6 @@ static struct severity {
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
-#define MCACOD 0xffff
-/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
-#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
-#define MCACOD_SCRUBMSK 0xfff0
-#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
-#define MCACOD_DATA 0x0134 /* Data Load */
-#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
MCESEV(
NO, "Invalid",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index da27c5d2168..292d0258311 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -7,6 +7,9 @@
* Copyright 2008 Intel Corporation
* Author: Andi Kleen
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/thread_info.h>
#include <linux/capability.h>
#include <linux/miscdevice.h>
@@ -57,8 +60,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
int mce_disabled __read_mostly;
-#define MISC_MCELOG_MINOR 227
-
#define SPINUNIT 100 /* 100ns */
atomic_t mce_entry;
@@ -102,6 +103,8 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
static DEFINE_PER_CPU(struct work_struct, mce_work);
+static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void)
cpu_relax();
if (!m->finished && retries >= 4) {
- pr_err("MCE: skipping error being logged currently!\n");
+ pr_err("skipping error being logged currently!\n");
break;
}
}
@@ -649,14 +652,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
+static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+ struct pt_regs *regs)
{
int i, ret = 0;
for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
- if (m->status & MCI_STATUS_VAL)
+ if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
+ if (quirk_no_way_out)
+ quirk_no_way_out(i, m, regs);
+ }
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
ret = 1;
}
@@ -1039,7 +1046,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*final = m;
memset(valid_banks, 0, sizeof(valid_banks));
- no_way_out = mce_no_way_out(&m, &msg, valid_banks);
+ no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
barrier();
@@ -1167,8 +1174,9 @@ int memory_failure(unsigned long pfn, int vector, int flags)
{
/* mce_severity() should not hand us an ACTION_REQUIRED error */
BUG_ON(flags & MF_ACTION_REQUIRED);
- printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
- "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+ pr_err("Uncorrected memory error in page 0x%lx ignored\n"
+ "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
+ pfn);
return 0;
}
@@ -1186,6 +1194,7 @@ void mce_notify_process(void)
{
unsigned long pfn;
struct mce_info *mi = mce_find_info();
+ int flags = MF_ACTION_REQUIRED;
if (!mi)
mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
@@ -1200,8 +1209,9 @@ void mce_notify_process(void)
* doomed. We still need to mark the page as poisoned and alert any
* other users of the page.
*/
- if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
- mi->restartable == 0) {
+ if (!mi->restartable)
+ flags |= MF_MUST_KILL;
+ if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {
pr_err("Memory error not recovered");
force_sig(SIGBUS, current);
}
@@ -1358,11 +1368,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
b = cap & MCG_BANKCNT_MASK;
if (!banks)
- printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+ pr_info("CPU supports %d MCE banks\n", b);
if (b > MAX_NR_BANKS) {
- printk(KERN_WARNING
- "MCE: Using only %u machine check banks out of %u\n",
+ pr_warn("Using only %u machine check banks out of %u\n",
MAX_NR_BANKS, b);
b = MAX_NR_BANKS;
}
@@ -1415,11 +1424,39 @@ static void __mcheck_cpu_init_generic(void)
}
}
+/*
+ * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
+ * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
+ * Vol 3B Table 15-20). But this confuses both the code that determines
+ * whether the machine check occurred in kernel or user mode, and also
+ * the severity assessment code. Pretend that EIPV was set, and take the
+ * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
+ */
+static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+ if (bank != 0)
+ return;
+ if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
+ return;
+ if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
+ MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
+ MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
+ MCACOD)) !=
+ (MCI_STATUS_UC|MCI_STATUS_EN|
+ MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
+ MCI_STATUS_AR|MCACOD_INSTR))
+ return;
+
+ m->mcgstatus |= MCG_STATUS_EIPV;
+ m->ip = regs->ip;
+ m->cs = regs->cs;
+}
+
/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
- pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
+ pr_info("unknown CPU type - not enabling MCE support\n");
return -EOPNOTSUPP;
}
@@ -1512,6 +1549,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
mce_bootlog = 0;
+
+ if (c->x86 == 6 && c->x86_model == 45)
+ quirk_no_way_out = quirk_sandybridge_ifu;
}
if (monarch_timeout < 0)
monarch_timeout = 0;
@@ -1574,7 +1614,7 @@ static void __mcheck_cpu_init_timer(void)
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
- printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+ pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
smp_processor_id());
}
@@ -1893,8 +1933,7 @@ static int __init mcheck_enable(char *str)
get_option(&str, &monarch_timeout);
}
} else {
- printk(KERN_INFO "mce argument %s ignored. Please use /sys\n",
- str);
+ pr_info("mce argument %s ignored. Please use /sys\n", str);
return 0;
}
return 1;
@@ -2342,7 +2381,7 @@ static __init int mcheck_init_device(void)
return err;
}
-device_initcall(mcheck_init_device);
+device_initcall_sync(mcheck_init_device);
/*
* Old style boot options parsing. Only for compatibility.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f4873a64f46..c4e916d7737 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -1,15 +1,17 @@
/*
- * (c) 2005, 2006 Advanced Micro Devices, Inc.
+ * (c) 2005-2012 Advanced Micro Devices, Inc.
* Your use of this code is subject to the terms and conditions of the
* GNU general public license version 2. See "COPYING" or
* http://www.gnu.org/licenses/gpl.html
*
* Written by Jacob Shin - AMD, Inc.
*
- * Support : jacob.shin@amd.com
+ * Support: borislav.petkov@amd.com
*
* April 2006
* - added support for AMD Family 0x10 processors
+ * May 2012
+ * - major scrubbing
*
* All MC4_MISCi registers are shared between multi-cores
*/
@@ -25,6 +27,7 @@
#include <linux/cpu.h>
#include <linux/smp.h>
+#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
@@ -45,23 +48,15 @@
#define MASK_BLKPTR_LO 0xFF000000
#define MCG_XBLK_ADDR 0xC0000400
-struct threshold_block {
- unsigned int block;
- unsigned int bank;
- unsigned int cpu;
- u32 address;
- u16 interrupt_enable;
- bool interrupt_capable;
- u16 threshold_limit;
- struct kobject kobj;
- struct list_head miscj;
+static const char * const th_names[] = {
+ "load_store",
+ "insn_fetch",
+ "combined_unit",
+ "",
+ "northbridge",
+ "execution_unit",
};
-struct threshold_bank {
- struct kobject *kobj;
- struct threshold_block *blocks;
- cpumask_var_t cpus;
-};
static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
static unsigned char shared_bank[NR_BANKS] = {
@@ -84,6 +79,26 @@ struct thresh_restart {
u16 old_limit;
};
+static const char * const bank4_names(struct threshold_block *b)
+{
+ switch (b->address) {
+ /* MSR4_MISC0 */
+ case 0x00000413:
+ return "dram";
+
+ case 0xc0000408:
+ return "ht_links";
+
+ case 0xc0000409:
+ return "l3_cache";
+
+ default:
+ WARN(1, "Funny MSR: 0x%08x\n", b->address);
+ return "";
+ }
+};
+
+
static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
/*
@@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (!block)
per_cpu(bank_map, cpu) |= (1 << bank);
- if (shared_bank[bank] && c->cpu_core_id)
- break;
memset(&b, 0, sizeof(b));
b.cpu = cpu;
@@ -326,7 +339,7 @@ struct threshold_attr {
#define SHOW_FIELDS(name) \
static ssize_t show_ ## name(struct threshold_block *b, char *buf) \
{ \
- return sprintf(buf, "%lx\n", (unsigned long) b->name); \
+ return sprintf(buf, "%lu\n", (unsigned long) b->name); \
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)
@@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
return size;
}
-struct threshold_block_cross_cpu {
- struct threshold_block *tb;
- long retval;
-};
-
-static void local_error_count_handler(void *_tbcc)
-{
- struct threshold_block_cross_cpu *tbcc = _tbcc;
- struct threshold_block *b = tbcc->tb;
- u32 low, high;
-
- rdmsr(b->address, low, high);
- tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
-}
-
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
- struct threshold_block_cross_cpu tbcc = { .tb = b, };
+ u32 lo, hi;
- smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
- return sprintf(buf, "%lx\n", tbcc.retval);
-}
-
-static ssize_t store_error_count(struct threshold_block *b,
- const char *buf, size_t count)
-{
- struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
+ rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);
- smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
- return 1;
+ return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
+ (THRESHOLD_MAX - b->threshold_limit)));
}
+static struct threshold_attr error_count = {
+ .attr = {.name = __stringify(error_count), .mode = 0444 },
+ .show = show_error_count,
+};
+
#define RW_ATTR(val) \
static struct threshold_attr val = { \
.attr = {.name = __stringify(val), .mode = 0644 }, \
@@ -418,7 +414,6 @@ static struct threshold_attr val = { \
RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);
-RW_ATTR(error_count);
static struct attribute *default_attrs[] = {
&threshold_limit.attr,
@@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
err = kobject_init_and_add(&b->kobj, &threshold_ktype,
per_cpu(threshold_banks, cpu)[bank]->kobj,
- "misc%i", block);
+ (bank == 4 ? bank4_names(b) : th_names[bank]));
if (err)
goto out_free;
recurse:
@@ -548,98 +543,91 @@ out_free:
return err;
}
-static __cpuinit long
-local_allocate_threshold_blocks(int cpu, unsigned int bank)
+static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
{
- return allocate_threshold_blocks(cpu, bank, 0,
- MSR_IA32_MC0_MISC + bank * 4);
+ struct list_head *head = &b->blocks->miscj;
+ struct threshold_block *pos = NULL;
+ struct threshold_block *tmp = NULL;
+ int err = 0;
+
+ err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
+ if (err)
+ return err;
+
+ list_for_each_entry_safe(pos, tmp, head, miscj) {
+
+ err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
+ if (err) {
+ list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
+ kobject_del(&pos->kobj);
+
+ return err;
+ }
+ }
+ return err;
}
-/* symlinks sibling shared banks to first core. first core owns dir/files. */
static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
- int i, err = 0;
- struct threshold_bank *b = NULL;
struct device *dev = per_cpu(mce_device, cpu);
- char name[32];
+ struct amd_northbridge *nb = NULL;
+ struct threshold_bank *b = NULL;
+ const char *name = th_names[bank];
+ int err = 0;
- sprintf(name, "threshold_bank%i", bank);
+ if (shared_bank[bank]) {
-#ifdef CONFIG_SMP
- if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(cpu_llc_shared_mask(cpu));
+ nb = node_to_amd_nb(amd_get_nb_id(cpu));
+ WARN_ON(!nb);
- /* first core not up yet */
- if (cpu_data(i).cpu_core_id)
- goto out;
+ /* threshold descriptor already initialized on this node? */
+ if (nb->bank4) {
+ /* yes, use it */
+ b = nb->bank4;
+ err = kobject_add(b->kobj, &dev->kobj, name);
+ if (err)
+ goto out;
- /* already linked */
- if (per_cpu(threshold_banks, cpu)[bank])
- goto out;
+ per_cpu(threshold_banks, cpu)[bank] = b;
+ atomic_inc(&b->cpus);
- b = per_cpu(threshold_banks, i)[bank];
+ err = __threshold_add_blocks(b);
- if (!b)
goto out;
-
- err = sysfs_create_link(&dev->kobj, b->kobj, name);
- if (err)
- goto out;
-
- cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
- per_cpu(threshold_banks, cpu)[bank] = b;
-
- goto out;
+ }
}
-#endif
b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
if (!b) {
err = -ENOMEM;
goto out;
}
- if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
- kfree(b);
- err = -ENOMEM;
- goto out;
- }
b->kobj = kobject_create_and_add(name, &dev->kobj);
- if (!b->kobj)
+ if (!b->kobj) {
+ err = -EINVAL;
goto out_free;
-
-#ifndef CONFIG_SMP
- cpumask_setall(b->cpus);
-#else
- cpumask_set_cpu(cpu, b->cpus);
-#endif
+ }
per_cpu(threshold_banks, cpu)[bank] = b;
- err = local_allocate_threshold_blocks(cpu, bank);
- if (err)
- goto out_free;
-
- for_each_cpu(i, b->cpus) {
- if (i == cpu)
- continue;
-
- dev = per_cpu(mce_device, i);
- if (dev)
- err = sysfs_create_link(&dev->kobj,b->kobj, name);
- if (err)
- goto out;
+ if (shared_bank[bank]) {
+ atomic_set(&b->cpus, 1);
- per_cpu(threshold_banks, i)[bank] = b;
+ /* nb is already initialized, see above */
+ WARN_ON(nb->bank4);
+ nb->bank4 = b;
}
- goto out;
+ err = allocate_threshold_blocks(cpu, bank, 0,
+ MSR_IA32_MC0_MISC + bank * 4);
+ if (!err)
+ goto out;
-out_free:
- per_cpu(threshold_banks, cpu)[bank] = NULL;
- free_cpumask_var(b->cpus);
+ out_free:
kfree(b);
-out:
+
+ out:
return err;
}
@@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
return err;
}
-/*
- * let's be hotplug friendly.
- * in case of multiple core processors, the first core always takes ownership
- * of shared sysfs dir/files, and rest of the cores will be symlinked to it.
- */
-
static void deallocate_threshold_block(unsigned int cpu,
unsigned int bank)
{
@@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu,
per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}
+static void __threshold_remove_blocks(struct threshold_bank *b)
+{
+ struct threshold_block *pos = NULL;
+ struct threshold_block *tmp = NULL;
+
+ kobject_del(b->kobj);
+
+ list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
+ kobject_del(&pos->kobj);
+}
+
static void threshold_remove_bank(unsigned int cpu, int bank)
{
+ struct amd_northbridge *nb;
struct threshold_bank *b;
- struct device *dev;
- char name[32];
- int i = 0;
b = per_cpu(threshold_banks, cpu)[bank];
if (!b)
return;
+
if (!b->blocks)
goto free_out;
- sprintf(name, "threshold_bank%i", bank);
-
-#ifdef CONFIG_SMP
- /* sibling symlink */
- if (shared_bank[bank] && b->blocks->cpu != cpu) {
- dev = per_cpu(mce_device, cpu);
- sysfs_remove_link(&dev->kobj, name);
- per_cpu(threshold_banks, cpu)[bank] = NULL;
-
- return;
- }
-#endif
-
- /* remove all sibling symlinks before unregistering */
- for_each_cpu(i, b->cpus) {
- if (i == cpu)
- continue;
-
- dev = per_cpu(mce_device, i);
- if (dev)
- sysfs_remove_link(&dev->kobj, name);
- per_cpu(threshold_banks, i)[bank] = NULL;
+ if (shared_bank[bank]) {
+ if (!atomic_dec_and_test(&b->cpus)) {
+ __threshold_remove_blocks(b);
+ per_cpu(threshold_banks, cpu)[bank] = NULL;
+ return;
+ } else {
+ /*
+ * the last CPU on this node using the shared bank is
+ * going away, remove that bank now.
+ */
+ nb = node_to_amd_nb(amd_get_nb_id(cpu));
+ nb->bank4 = NULL;
+ }
}
deallocate_threshold_block(cpu, bank);
@@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
free_out:
kobject_del(b->kobj);
kobject_put(b->kobj);
- free_cpumask_var(b->cpus);
kfree(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
}
@@ -777,4 +759,24 @@ static __init int threshold_init_device(void)
return 0;
}
-device_initcall(threshold_init_device);
+/*
+ * there are 3 funcs which need to be _initcalled in a logic sequence:
+ * 1. xen_late_init_mcelog
+ * 2. mcheck_init_device
+ * 3. threshold_init_device
+ *
+ * xen_late_init_mcelog must register xen_mce_chrdev_device before
+ * native mce_chrdev_device registration if running under xen platform;
+ *
+ * mcheck_init_device should be inited before threshold_init_device to
+ * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
+ *
+ * so we use following _initcalls
+ * 1. device_initcall(xen_late_init_mcelog);
+ * 2. device_initcall_sync(mcheck_init_device);
+ * 3. late_initcall(threshold_init_device);
+ *
+ * when running under xen, the initcall order is 1,2,3;
+ * on baremetal, we skip 1 and we do only 2 and 3.
+ */
+late_initcall(threshold_init_device);
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index dfea390e160..c7b3fe2d72e 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
#
# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
#
@@ -11,22 +11,35 @@ open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
print OUT "#include <asm/cpufeature.h>\n\n";
print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
+%features = ();
+$err = 0;
+
while (defined($line = <IN>)) {
if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
$macro = $1;
- $feature = $2;
+ $feature = "\L$2";
$tail = $3;
if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
- $feature = $1;
+ $feature = "\L$1";
}
- if ($feature ne '') {
- printf OUT "\t%-32s = \"%s\",\n",
- "[$macro]", "\L$feature";
+ next if ($feature eq '');
+
+ if ($features{$feature}++) {
+ print STDERR "$in: duplicate feature name: $feature\n";
+ $err++;
}
+ printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature;
}
}
print OUT "};\n";
close(IN);
close(OUT);
+
+if ($err) {
+ unlink($out);
+ exit(1);
+}
+
+exit(0);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index bdda2e6c673..35ffda5d072 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
/* Compute the maximum size with which we can make a range: */
if (range_startk)
- max_align = ffs(range_startk) - 1;
+ max_align = __ffs(range_startk);
else
- max_align = 32;
+ max_align = BITS_PER_LONG - 1;
- align = fls(range_sizek) - 1;
+ align = __fls(range_sizek);
if (align > max_align)
align = max_align;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 75772ae6c65..e9fe907cd24 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -361,11 +361,7 @@ static void __init print_mtrr_state(void)
}
pr_debug("MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
- if (size_or_mask & 0xffffffffUL)
- high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
- else
- high_width = ffs(size_or_mask>>32) + 32 - 1;
- high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
+ high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c4706cf9c01..915b876edd1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -32,20 +32,11 @@
#include <asm/smp.h>
#include <asm/alternative.h>
#include <asm/timer.h>
+#include <asm/desc.h>
+#include <asm/ldt.h>
#include "perf_event.h"
-#if 0
-#undef wrmsrl
-#define wrmsrl(msr, val) \
-do { \
- trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
- (unsigned long)(val)); \
- native_write_msr((msr), (u32)((u64)(val)), \
- (u32)((u64)(val) >> 32)); \
-} while (0)
-#endif
-
struct x86_pmu x86_pmu __read_mostly;
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
@@ -74,7 +65,7 @@ u64 x86_perf_event_update(struct perf_event *event)
int idx = hwc->idx;
s64 delta;
- if (idx == X86_PMC_IDX_FIXED_BTS)
+ if (idx == INTEL_PMC_IDX_FIXED_BTS)
return 0;
/*
@@ -86,7 +77,7 @@ u64 x86_perf_event_update(struct perf_event *event)
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
- rdmsrl(hwc->event_base, new_raw_count);
+ rdpmcl(hwc->event_base_rdpmc, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -189,7 +180,7 @@ static void release_pmc_hardware(void) {}
static bool check_hw_exists(void)
{
- u64 val, val_new = 0;
+ u64 val, val_new = ~0;
int i, reg, ret = 0;
/*
@@ -222,8 +213,9 @@ static bool check_hw_exists(void)
* that don't trap on the MSR access and always return 0s.
*/
val = 0xabcdUL;
- ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
- ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
+ reg = x86_pmu_event_addr(0);
+ ret = wrmsrl_safe(reg, val);
+ ret |= rdmsrl_safe(reg, &val_new);
if (ret || val != val_new)
goto msr_fail;
@@ -240,6 +232,7 @@ bios_fail:
msr_fail:
printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+ printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
return false;
}
@@ -388,7 +381,7 @@ int x86_pmu_hw_config(struct perf_event *event)
int precise = 0;
/* Support for constant skid */
- if (x86_pmu.pebs_active) {
+ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
/* Support for IP fixup */
@@ -637,8 +630,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
c = sched->constraints[sched->state.event];
/* Prefer fixed purpose counters */
- if (x86_pmu.num_counters_fixed) {
- idx = X86_PMC_IDX_FIXED;
+ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
+ idx = INTEL_PMC_IDX_FIXED;
for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
@@ -646,7 +639,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
}
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
- for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+ for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
}
@@ -704,8 +697,8 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
* Assign a counter for each event.
*/
-static int perf_assign_events(struct event_constraint **constraints, int n,
- int wmin, int wmax, int *assign)
+int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int *assign)
{
struct perf_sched sched;
@@ -824,15 +817,17 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];
- if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
+ if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
hwc->config_base = 0;
hwc->event_base = 0;
- } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
+ } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
+ hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
} else {
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
+ hwc->event_base_rdpmc = hwc->idx;
}
}
@@ -930,7 +925,7 @@ int x86_perf_event_set_period(struct perf_event *event)
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;
- if (idx == X86_PMC_IDX_FIXED_BTS)
+ if (idx == INTEL_PMC_IDX_FIXED_BTS)
return 0;
/*
@@ -1316,7 +1311,6 @@ static struct attribute_group x86_pmu_format_group = {
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
- struct event_constraint *c;
int err;
pr_info("Performance Events: ");
@@ -1347,21 +1341,8 @@ static int __init init_hw_perf_events(void)
for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
quirk->func();
- if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
- WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
- x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
- x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
- }
- x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
-
- if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
- WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
- x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
- x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
- }
-
- x86_pmu.intel_ctrl |=
- ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
+ if (!x86_pmu.intel_ctrl)
+ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
perf_events_lapic_init();
register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
@@ -1370,22 +1351,6 @@ static int __init init_hw_perf_events(void)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
0, x86_pmu.num_counters, 0);
- if (x86_pmu.event_constraints) {
- /*
- * event on fixed counter2 (REF_CYCLES) only works on this
- * counter, so do not extend mask to generic counters
- */
- for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != X86_RAW_EVENT_MASK
- || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
- continue;
- }
-
- c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
- c->weight += x86_pmu.num_counters;
- }
- }
-
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
@@ -1620,8 +1585,8 @@ static int x86_pmu_event_idx(struct perf_event *event)
if (!x86_pmu.attr_rdpmc)
return 0;
- if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
- idx -= X86_PMC_IDX_FIXED;
+ if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+ idx -= INTEL_PMC_IDX_FIXED;
idx |= 1 << 30;
}
@@ -1649,7 +1614,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- unsigned long val = simple_strtoul(buf, NULL, 0);
+ unsigned long val;
+ ssize_t ret;
+
+ ret = kstrtoul(buf, 0, &val);
+ if (ret)
+ return ret;
if (!!val != !!x86_pmu.attr_rdpmc) {
x86_pmu.attr_rdpmc = !!val;
@@ -1682,13 +1652,20 @@ static void x86_pmu_flush_branch_stack(void)
x86_pmu.flush_branch_stack();
}
+void perf_check_microcode(void)
+{
+ if (x86_pmu.check_microcode)
+ x86_pmu.check_microcode();
+}
+EXPORT_SYMBOL_GPL(perf_check_microcode);
+
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
- .attr_groups = x86_pmu_attr_groups,
+ .attr_groups = x86_pmu_attr_groups,
- .event_init = x86_pmu_event_init,
+ .event_init = x86_pmu_event_init,
.add = x86_pmu_add,
.del = x86_pmu_del,
@@ -1696,11 +1673,11 @@ static struct pmu pmu = {
.stop = x86_pmu_stop,
.read = x86_pmu_read,
- .start_txn = x86_pmu_start_txn,
- .cancel_txn = x86_pmu_cancel_txn,
- .commit_txn = x86_pmu_commit_txn,
+ .start_txn = x86_pmu_start_txn,
+ .cancel_txn = x86_pmu_cancel_txn,
+ .commit_txn = x86_pmu_commit_txn,
- .event_idx = x86_pmu_event_idx,
+ .event_idx = x86_pmu_event_idx,
.flush_branch_stack = x86_pmu_flush_branch_stack,
};
@@ -1763,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size)
return (__range_not_ok(fp, size, TASK_SIZE) == 0);
}
+static unsigned long get_segment_base(unsigned int segment)
+{
+ struct desc_struct *desc;
+ int idx = segment >> 3;
+
+ if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+ if (idx > LDT_ENTRIES)
+ return 0;
+
+ if (idx > current->active_mm->context.size)
+ return 0;
+
+ desc = current->active_mm->context.ldt;
+ } else {
+ if (idx > GDT_ENTRIES)
+ return 0;
+
+ desc = __this_cpu_ptr(&gdt_page.gdt[0]);
+ }
+
+ return get_desc_base(desc + idx);
+}
+
#ifdef CONFIG_COMPAT
#include <asm/compat.h>
@@ -1771,13 +1771,17 @@ static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
/* 32-bit process in 64-bit kernel. */
+ unsigned long ss_base, cs_base;
struct stack_frame_ia32 frame;
const void __user *fp;
if (!test_thread_flag(TIF_IA32))
return 0;
- fp = compat_ptr(regs->bp);
+ cs_base = get_segment_base(regs->cs);
+ ss_base = get_segment_base(regs->ss);
+
+ fp = compat_ptr(ss_base + regs->bp);
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = 0;
@@ -1790,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
if (!valid_user_frame(fp, sizeof(frame)))
break;
- perf_callchain_store(entry, frame.return_address);
- fp = compat_ptr(frame.next_frame);
+ perf_callchain_store(entry, cs_base + frame.return_address);
+ fp = compat_ptr(ss_base + frame.next_frame);
}
return 1;
}
@@ -1814,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
}
+ /*
+ * We don't know what to do with VM86 stacks.. ignore them for now.
+ */
+ if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
+ return;
+
fp = (void __user *)regs->bp;
perf_callchain_store(entry, regs->ip);
@@ -1841,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
}
}
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+/*
+ * Deal with code segment offsets for the various execution modes:
+ *
+ * VM86 - the good olde 16 bit days, where the linear address is
+ * 20 bits and we use regs->ip + 0x10 * regs->cs.
+ *
+ * IA32 - Where we need to look at GDT/LDT segment descriptor tables
+ * to figure out what the 32bit base address is.
+ *
+ * X32 - has TIF_X32 set, but is running in x86_64
+ *
+ * X86_64 - CS,DS,SS,ES are all zero based.
+ */
+static unsigned long code_segment_base(struct pt_regs *regs)
{
- unsigned long ip;
+ /*
+ * If we are in VM86 mode, add the segment offset to convert to a
+ * linear address.
+ */
+ if (regs->flags & X86_VM_MASK)
+ return 0x10 * regs->cs;
+
+ /*
+ * For IA32 we look at the GDT/LDT segment base to convert the
+ * effective IP to a linear address.
+ */
+#ifdef CONFIG_X86_32
+ if (user_mode(regs) && regs->cs != __USER_CS)
+ return get_segment_base(regs->cs);
+#else
+ if (test_thread_flag(TIF_IA32)) {
+ if (user_mode(regs) && regs->cs != __USER32_CS)
+ return get_segment_base(regs->cs);
+ }
+#endif
+ return 0;
+}
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- ip = perf_guest_cbs->get_guest_ip();
- else
- ip = instruction_pointer(regs);
+ return perf_guest_cbs->get_guest_ip();
- return ip;
+ return regs->ip + code_segment_base(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7241e2fc3c1..6605a81ba33 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -14,6 +14,18 @@
#include <linux/perf_event.h>
+#if 0
+#undef wrmsrl
+#define wrmsrl(msr, val) \
+do { \
+ unsigned int _msr = (msr); \
+ u64 _val = (val); \
+ trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \
+ (unsigned long long)(_val)); \
+ native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \
+} while (0)
+#endif
+
/*
* | NHM/WSM | SNB |
* register -------------------------------
@@ -57,7 +69,7 @@ struct amd_nb {
};
/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS 4
+#define MAX_PEBS_EVENTS 8
/*
* A debug store configuration.
@@ -349,6 +361,8 @@ struct x86_pmu {
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
+
+ void (*check_microcode)(void);
void (*flush_branch_stack)(void);
/*
@@ -360,12 +374,16 @@ struct x86_pmu {
/*
* Intel DebugStore bits
*/
- int bts, pebs;
- int bts_active, pebs_active;
+ unsigned int bts :1,
+ bts_active :1,
+ pebs :1,
+ pebs_active :1,
+ pebs_broken :1;
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
+ int max_pebs_events;
/*
* Intel LBR
@@ -468,6 +486,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
void x86_pmu_enable_all(int added);
+int perf_assign_events(struct event_constraint **constraints, int n,
+ int wmin, int wmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
void x86_pmu_stop(struct perf_event *event, int flags);
@@ -496,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip)
#endif
}
+/*
+ * Not all PMUs provide the right context information to place the reported IP
+ * into full context. Specifically segment registers are typically not
+ * supplied.
+ *
+ * Assuming the address is a linear address (it is for IBS), we fake the CS and
+ * vm86 mode using the known zero-based code segment and 'fix up' the registers
+ * to reflect this.
+ *
+ * Intel PEBS/LBR appear to typically provide the effective address, nothing
+ * much we can do about that but pray and treat it like a linear address.
+ */
+static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
+{
+ regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
+ if (regs->flags & X86_VM_MASK)
+ regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
+ regs->ip = ip;
+}
+
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 11a4eb9131d..4528ae7b6ec 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu)
cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
- if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15)
+ if (boot_cpu_data.x86_max_cores < 2)
return;
nb_id = amd_get_nb_id(cpu);
@@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = {
NULL,
};
-static __initconst const struct x86_pmu amd_pmu = {
- .name = "AMD",
- .handle_irq = x86_pmu_handle_irq,
- .disable_all = x86_pmu_disable_all,
- .enable_all = x86_pmu_enable_all,
- .enable = x86_pmu_enable_event,
- .disable = x86_pmu_disable_event,
- .hw_config = amd_pmu_hw_config,
- .schedule_events = x86_schedule_events,
- .eventsel = MSR_K7_EVNTSEL0,
- .perfctr = MSR_K7_PERFCTR0,
- .event_map = amd_pmu_event_map,
- .max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = AMD64_NUM_COUNTERS,
- .cntval_bits = 48,
- .cntval_mask = (1ULL << 48) - 1,
- .apic = 1,
- /* use highest bit to detect overflow */
- .max_period = (1ULL << 47) - 1,
- .get_event_constraints = amd_get_event_constraints,
- .put_event_constraints = amd_put_event_constraints,
-
- .format_attrs = amd_format_attr,
-
- .cpu_prepare = amd_pmu_cpu_prepare,
- .cpu_starting = amd_pmu_cpu_starting,
- .cpu_dead = amd_pmu_cpu_dead,
-};
-
/* AMD Family 15h */
#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
@@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
}
}
-static __initconst const struct x86_pmu amd_pmu_f15h = {
- .name = "AMD Family 15h",
+static __initconst const struct x86_pmu amd_pmu = {
+ .name = "AMD",
.handle_irq = x86_pmu_handle_irq,
.disable_all = x86_pmu_disable_all,
.enable_all = x86_pmu_enable_all,
@@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
.disable = x86_pmu_disable_event,
.hw_config = amd_pmu_hw_config,
.schedule_events = x86_schedule_events,
- .eventsel = MSR_F15H_PERF_CTL,
- .perfctr = MSR_F15H_PERF_CTR,
+ .eventsel = MSR_K7_EVNTSEL0,
+ .perfctr = MSR_K7_PERFCTR0,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = AMD64_NUM_COUNTERS_F15H,
+ .num_counters = AMD64_NUM_COUNTERS,
.cntval_bits = 48,
.cntval_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
- .get_event_constraints = amd_get_event_constraints_f15h,
- /* nortbridge counters not yet implemented: */
-#if 0
+ .get_event_constraints = amd_get_event_constraints,
.put_event_constraints = amd_put_event_constraints,
+ .format_attrs = amd_format_attr,
+
.cpu_prepare = amd_pmu_cpu_prepare,
- .cpu_dead = amd_pmu_cpu_dead,
-#endif
.cpu_starting = amd_pmu_cpu_starting,
- .format_attrs = amd_format_attr,
+ .cpu_dead = amd_pmu_cpu_dead,
};
+static int setup_event_constraints(void)
+{
+ if (boot_cpu_data.x86 >= 0x15)
+ x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
+ return 0;
+}
+
+static int setup_perfctr_core(void)
+{
+ if (!cpu_has_perfctr_core) {
+ WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
+ KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+ return -ENODEV;
+ }
+
+ WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
+ KERN_ERR "hw perf events core counters need constraints handler!");
+
+ /*
+ * If core performance counter extensions exists, we must use
+ * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
+ * x86_pmu_addr_offset().
+ */
+ x86_pmu.eventsel = MSR_F15H_PERF_CTL;
+ x86_pmu.perfctr = MSR_F15H_PERF_CTR;
+ x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+
+ printk(KERN_INFO "perf: AMD core performance counters detected\n");
+
+ return 0;
+}
+
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
- /*
- * If core performance counter extensions exists, it must be
- * family 15h, otherwise fail. See x86_pmu_addr_offset().
- */
- switch (boot_cpu_data.x86) {
- case 0x15:
- if (!cpu_has_perfctr_core)
- return -ENODEV;
- x86_pmu = amd_pmu_f15h;
- break;
- default:
- if (cpu_has_perfctr_core)
- return -ENODEV;
- x86_pmu = amd_pmu;
- break;
- }
+ x86_pmu = amd_pmu;
+
+ setup_event_constraints();
+ setup_perfctr_core();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index da9bcdcd985..7bfb5bec863 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -13,6 +13,8 @@
#include <asm/apic.h>
+#include "perf_event.h"
+
static u32 ibs_caps;
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
@@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
regs.flags &= ~PERF_EFLAGS_EXACT;
} else {
- instruction_pointer_set(&regs, ibs_data.regs[1]);
+ set_linear_ip(&regs, ibs_data.regs[1]);
regs.flags |= PERF_EFLAGS_EXACT;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 187c294bc65..382366977d4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -5,6 +5,8 @@
* among events on a single PMU.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
@@ -21,14 +23,14 @@
*/
static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
- [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
- [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
};
static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -136,6 +138,84 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
+#define SNB_DMND_DATA_RD (1ULL << 0)
+#define SNB_DMND_RFO (1ULL << 1)
+#define SNB_DMND_IFETCH (1ULL << 2)
+#define SNB_DMND_WB (1ULL << 3)
+#define SNB_PF_DATA_RD (1ULL << 4)
+#define SNB_PF_RFO (1ULL << 5)
+#define SNB_PF_IFETCH (1ULL << 6)
+#define SNB_LLC_DATA_RD (1ULL << 7)
+#define SNB_LLC_RFO (1ULL << 8)
+#define SNB_LLC_IFETCH (1ULL << 9)
+#define SNB_BUS_LOCKS (1ULL << 10)
+#define SNB_STRM_ST (1ULL << 11)
+#define SNB_OTHER (1ULL << 15)
+#define SNB_RESP_ANY (1ULL << 16)
+#define SNB_NO_SUPP (1ULL << 17)
+#define SNB_LLC_HITM (1ULL << 18)
+#define SNB_LLC_HITE (1ULL << 19)
+#define SNB_LLC_HITS (1ULL << 20)
+#define SNB_LLC_HITF (1ULL << 21)
+#define SNB_LOCAL (1ULL << 22)
+#define SNB_REMOTE (0xffULL << 23)
+#define SNB_SNP_NONE (1ULL << 31)
+#define SNB_SNP_NOT_NEEDED (1ULL << 32)
+#define SNB_SNP_MISS (1ULL << 33)
+#define SNB_NO_FWD (1ULL << 34)
+#define SNB_SNP_FWD (1ULL << 35)
+#define SNB_HITM (1ULL << 36)
+#define SNB_NON_DRAM (1ULL << 37)
+
+#define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
+#define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO)
+#define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
+ SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
+ SNB_HITM)
+
+#define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
+#define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY)
+
+#define SNB_L3_ACCESS SNB_RESP_ANY
+#define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 snb_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
+ [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
+ },
+ },
+};
+
static __initconst const u64 snb_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -233,16 +313,16 @@ static __initconst const u64 snb_hw_cache_event_ids
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = -1,
- [ C(RESULT_MISS) ] = -1,
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
@@ -747,7 +827,7 @@ static void intel_pmu_disable_all(void)
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
- if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
intel_pmu_pebs_disable_all();
@@ -763,9 +843,9 @@ static void intel_pmu_enable_all(int added)
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
- if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+ if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
struct perf_event *event =
- cpuc->events[X86_PMC_IDX_FIXED_BTS];
+ cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
if (WARN_ON_ONCE(!event))
return;
@@ -871,7 +951,7 @@ static inline void intel_pmu_ack_status(u64 ack)
static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
{
- int idx = hwc->idx - X86_PMC_IDX_FIXED;
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask;
mask = 0xfULL << (idx * 4);
@@ -886,7 +966,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+ if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer();
return;
@@ -915,7 +995,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{
- int idx = hwc->idx - X86_PMC_IDX_FIXED;
+ int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask;
/*
@@ -949,7 +1029,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
+ if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
if (!__this_cpu_read(cpu_hw_events.enabled))
return;
@@ -1000,14 +1080,14 @@ static void intel_pmu_reset(void)
local_irq_save(flags);
- printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+ pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
- checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
+ wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
+ wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
- checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+ wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
if (ds)
ds->bts_index = ds->bts_buffer_base;
@@ -1707,16 +1787,61 @@ static __init void intel_clovertown_quirk(void)
* But taken together it might just make sense to not enable PEBS on
* these chips.
*/
- printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+ pr_warn("PEBS disabled due to CPU errata\n");
x86_pmu.pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
+static int intel_snb_pebs_broken(int cpu)
+{
+ u32 rev = UINT_MAX; /* default to broken for unknown models */
+
+ switch (cpu_data(cpu).x86_model) {
+ case 42: /* SNB */
+ rev = 0x28;
+ break;
+
+ case 45: /* SNB-EP */
+ switch (cpu_data(cpu).x86_mask) {
+ case 6: rev = 0x618; break;
+ case 7: rev = 0x70c; break;
+ }
+ }
+
+ return (cpu_data(cpu).microcode < rev);
+}
+
+static void intel_snb_check_microcode(void)
+{
+ int pebs_broken = 0;
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ if ((pebs_broken = intel_snb_pebs_broken(cpu)))
+ break;
+ }
+ put_online_cpus();
+
+ if (pebs_broken == x86_pmu.pebs_broken)
+ return;
+
+ /*
+ * Serialized by the microcode lock..
+ */
+ if (x86_pmu.pebs_broken) {
+ pr_info("PEBS enabled due to microcode update\n");
+ x86_pmu.pebs_broken = 0;
+ } else {
+ pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
+ x86_pmu.pebs_broken = 1;
+ }
+}
+
static __init void intel_sandybridge_quirk(void)
{
- printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
- x86_pmu.pebs = 0;
- x86_pmu.pebs_constraints = NULL;
+ x86_pmu.check_microcode = intel_snb_check_microcode;
+ intel_snb_check_microcode();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
@@ -1736,8 +1861,8 @@ static __init void intel_arch_events_quirk(void)
/* disable event that reported as not presend by cpuid */
for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
- printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
- intel_arch_events_map[bit].name);
+ pr_warn("CPUID marked event: \'%s\' unavailable\n",
+ intel_arch_events_map[bit].name);
}
}
@@ -1756,7 +1881,7 @@ static __init void intel_nehalem_quirk(void)
intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
ebx.split.no_branch_misses_retired = 0;
x86_pmu.events_maskl = ebx.full;
- printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+ pr_info("CPU erratum AAJ80 worked around\n");
}
}
@@ -1765,6 +1890,7 @@ __init int intel_pmu_init(void)
union cpuid10_edx edx;
union cpuid10_eax eax;
union cpuid10_ebx ebx;
+ struct event_constraint *c;
unsigned int unused;
int version;
@@ -1800,6 +1926,8 @@ __init int intel_pmu_init(void)
x86_pmu.events_maskl = ebx.full;
x86_pmu.events_mask_len = eax.split.mask_length;
+ x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
+
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
* assume at least 3 events:
@@ -1914,6 +2042,8 @@ __init int intel_pmu_init(void)
case 58: /* IvyBridge */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_snb();
@@ -1951,5 +2081,37 @@ __init int intel_pmu_init(void)
}
}
+ if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
+ WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+ x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+ x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+ }
+ x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+ if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
+ WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+ x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+ x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+ }
+
+ x86_pmu.intel_ctrl |=
+ ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+ if (x86_pmu.event_constraints) {
+ /*
+ * event on fixed counter2 (REF_CYCLES) only works on this
+ * counter, so do not extend mask to generic counters
+ */
+ for_each_event_constraint(c, x86_pmu.event_constraints) {
+ if (c->cmask != X86_RAW_EVENT_MASK
+ || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+ continue;
+ }
+
+ c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+ c->weight += x86_pmu.num_counters;
+ }
+ }
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 35e2192df9f..e38d97bf425 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -248,7 +248,7 @@ void reserve_ds_buffers(void)
*/
struct event_constraint bts_constraint =
- EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+ EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
void intel_pmu_enable_bts(u64 config)
{
@@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void)
u64 to;
u64 flags;
};
- struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+ struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
struct bts_record *at, *top;
struct perf_output_handle handle;
struct perf_event_header header;
@@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
* We sampled a branch insn, rewind using the LBR stack
*/
if (ip == to) {
- regs->ip = from;
+ set_linear_ip(regs, from);
return 1;
}
@@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
} while (to < ip);
if (to == ip) {
- regs->ip = old_to;
+ set_linear_ip(regs, old_to);
return 1;
}
@@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
*/
regs = *iregs;
- regs.ip = pebs->ip;
+ regs.flags = pebs->flags;
+ set_linear_ip(&regs, pebs->ip);
regs.bp = pebs->bp;
regs.sp = pebs->sp;
@@ -620,7 +621,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
* Should not happen, we program the threshold at 1 and do not
* set a reset value.
*/
- WARN_ON_ONCE(n > 1);
+ WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
at += n - 1;
__intel_pmu_pebs_event(event, iregs, at);
@@ -651,10 +652,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
* Should not happen, we program the threshold at 1 and do not
* set a reset value.
*/
- WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+ WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
for ( ; at < top; at++) {
- for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+ for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
event = cpuc->events[bit];
if (!test_bit(bit, cpuc->active_mask))
continue;
@@ -670,7 +671,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
break;
}
- if (!event || bit >= MAX_PEBS_EVENTS)
+ if (!event || bit >= x86_pmu.max_pebs_events)
continue;
__intel_pmu_pebs_event(event, iregs, at);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
new file mode 100644
index 00000000000..7563fda9f03
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -0,0 +1,2900 @@
+#include "perf_event_intel_uncore.h"
+
+static struct intel_uncore_type *empty_uncore[] = { NULL, };
+static struct intel_uncore_type **msr_uncores = empty_uncore;
+static struct intel_uncore_type **pci_uncores = empty_uncore;
+/* pci bus to socket mapping */
+static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
+
+static DEFINE_RAW_SPINLOCK(uncore_box_lock);
+
+/* mask of cpus that collect uncore events */
+static cpumask_t uncore_cpu_mask;
+
+/* constraint for the fixed counter */
+static struct event_constraint constraint_fixed =
+ EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
+static struct event_constraint constraint_empty =
+ EVENT_CONSTRAINT(0, 0, 0);
+
+DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
+DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
+DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
+DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
+DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
+DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
+DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
+DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
+DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+
+static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ u64 count;
+
+ rdmsrl(event->hw.event_base, count);
+
+ return count;
+}
+
+/*
+ * generic get constraint function for shared match/mask registers.
+ */
+static struct event_constraint *
+uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ unsigned long flags;
+ bool ok = false;
+
+ /*
+ * reg->alloc can be set due to existing state, so for fake box we
+ * need to ignore this, otherwise we might fail to allocate proper
+ * fake state for this extra reg constraint.
+ */
+ if (reg1->idx == EXTRA_REG_NONE ||
+ (!uncore_box_is_fake(box) && reg1->alloc))
+ return NULL;
+
+ er = &box->shared_regs[reg1->idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!atomic_read(&er->ref) ||
+ (er->config1 == reg1->config && er->config2 == reg2->config)) {
+ atomic_inc(&er->ref);
+ er->config1 = reg1->config;
+ er->config2 = reg2->config;
+ ok = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (ok) {
+ if (!uncore_box_is_fake(box))
+ reg1->alloc = 1;
+ return NULL;
+ }
+
+ return &constraint_empty;
+}
+
+static void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+
+ /*
+ * Only put constraint if extra reg was actually allocated. Also
+ * takes care of event which do not use an extra shared reg.
+ *
+ * Also, if this is a fake box we shouldn't touch any event state
+ * (reg->alloc) and we don't care about leaving inconsistent box
+ * state either since it will be thrown out.
+ */
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ er = &box->shared_regs[reg1->idx];
+ atomic_dec(&er->ref);
+ reg1->alloc = 0;
+}
+
+/* Sandy Bridge-EP uncore support */
+static struct intel_uncore_type snbep_uncore_cbox;
+static struct intel_uncore_type snbep_uncore_pcu;
+
+static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+ u32 config;
+
+ pci_read_config_dword(pdev, box_ctl, &config);
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+}
+
+static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ int box_ctl = uncore_pci_box_ctl(box);
+ u32 config;
+
+ pci_read_config_dword(pdev, box_ctl, &config);
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+}
+
+static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config);
+}
+
+static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count;
+
+ pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
+ pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
+
+ return count;
+}
+
+static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = box->pci_dev;
+
+ pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ u64 config;
+ unsigned msr;
+
+ msr = uncore_msr_box_ctl(box);
+ if (msr) {
+ rdmsrl(msr, config);
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ wrmsrl(msr, config);
+ }
+}
+
+static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ u64 config;
+ unsigned msr;
+
+ msr = uncore_msr_box_ctl(box);
+ if (msr) {
+ rdmsrl(msr, config);
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ wrmsrl(msr, config);
+ }
+}
+
+static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE)
+ wrmsrl(reg1->reg, reg1->config);
+
+ wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ wrmsrl(hwc->config_base, hwc->config);
+}
+
+static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+
+ if (msr)
+ wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
+}
+
+static int snbep_uncore_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+
+ if (box->pmu->type == &snbep_uncore_cbox) {
+ reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
+ SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+ reg1->config = event->attr.config1 &
+ SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK;
+ } else {
+ if (box->pmu->type == &snbep_uncore_pcu) {
+ reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
+ reg1->config = event->attr.config1 & SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
+ } else {
+ return 0;
+ }
+ }
+ reg1->idx = 0;
+
+ return 0;
+}
+
+static struct attribute *snbep_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_tid_en.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_filter_tid.attr,
+ &format_attr_filter_nid.attr,
+ &format_attr_filter_state.attr,
+ &format_attr_filter_opc.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_pcu_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_occ_sel.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh5.attr,
+ &format_attr_occ_invert.attr,
+ &format_attr_occ_edge.attr,
+ &format_attr_filter_band0.attr,
+ &format_attr_filter_band1.attr,
+ &format_attr_filter_band2.attr,
+ &format_attr_filter_band3.attr,
+ NULL,
+};
+
+static struct attribute *snbep_uncore_qpi_formats_attr[] = {
+ &format_attr_event_ext.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct uncore_event_desc snbep_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
+ INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
+ { /* end: all zeroes */ },
+};
+
+static struct uncore_event_desc snbep_uncore_qpi_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
+ INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
+ INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"),
+ { /* end: all zeroes */ },
+};
+
+static struct attribute_group snbep_uncore_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_ubox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_cbox_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_pcu_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_pcu_formats_attr,
+};
+
+static struct attribute_group snbep_uncore_qpi_format_group = {
+ .name = "format",
+ .attrs = snbep_uncore_qpi_formats_attr,
+};
+
+static struct intel_uncore_ops snbep_uncore_msr_ops = {
+ .init_box = snbep_uncore_msr_init_box,
+ .disable_box = snbep_uncore_msr_disable_box,
+ .enable_box = snbep_uncore_msr_enable_box,
+ .disable_event = snbep_uncore_msr_disable_event,
+ .enable_event = snbep_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+ .hw_config = snbep_uncore_hw_config,
+};
+
+static struct intel_uncore_ops snbep_uncore_pci_ops = {
+ .init_box = snbep_uncore_pci_init_box,
+ .disable_box = snbep_uncore_pci_disable_box,
+ .enable_box = snbep_uncore_pci_enable_box,
+ .disable_event = snbep_uncore_pci_disable_event,
+ .enable_event = snbep_uncore_pci_enable_event,
+ .read_counter = snbep_uncore_pci_read_counter,
+};
+
+static struct event_constraint snbep_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
+ EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snbep_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 2,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_U_MSR_PMON_CTL0,
+ .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+ .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
+ .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &snbep_uncore_ubox_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 8,
+ .perf_ctr_bits = 44,
+ .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
+ .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
+ .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
+ .msr_offset = SNBEP_CBO_MSR_OFFSET,
+ .num_shared_regs = 1,
+ .constraints = snbep_uncore_cbox_constraints,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &snbep_uncore_cbox_format_group,
+};
+
+static struct intel_uncore_type snbep_uncore_pcu = {
+ .name = "pcu",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
+ .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
+ .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_msr_ops,
+ .format_group = &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *snbep_msr_uncores[] = {
+ &snbep_uncore_ubox,
+ &snbep_uncore_cbox,
+ &snbep_uncore_pcu,
+ NULL,
+};
+
+#define SNBEP_UNCORE_PCI_COMMON_INIT() \
+ .perf_ctr = SNBEP_PCI_PMON_CTR0, \
+ .event_ctl = SNBEP_PCI_PMON_CTL0, \
+ .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
+ .ops = &snbep_uncore_pci_ops, \
+ .format_group = &snbep_uncore_format_group
+
+static struct intel_uncore_type snbep_uncore_ha = {
+ .name = "ha",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_imc = {
+ .name = "imc",
+ .num_counters = 4,
+ .num_boxes = 4,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+ .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+ .event_descs = snbep_uncore_imc_events,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_qpi = {
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .ops = &snbep_uncore_pci_ops,
+ .event_descs = snbep_uncore_qpi_events,
+ .format_group = &snbep_uncore_qpi_format_group,
+};
+
+
+static struct intel_uncore_type snbep_uncore_r2pcie = {
+ .name = "r2pcie",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r2pcie_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type snbep_uncore_r3qpi = {
+ .name = "r3qpi",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .perf_ctr_bits = 44,
+ .constraints = snbep_uncore_r3qpi_constraints,
+ SNBEP_UNCORE_PCI_COMMON_INIT(),
+};
+
+static struct intel_uncore_type *snbep_pci_uncores[] = {
+ &snbep_uncore_ha,
+ &snbep_uncore_imc,
+ &snbep_uncore_qpi,
+ &snbep_uncore_r2pcie,
+ &snbep_uncore_r3qpi,
+ NULL,
+};
+
+static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
+ { /* Home Agent */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
+ .driver_data = (unsigned long)&snbep_uncore_ha,
+ },
+ { /* MC Channel 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
+ .driver_data = (unsigned long)&snbep_uncore_imc,
+ },
+ { /* MC Channel 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
+ .driver_data = (unsigned long)&snbep_uncore_imc,
+ },
+ { /* MC Channel 2 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
+ .driver_data = (unsigned long)&snbep_uncore_imc,
+ },
+ { /* MC Channel 3 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
+ .driver_data = (unsigned long)&snbep_uncore_imc,
+ },
+ { /* QPI Port 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
+ .driver_data = (unsigned long)&snbep_uncore_qpi,
+ },
+ { /* QPI Port 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
+ .driver_data = (unsigned long)&snbep_uncore_qpi,
+ },
+ { /* P2PCIe */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
+ .driver_data = (unsigned long)&snbep_uncore_r2pcie,
+ },
+ { /* R3QPI Link 0 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
+ .driver_data = (unsigned long)&snbep_uncore_r3qpi,
+ },
+ { /* R3QPI Link 1 */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
+ .driver_data = (unsigned long)&snbep_uncore_r3qpi,
+ },
+ { /* end: all zeroes */ }
+};
+
+static struct pci_driver snbep_uncore_pci_driver = {
+ .name = "snbep_uncore",
+ .id_table = snbep_uncore_pci_ids,
+};
+
+/*
+ * build pci bus to socket mapping
+ */
+static void snbep_pci2phy_map_init(void)
+{
+ struct pci_dev *ubox_dev = NULL;
+ int i, bus, nodeid;
+ u32 config;
+
+ while (1) {
+ /* find the UBOX device */
+ ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX,
+ ubox_dev);
+ if (!ubox_dev)
+ break;
+ bus = ubox_dev->bus->number;
+ /* get the Node ID of the local register */
+ pci_read_config_dword(ubox_dev, 0x40, &config);
+ nodeid = config;
+ /* get the Node ID mapping */
+ pci_read_config_dword(ubox_dev, 0x54, &config);
+ /*
+ * every three bits in the Node ID mapping register maps
+ * to a particular node.
+ */
+ for (i = 0; i < 8; i++) {
+ if (nodeid == ((config >> (3 * i)) & 0x7)) {
+ pcibus_to_physid[bus] = i;
+ break;
+ }
+ }
+ };
+ return;
+}
+/* end of Sandy Bridge-EP uncore support */
+
+/* Sandy Bridge uncore support */
+static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ else
+ wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
+}
+
+static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ wrmsrl(event->hw.config_base, 0);
+}
+
+static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0) {
+ wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+ }
+}
+
+static struct attribute *snb_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask5.attr,
+ NULL,
+};
+
+static struct attribute_group snb_uncore_format_group = {
+ .name = "format",
+ .attrs = snb_uncore_formats_attr,
+};
+
+static struct intel_uncore_ops snb_uncore_msr_ops = {
+ .init_box = snb_uncore_msr_init_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct event_constraint snb_uncore_cbox_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
+ UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type snb_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 2,
+ .num_boxes = 4,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .constraints = snb_uncore_cbox_constraints,
+ .ops = &snb_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+};
+
+static struct intel_uncore_type *snb_msr_uncores[] = {
+ &snb_uncore_cbox,
+ NULL,
+};
+/* end of Sandy Bridge uncore support */
+
+/* Nehalem uncore support */
+static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
+}
+
+static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx < UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
+ else
+ wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
+}
+
+static struct attribute *nhm_uncore_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask8.attr,
+ NULL,
+};
+
+static struct attribute_group nhm_uncore_format_group = {
+ .name = "format",
+ .attrs = nhm_uncore_formats_attr,
+};
+
+static struct uncore_event_desc nhm_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+ INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"),
+ INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhm_uncore_msr_ops = {
+ .disable_box = nhm_uncore_msr_disable_box,
+ .enable_box = nhm_uncore_msr_enable_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = nhm_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type nhm_uncore = {
+ .name = "",
+ .num_counters = 8,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .fixed_ctr_bits = 48,
+ .event_ctl = NHM_UNC_PERFEVTSEL0,
+ .perf_ctr = NHM_UNC_UNCORE_PMC0,
+ .fixed_ctr = NHM_UNC_FIXED_CTR,
+ .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL,
+ .event_mask = NHM_UNC_RAW_EVENT_MASK,
+ .event_descs = nhm_uncore_events,
+ .ops = &nhm_uncore_msr_ops,
+ .format_group = &nhm_uncore_format_group,
+};
+
+static struct intel_uncore_type *nhm_msr_uncores[] = {
+ &nhm_uncore,
+ NULL,
+};
+/* end of Nehalem uncore support */
+
+/* Nehalem-EX uncore support */
+#define __BITS_VALUE(x, i, n) ((typeof(x))(((x) >> ((i) * (n))) & \
+ ((1ULL << (n)) - 1)))
+
+DEFINE_UNCORE_FORMAT_ATTR(event5, event, "config:1-5");
+DEFINE_UNCORE_FORMAT_ATTR(counter, counter, "config:6-7");
+DEFINE_UNCORE_FORMAT_ATTR(mm_cfg, mm_cfg, "config:63");
+DEFINE_UNCORE_FORMAT_ATTR(match, match, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63");
+
+static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL);
+}
+
+static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ rdmsrl(msr, config);
+ config &= ~((1ULL << uncore_num_counters(box)) - 1);
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ unsigned msr = uncore_msr_box_ctl(box);
+ u64 config;
+
+ if (msr) {
+ rdmsrl(msr, config);
+ config |= (1ULL << uncore_num_counters(box)) - 1;
+ /* WBox has a fixed counter */
+ if (uncore_msr_fixed_ctl(box))
+ config |= NHMEX_W_PMON_GLOBAL_FIXED_EN;
+ wrmsrl(msr, config);
+ }
+}
+
+static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ wrmsrl(event->hw.config_base, 0);
+}
+
+static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
+ else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+ else
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+#define NHMEX_UNCORE_OPS_COMMON_INIT() \
+ .init_box = nhmex_uncore_msr_init_box, \
+ .disable_box = nhmex_uncore_msr_disable_box, \
+ .enable_box = nhmex_uncore_msr_enable_box, \
+ .disable_event = nhmex_uncore_msr_disable_event, \
+ .read_counter = uncore_msr_read_counter
+
+static struct intel_uncore_ops nhmex_uncore_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_uncore_msr_enable_event,
+};
+
+static struct attribute *nhmex_uncore_ubox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_edge.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_ubox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_ubox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_ubox = {
+ .name = "ubox",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_U_MSR_PMON_EV_SEL,
+ .perf_ctr = NHMEX_U_MSR_PMON_CTR,
+ .event_mask = NHMEX_U_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_U_MSR_PMON_GLOBAL_CTL,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_ubox_format_group
+};
+
+static struct attribute *nhmex_uncore_cbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_cbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_cbox_formats_attr,
+};
+
+static struct intel_uncore_type nhmex_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 6,
+ .num_boxes = 8,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_C0_MSR_PMON_EV_SEL0,
+ .perf_ctr = NHMEX_C0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_C0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_C_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+static struct uncore_event_desc nhmex_uncore_wbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_type nhmex_uncore_wbox = {
+ .name = "wbox",
+ .num_counters = 4,
+ .num_boxes = 1,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_W_MSR_PMON_CNT0,
+ .perf_ctr = NHMEX_W_MSR_PMON_EVT_SEL0,
+ .fixed_ctr = NHMEX_W_MSR_PMON_FIXED_CTR,
+ .fixed_ctl = NHMEX_W_MSR_PMON_FIXED_CTL,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_W_MSR_GLOBAL_CTL,
+ .pair_ctr_ctl = 1,
+ .event_descs = nhmex_uncore_wbox_events,
+ .ops = &nhmex_uncore_ops,
+ .format_group = &nhmex_uncore_cbox_format_group
+};
+
+static int nhmex_bbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int ctr, ev_sel;
+
+ ctr = (hwc->config & NHMEX_B_PMON_CTR_MASK) >>
+ NHMEX_B_PMON_CTR_SHIFT;
+ ev_sel = (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK) >>
+ NHMEX_B_PMON_CTL_EV_SEL_SHIFT;
+
+ /* events that do not use the match/mask registers */
+ if ((ctr == 0 && ev_sel > 0x3) || (ctr == 1 && ev_sel > 0x6) ||
+ (ctr == 2 && ev_sel != 0x4) || ctr == 3)
+ return 0;
+
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_B0_MSR_MATCH;
+ else
+ reg1->reg = NHMEX_B1_MSR_MATCH;
+ reg1->idx = 0;
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ return 0;
+}
+
+static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ wrmsrl(reg1->reg, reg1->config);
+ wrmsrl(reg1->reg + 1, reg2->config);
+ }
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK));
+}
+
+/*
+ * The Bbox has 4 counters, but each counter monitors different events.
+ * Use bits 6-7 in the event config to select counter.
+ */
+static struct event_constraint nhmex_uncore_bbox_constraints[] = {
+ EVENT_CONSTRAINT(0 , 1, 0xc0),
+ EVENT_CONSTRAINT(0x40, 2, 0xc0),
+ EVENT_CONSTRAINT(0x80, 4, 0xc0),
+ EVENT_CONSTRAINT(0xc0, 8, 0xc0),
+ EVENT_CONSTRAINT_END,
+};
+
+static struct attribute *nhmex_uncore_bbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_counter.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_bbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_bbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_bbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_bbox_msr_enable_event,
+ .hw_config = nhmex_bbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_bbox = {
+ .name = "bbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_B0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_B0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_B_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_B0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_B_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .constraints = nhmex_uncore_bbox_constraints,
+ .ops = &nhmex_uncore_bbox_ops,
+ .format_group = &nhmex_uncore_bbox_format_group
+};
+
+static int nhmex_sbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+ if (event->attr.config & NHMEX_S_PMON_MM_CFG_EN) {
+ reg1->config = event->attr.config1;
+ reg2->config = event->attr.config2;
+ } else {
+ reg1->config = ~0ULL;
+ reg2->config = ~0ULL;
+ }
+
+ if (box->pmu->pmu_idx == 0)
+ reg1->reg = NHMEX_S0_MSR_MM_CFG;
+ else
+ reg1->reg = NHMEX_S1_MSR_MM_CFG;
+
+ reg1->idx = 0;
+
+ return 0;
+}
+
+static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ wrmsrl(reg1->reg, 0);
+ if (reg1->config != ~0ULL || reg2->config != ~0ULL) {
+ wrmsrl(reg1->reg + 1, reg1->config);
+ wrmsrl(reg1->reg + 2, reg2->config);
+ wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN);
+ }
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
+}
+
+static struct attribute *nhmex_uncore_sbox_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_thresh8.attr,
+ &format_attr_mm_cfg.attr,
+ &format_attr_match.attr,
+ &format_attr_mask.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_sbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_sbox_formats_attr,
+};
+
+static struct intel_uncore_ops nhmex_uncore_sbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_sbox_msr_enable_event,
+ .hw_config = nhmex_sbox_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_sbox = {
+ .name = "sbox",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_S0_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_S0_MSR_PMON_CTR0,
+ .event_mask = NHMEX_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_S0_MSR_PMON_GLOBAL_CTL,
+ .msr_offset = NHMEX_S_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 1,
+ .ops = &nhmex_uncore_sbox_ops,
+ .format_group = &nhmex_uncore_sbox_format_group
+};
+
+enum {
+ EXTRA_REG_NHMEX_M_FILTER,
+ EXTRA_REG_NHMEX_M_DSP,
+ EXTRA_REG_NHMEX_M_ISS,
+ EXTRA_REG_NHMEX_M_MAP,
+ EXTRA_REG_NHMEX_M_MSC_THR,
+ EXTRA_REG_NHMEX_M_PGT,
+ EXTRA_REG_NHMEX_M_PLD,
+ EXTRA_REG_NHMEX_M_ZDP_CTL_FVC,
+};
+
+static struct extra_reg nhmex_uncore_mbox_extra_regs[] = {
+ MBOX_INC_SEL_EXTAR_REG(0x0, DSP),
+ MBOX_INC_SEL_EXTAR_REG(0x4, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x5, MSC_THR),
+ MBOX_INC_SEL_EXTAR_REG(0x9, ISS),
+ /* event 0xa uses two extra registers */
+ MBOX_INC_SEL_EXTAR_REG(0xa, ISS),
+ MBOX_INC_SEL_EXTAR_REG(0xa, PLD),
+ MBOX_INC_SEL_EXTAR_REG(0xb, PLD),
+ /* events 0xd ~ 0x10 use the same extra register */
+ MBOX_INC_SEL_EXTAR_REG(0xd, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xe, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0xf, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x10, ZDP_CTL_FVC),
+ MBOX_INC_SEL_EXTAR_REG(0x16, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x0, DSP),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x1, ISS),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x5, PGT),
+ MBOX_SET_FLAG_SEL_EXTRA_REG(0x6, MAP),
+ EVENT_EXTRA_END
+};
+
+static bool nhmex_mbox_get_shared_reg(struct intel_uncore_box *box, int idx, u64 config)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ bool ret = false;
+ u64 mask;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (!atomic_read(&er->ref) || er->config == config) {
+ atomic_inc(&er->ref);
+ er->config = config;
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+ }
+ /*
+ * The ZDP_CTL_FVC MSR has 4 fields which are used to control
+ * events 0xd ~ 0x10. Besides these 4 fields, there are additional
+ * fields which are shared.
+ */
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (WARN_ON_ONCE(idx >= 4))
+ return false;
+
+ /* mask of the shared fields */
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ /* add mask of the non-shared field if it's in use */
+ if (__BITS_VALUE(atomic_read(&er->ref), idx, 8))
+ mask |= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+
+ if (!atomic_read(&er->ref) || !((er->config ^ config) & mask)) {
+ atomic_add(1 << (idx * 8), &er->ref);
+ mask = NHMEX_M_PMON_ZDP_CTL_FVC_MASK |
+ NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ er->config &= ~mask;
+ er->config |= (config & mask);
+ ret = true;
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return ret;
+}
+
+static void nhmex_mbox_put_shared_reg(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ er = &box->shared_regs[idx];
+ atomic_dec(&er->ref);
+ return;
+ }
+
+ idx -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ atomic_sub(1 << (idx * 8), &er->ref);
+}
+
+u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modify)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+ u64 config = reg1->config;
+
+ /* get the non-shared control bits and shift them */
+ idx = orig_idx - EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ config &= NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(idx);
+ if (new_idx > orig_idx) {
+ idx = new_idx - orig_idx;
+ config <<= 3 * idx;
+ } else {
+ idx = orig_idx - new_idx;
+ config >>= 3 * idx;
+ }
+
+ /* add the shared control bits back */
+ config |= NHMEX_M_PMON_ZDP_CTL_FVC_MASK & reg1->config;
+ if (modify) {
+ /* adjust the main event selector */
+ if (new_idx > orig_idx)
+ hwc->config += idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ else
+ hwc->config -= idx << NHMEX_M_PMON_CTL_INC_SEL_SHIFT;
+ reg1->config = config;
+ reg1->idx = ~0xff | new_idx;
+ }
+ return config;
+}
+
+static struct event_constraint *
+nhmex_mbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ int i, idx[2], alloc = 0;
+ u64 config1 = reg1->config;
+
+ idx[0] = __BITS_VALUE(reg1->idx, 0, 8);
+ idx[1] = __BITS_VALUE(reg1->idx, 1, 8);
+again:
+ for (i = 0; i < 2; i++) {
+ if (!uncore_box_is_fake(box) && (reg1->alloc & (0x1 << i)))
+ idx[i] = 0xff;
+
+ if (idx[i] == 0xff)
+ continue;
+
+ if (!nhmex_mbox_get_shared_reg(box, idx[i],
+ __BITS_VALUE(config1, i, 32)))
+ goto fail;
+ alloc |= (0x1 << i);
+ }
+
+ /* for the match/mask registers */
+ if ((uncore_box_is_fake(box) || !reg2->alloc) &&
+ !nhmex_mbox_get_shared_reg(box, reg2->idx, reg2->config))
+ goto fail;
+
+ /*
+ * If it's a fake box -- as per validate_{group,event}() we
+ * shouldn't touch event state and we can avoid doing so
+ * since both will only call get_event_constraints() once
+ * on each event, this avoids the need for reg->alloc.
+ */
+ if (!uncore_box_is_fake(box)) {
+ if (idx[0] != 0xff && idx[0] != __BITS_VALUE(reg1->idx, 0, 8))
+ nhmex_mbox_alter_er(event, idx[0], true);
+ reg1->alloc |= alloc;
+ reg2->alloc = 1;
+ }
+ return NULL;
+fail:
+ if (idx[0] != 0xff && !(alloc & 0x1) &&
+ idx[0] >= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC) {
+ /*
+ * events 0xd ~ 0x10 are functional identical, but are
+ * controlled by different fields in the ZDP_CTL_FVC
+ * register. If we failed to take one field, try the
+ * rest 3 choices.
+ */
+ BUG_ON(__BITS_VALUE(reg1->idx, 1, 8) != 0xff);
+ idx[0] -= EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ idx[0] = (idx[0] + 1) % 4;
+ idx[0] += EXTRA_REG_NHMEX_M_ZDP_CTL_FVC;
+ if (idx[0] != __BITS_VALUE(reg1->idx, 0, 8)) {
+ config1 = nhmex_mbox_alter_er(event, idx[0], false);
+ goto again;
+ }
+ }
+
+ if (alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, idx[0]);
+ if (alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, idx[1]);
+ return &constraint_empty;
+}
+
+static void nhmex_mbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+
+ if (uncore_box_is_fake(box))
+ return;
+
+ if (reg1->alloc & 0x1)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 0, 8));
+ if (reg1->alloc & 0x2)
+ nhmex_mbox_put_shared_reg(box, __BITS_VALUE(reg1->idx, 1, 8));
+ reg1->alloc = 0;
+
+ if (reg2->alloc) {
+ nhmex_mbox_put_shared_reg(box, reg2->idx);
+ reg2->alloc = 0;
+ }
+}
+
+static int nhmex_mbox_extra_reg_idx(struct extra_reg *er)
+{
+ if (er->idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return er->idx;
+ return er->idx + (er->event >> NHMEX_M_PMON_CTL_INC_SEL_SHIFT) - 0xd;
+}
+
+static int nhmex_mbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_type *type = box->pmu->type;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ struct extra_reg *er;
+ unsigned msr;
+ int reg_idx = 0;
+
+ if (WARN_ON_ONCE(reg1->idx != -1))
+ return -EINVAL;
+ /*
+ * The mbox events may require 2 extra MSRs at the most. But only
+ * the lower 32 bits in these MSRs are significant, so we can use
+ * config1 to pass two MSRs' config.
+ */
+ for (er = nhmex_uncore_mbox_extra_regs; er->msr; er++) {
+ if (er->event != (event->hw.config & er->config_mask))
+ continue;
+ if (event->attr.config1 & ~er->valid_mask)
+ return -EINVAL;
+ if (er->idx == __BITS_VALUE(reg1->idx, 0, 8) ||
+ er->idx == __BITS_VALUE(reg1->idx, 1, 8))
+ continue;
+ if (WARN_ON_ONCE(reg_idx >= 2))
+ return -EINVAL;
+
+ msr = er->msr + type->msr_offset * box->pmu->pmu_idx;
+ if (WARN_ON_ONCE(msr >= 0xffff || er->idx >= 0xff))
+ return -EINVAL;
+
+ /* always use the 32~63 bits to pass the PLD config */
+ if (er->idx == EXTRA_REG_NHMEX_M_PLD)
+ reg_idx = 1;
+
+ reg1->idx &= ~(0xff << (reg_idx * 8));
+ reg1->reg &= ~(0xffff << (reg_idx * 16));
+ reg1->idx |= nhmex_mbox_extra_reg_idx(er) << (reg_idx * 8);
+ reg1->reg |= msr << (reg_idx * 16);
+ reg1->config = event->attr.config1;
+ reg_idx++;
+ }
+ /* use config2 to pass the filter config */
+ reg2->idx = EXTRA_REG_NHMEX_M_FILTER;
+ if (event->attr.config2 & NHMEX_M_PMON_MM_CFG_EN)
+ reg2->config = event->attr.config2;
+ else
+ reg2->config = ~0ULL;
+ if (box->pmu->pmu_idx == 0)
+ reg2->reg = NHMEX_M0_MSR_PMU_MM_CFG;
+ else
+ reg2->reg = NHMEX_M1_MSR_PMU_MM_CFG;
+
+ return 0;
+}
+
+static u64 nhmex_mbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ u64 config;
+
+ if (idx < EXTRA_REG_NHMEX_M_ZDP_CTL_FVC)
+ return box->shared_regs[idx].config;
+
+ er = &box->shared_regs[EXTRA_REG_NHMEX_M_ZDP_CTL_FVC];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ config = er->config;
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+ return config;
+}
+
+static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx;
+
+ idx = __BITS_VALUE(reg1->idx, 0, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 0, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+ idx = __BITS_VALUE(reg1->idx, 1, 8);
+ if (idx != 0xff)
+ wrmsrl(__BITS_VALUE(reg1->reg, 1, 16),
+ nhmex_mbox_shared_reg_config(box, idx));
+
+ wrmsrl(reg2->reg, 0);
+ if (reg2->config != ~0ULL) {
+ wrmsrl(reg2->reg + 1,
+ reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK);
+ wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK &
+ (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT));
+ wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN);
+ }
+
+ wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0);
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3");
+DEFINE_UNCORE_FORMAT_ATTR(storage_mode, storage_mode, "config:4-5");
+DEFINE_UNCORE_FORMAT_ATTR(wrap_mode, wrap_mode, "config:6");
+DEFINE_UNCORE_FORMAT_ATTR(flag_mode, flag_mode, "config:7");
+DEFINE_UNCORE_FORMAT_ATTR(inc_sel, inc_sel, "config:9-13");
+DEFINE_UNCORE_FORMAT_ATTR(set_flag_sel, set_flag_sel, "config:19-21");
+DEFINE_UNCORE_FORMAT_ATTR(filter_cfg, filter_cfg, "config2:63");
+DEFINE_UNCORE_FORMAT_ATTR(filter_match, filter_match, "config2:0-33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_mask, filter_mask, "config2:34-61");
+DEFINE_UNCORE_FORMAT_ATTR(dsp, dsp, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(thr, thr, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(fvc, fvc, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pgt, pgt, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(map, map, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(iss, iss, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(pld, pld, "config1:32-63");
+
+static struct attribute *nhmex_uncore_mbox_formats_attr[] = {
+ &format_attr_count_mode.attr,
+ &format_attr_storage_mode.attr,
+ &format_attr_wrap_mode.attr,
+ &format_attr_flag_mode.attr,
+ &format_attr_inc_sel.attr,
+ &format_attr_set_flag_sel.attr,
+ &format_attr_filter_cfg.attr,
+ &format_attr_filter_match.attr,
+ &format_attr_filter_mask.attr,
+ &format_attr_dsp.attr,
+ &format_attr_thr.attr,
+ &format_attr_fvc.attr,
+ &format_attr_pgt.attr,
+ &format_attr_map.attr,
+ &format_attr_iss.attr,
+ &format_attr_pld.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_mbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_mbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_mbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_read, "inc_sel=0xd,fvc=0x2800"),
+ INTEL_UNCORE_EVENT_DESC(bbox_cmds_write, "inc_sel=0xd,fvc=0x2820"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_mbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_mbox_msr_enable_event,
+ .hw_config = nhmex_mbox_hw_config,
+ .get_constraint = nhmex_mbox_get_constraint,
+ .put_constraint = nhmex_mbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_mbox = {
+ .name = "mbox",
+ .num_counters = 6,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_M0_MSR_PMU_CTL0,
+ .perf_ctr = NHMEX_M0_MSR_PMU_CNT0,
+ .event_mask = NHMEX_M_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_M0_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_M_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 8,
+ .event_descs = nhmex_uncore_mbox_events,
+ .ops = &nhmex_uncore_mbox_ops,
+ .format_group = &nhmex_uncore_mbox_format_group,
+};
+
+void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ int port;
+
+ /* adjust the main event selector */
+ if (reg1->idx % 2) {
+ reg1->idx--;
+ hwc->config -= 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ } else {
+ reg1->idx++;
+ hwc->config += 1 << NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ }
+
+ /* adjust address or config of extra register */
+ port = reg1->idx / 6 + box->pmu->pmu_idx * 4;
+ switch (reg1->idx % 6) {
+ case 0:
+ reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port);
+ break;
+ case 1:
+ reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port);
+ break;
+ case 2:
+ /* the 8~15 bits to the 0~7 bits */
+ reg1->config >>= 8;
+ break;
+ case 3:
+ /* the 0~7 bits to the 8~15 bits */
+ reg1->config <<= 8;
+ break;
+ case 4:
+ reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port);
+ break;
+ case 5:
+ reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port);
+ break;
+ };
+}
+
+/*
+ * Each rbox has 4 event set which monitor PQI port 0~3 or 4~7.
+ * An event set consists of 6 events, the 3rd and 4th events in
+ * an event set use the same extra register. So an event set uses
+ * 5 extra registers.
+ */
+static struct event_constraint *
+nhmex_rbox_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ int idx, er_idx;
+ u64 config1;
+ bool ok = false;
+
+ if (!uncore_box_is_fake(box) && reg1->alloc)
+ return NULL;
+
+ idx = reg1->idx % 6;
+ config1 = reg1->config;
+again:
+ er_idx = idx;
+ /* the 3rd and 4th events use the same extra register */
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ er = &box->shared_regs[er_idx];
+ raw_spin_lock_irqsave(&er->lock, flags);
+ if (idx < 2) {
+ if (!atomic_read(&er->ref) || er->config == reg1->config) {
+ atomic_inc(&er->ref);
+ er->config = reg1->config;
+ ok = true;
+ }
+ } else if (idx == 2 || idx == 3) {
+ /*
+ * these two events use different fields in a extra register,
+ * the 0~7 bits and the 8~15 bits respectively.
+ */
+ u64 mask = 0xff << ((idx - 2) * 8);
+ if (!__BITS_VALUE(atomic_read(&er->ref), idx - 2, 8) ||
+ !((er->config ^ config1) & mask)) {
+ atomic_add(1 << ((idx - 2) * 8), &er->ref);
+ er->config &= ~mask;
+ er->config |= config1 & mask;
+ ok = true;
+ }
+ } else {
+ if (!atomic_read(&er->ref) ||
+ (er->config == (hwc->config >> 32) &&
+ er->config1 == reg1->config &&
+ er->config2 == reg2->config)) {
+ atomic_inc(&er->ref);
+ er->config = (hwc->config >> 32);
+ er->config1 = reg1->config;
+ er->config2 = reg2->config;
+ ok = true;
+ }
+ }
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ if (!ok) {
+ /*
+ * The Rbox events are always in pairs. The paired
+ * events are functional identical, but use different
+ * extra registers. If we failed to take an extra
+ * register, try the alternative.
+ */
+ if (idx % 2)
+ idx--;
+ else
+ idx++;
+ if (idx != reg1->idx % 6) {
+ if (idx == 2)
+ config1 >>= 8;
+ else if (idx == 3)
+ config1 <<= 8;
+ goto again;
+ }
+ } else {
+ if (!uncore_box_is_fake(box)) {
+ if (idx != reg1->idx % 6)
+ nhmex_rbox_alter_er(box, event);
+ reg1->alloc = 1;
+ }
+ return NULL;
+ }
+ return &constraint_empty;
+}
+
+static void nhmex_rbox_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_extra_reg *er;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ int idx, er_idx;
+
+ if (uncore_box_is_fake(box) || !reg1->alloc)
+ return;
+
+ idx = reg1->idx % 6;
+ er_idx = idx;
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ er = &box->shared_regs[er_idx];
+ if (idx == 2 || idx == 3)
+ atomic_sub(1 << ((idx - 2) * 8), &er->ref);
+ else
+ atomic_dec(&er->ref);
+
+ reg1->alloc = 0;
+}
+
+static int nhmex_rbox_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+ struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
+ int port, idx;
+
+ idx = (event->hw.config & NHMEX_R_PMON_CTL_EV_SEL_MASK) >>
+ NHMEX_R_PMON_CTL_EV_SEL_SHIFT;
+ if (idx >= 0x18)
+ return -EINVAL;
+
+ reg1->idx = idx;
+ reg1->config = event->attr.config1;
+
+ port = idx / 6 + box->pmu->pmu_idx * 4;
+ idx %= 6;
+ switch (idx) {
+ case 0:
+ reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG0(port);
+ break;
+ case 1:
+ reg1->reg = NHMEX_R_MSR_PORTN_IPERF_CFG1(port);
+ break;
+ case 2:
+ case 3:
+ reg1->reg = NHMEX_R_MSR_PORTN_QLX_CFG(port);
+ break;
+ case 4:
+ case 5:
+ if (idx == 4)
+ reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port);
+ else
+ reg1->reg = NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port);
+ reg2->config = event->attr.config2;
+ hwc->config |= event->attr.config & (~0ULL << 32);
+ break;
+ };
+ return 0;
+}
+
+static u64 nhmex_rbox_shared_reg_config(struct intel_uncore_box *box, int idx)
+{
+ struct intel_uncore_extra_reg *er;
+ unsigned long flags;
+ u64 config;
+
+ er = &box->shared_regs[idx];
+
+ raw_spin_lock_irqsave(&er->lock, flags);
+ config = er->config;
+ raw_spin_unlock_irqrestore(&er->lock, flags);
+
+ return config;
+}
+
+static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+ int idx, er_idx;
+
+ idx = reg1->idx % 6;
+ er_idx = idx;
+ if (er_idx > 2)
+ er_idx--;
+ er_idx += (reg1->idx / 6) * 5;
+
+ switch (idx) {
+ case 0:
+ case 1:
+ wrmsrl(reg1->reg, reg1->config);
+ break;
+ case 2:
+ case 3:
+ wrmsrl(reg1->reg, nhmex_rbox_shared_reg_config(box, er_idx));
+ break;
+ case 4:
+ case 5:
+ wrmsrl(reg1->reg, reg1->config);
+ wrmsrl(reg1->reg + 1, hwc->config >> 32);
+ wrmsrl(reg1->reg + 2, reg2->config);
+ break;
+ };
+
+ wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 |
+ (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK));
+}
+
+DEFINE_UNCORE_FORMAT_ATTR(xbr_match, xbr_match, "config:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mm_cfg, xbr_mm_cfg, "config1:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(xbr_mask, xbr_mask, "config2:0-63");
+DEFINE_UNCORE_FORMAT_ATTR(qlx_cfg, qlx_cfg, "config1:0-15");
+DEFINE_UNCORE_FORMAT_ATTR(iperf_cfg, iperf_cfg, "config1:0-31");
+
+static struct attribute *nhmex_uncore_rbox_formats_attr[] = {
+ &format_attr_event5.attr,
+ &format_attr_xbr_mm_cfg.attr,
+ &format_attr_xbr_match.attr,
+ &format_attr_xbr_mask.attr,
+ &format_attr_qlx_cfg.attr,
+ &format_attr_iperf_cfg.attr,
+ NULL,
+};
+
+static struct attribute_group nhmex_uncore_rbox_format_group = {
+ .name = "format",
+ .attrs = nhmex_uncore_rbox_formats_attr,
+};
+
+static struct uncore_event_desc nhmex_uncore_rbox_events[] = {
+ INTEL_UNCORE_EVENT_DESC(qpi0_flit_send, "event=0x0,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_filt_send, "event=0x6,iperf_cfg=0x80000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_idle_filt, "event=0x0,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_idle_filt, "event=0x6,iperf_cfg=0x40000000"),
+ INTEL_UNCORE_EVENT_DESC(qpi0_date_response, "event=0x0,iperf_cfg=0xc4"),
+ INTEL_UNCORE_EVENT_DESC(qpi1_date_response, "event=0x6,iperf_cfg=0xc4"),
+ { /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops nhmex_uncore_rbox_ops = {
+ NHMEX_UNCORE_OPS_COMMON_INIT(),
+ .enable_event = nhmex_rbox_msr_enable_event,
+ .hw_config = nhmex_rbox_hw_config,
+ .get_constraint = nhmex_rbox_get_constraint,
+ .put_constraint = nhmex_rbox_put_constraint,
+};
+
+static struct intel_uncore_type nhmex_uncore_rbox = {
+ .name = "rbox",
+ .num_counters = 8,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .event_ctl = NHMEX_R_MSR_PMON_CTL0,
+ .perf_ctr = NHMEX_R_MSR_PMON_CNT0,
+ .event_mask = NHMEX_R_PMON_RAW_EVENT_MASK,
+ .box_ctl = NHMEX_R_MSR_GLOBAL_CTL,
+ .msr_offset = NHMEX_R_MSR_OFFSET,
+ .pair_ctr_ctl = 1,
+ .num_shared_regs = 20,
+ .event_descs = nhmex_uncore_rbox_events,
+ .ops = &nhmex_uncore_rbox_ops,
+ .format_group = &nhmex_uncore_rbox_format_group
+};
+
+static struct intel_uncore_type *nhmex_msr_uncores[] = {
+ &nhmex_uncore_ubox,
+ &nhmex_uncore_cbox,
+ &nhmex_uncore_bbox,
+ &nhmex_uncore_sbox,
+ &nhmex_uncore_mbox,
+ &nhmex_uncore_rbox,
+ &nhmex_uncore_wbox,
+ NULL,
+};
+/* end of Nehalem-EX uncore support */
+
+static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = idx;
+ hwc->last_tag = ++box->tags[idx];
+
+ if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+ hwc->event_base = uncore_fixed_ctr(box);
+ hwc->config_base = uncore_fixed_ctl(box);
+ return;
+ }
+
+ hwc->config_base = uncore_event_ctl(box, hwc->idx);
+ hwc->event_base = uncore_perf_ctr(box, hwc->idx);
+}
+
+static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
+{
+ u64 prev_count, new_count, delta;
+ int shift;
+
+ if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+ shift = 64 - uncore_fixed_ctr_bits(box);
+ else
+ shift = 64 - uncore_perf_ctr_bits(box);
+
+ /* the hrtimer might modify the previous event value */
+again:
+ prev_count = local64_read(&event->hw.prev_count);
+ new_count = uncore_read_counter(box, event);
+ if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
+ goto again;
+
+ delta = (new_count << shift) - (prev_count << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+}
+
+/*
+ * The overflow interrupt is unavailable for SandyBridge-EP, is broken
+ * for SandyBridge. So we use hrtimer to periodically poll the counter
+ * to avoid overflow.
+ */
+static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
+{
+ struct intel_uncore_box *box;
+ unsigned long flags;
+ int bit;
+
+ box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
+ if (!box->n_active || box->cpu != smp_processor_id())
+ return HRTIMER_NORESTART;
+ /*
+ * disable local interrupt to prevent uncore_pmu_event_start/stop
+ * to interrupt the update process
+ */
+ local_irq_save(flags);
+
+ for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
+ uncore_perf_event_update(box, box->events[bit]);
+
+ local_irq_restore(flags);
+
+ hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
+ return HRTIMER_RESTART;
+}
+
+static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
+{
+ __hrtimer_start_range_ns(&box->hrtimer,
+ ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
+ HRTIMER_MODE_REL_PINNED, 0);
+}
+
+static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
+{
+ hrtimer_cancel(&box->hrtimer);
+}
+
+static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
+{
+ hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ box->hrtimer.function = uncore_pmu_hrtimer;
+}
+
+struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+{
+ struct intel_uncore_box *box;
+ int i, size;
+
+ size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
+
+ box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
+ if (!box)
+ return NULL;
+
+ for (i = 0; i < type->num_shared_regs; i++)
+ raw_spin_lock_init(&box->shared_regs[i].lock);
+
+ uncore_pmu_init_hrtimer(box);
+ atomic_set(&box->refcnt, 1);
+ box->cpu = -1;
+ box->phys_id = -1;
+
+ return box;
+}
+
+static struct intel_uncore_box *
+uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
+{
+ static struct intel_uncore_box *box;
+
+ box = *per_cpu_ptr(pmu->box, cpu);
+ if (box)
+ return box;
+
+ raw_spin_lock(&uncore_box_lock);
+ list_for_each_entry(box, &pmu->box_list, list) {
+ if (box->phys_id == topology_physical_package_id(cpu)) {
+ atomic_inc(&box->refcnt);
+ *per_cpu_ptr(pmu->box, cpu) = box;
+ break;
+ }
+ }
+ raw_spin_unlock(&uncore_box_lock);
+
+ return *per_cpu_ptr(pmu->box, cpu);
+}
+
+static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
+{
+ return container_of(event->pmu, struct intel_uncore_pmu, pmu);
+}
+
+static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
+{
+ /*
+ * perf core schedules event on the basis of cpu, uncore events are
+ * collected by one of the cpus inside a physical package.
+ */
+ return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
+}
+
+static int
+uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = box->pmu->type->num_counters;
+ if (box->pmu->type->fixed_ctl)
+ max_count++;
+
+ if (box->n_events >= max_count)
+ return -EINVAL;
+
+ n = box->n_events;
+ box->event_list[n] = leader;
+ n++;
+ if (!dogrp)
+ return n;
+
+ list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ if (event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ box->event_list[n] = event;
+ n++;
+ }
+ return n;
+}
+
+static struct event_constraint *
+uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct intel_uncore_type *type = box->pmu->type;
+ struct event_constraint *c;
+
+ if (type->ops->get_constraint) {
+ c = type->ops->get_constraint(box, event);
+ if (c)
+ return c;
+ }
+
+ if (event->hw.config == ~0ULL)
+ return &constraint_fixed;
+
+ if (type->constraints) {
+ for_each_event_constraint(c, type->constraints) {
+ if ((event->hw.config & c->cmask) == c->code)
+ return c;
+ }
+ }
+
+ return &type->unconstrainted;
+}
+
+static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+ if (box->pmu->type->ops->put_constraint)
+ box->pmu->type->ops->put_constraint(box, event);
+}
+
+static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
+{
+ unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+ struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+ int i, wmin, wmax, ret = 0;
+ struct hw_perf_event *hwc;
+
+ bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
+
+ for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ c = uncore_get_event_constraint(box, box->event_list[i]);
+ constraints[i] = c;
+ wmin = min(wmin, c->weight);
+ wmax = max(wmax, c->weight);
+ }
+
+ /* fastpath, try to reuse previous register */
+ for (i = 0; i < n; i++) {
+ hwc = &box->event_list[i]->hw;
+ c = constraints[i];
+
+ /* never assigned */
+ if (hwc->idx == -1)
+ break;
+
+ /* constraint still honored */
+ if (!test_bit(hwc->idx, c->idxmsk))
+ break;
+
+ /* not already used */
+ if (test_bit(hwc->idx, used_mask))
+ break;
+
+ __set_bit(hwc->idx, used_mask);
+ if (assign)
+ assign[i] = hwc->idx;
+ }
+ /* slow path */
+ if (i != n)
+ ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+
+ if (!assign || ret) {
+ for (i = 0; i < n; i++)
+ uncore_put_event_constraint(box, box->event_list[i]);
+ }
+ return ret ? -EINVAL : 0;
+}
+
+static void uncore_pmu_event_start(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ int idx = event->hw.idx;
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+ return;
+
+ event->hw.state = 0;
+ box->events[idx] = event;
+ box->n_active++;
+ __set_bit(idx, box->active_mask);
+
+ local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
+ uncore_enable_event(box, event);
+
+ if (box->n_active == 1) {
+ uncore_enable_box(box);
+ uncore_pmu_start_hrtimer(box);
+ }
+}
+
+static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
+ uncore_disable_event(box, event);
+ box->n_active--;
+ box->events[hwc->idx] = NULL;
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (box->n_active == 0) {
+ uncore_disable_box(box);
+ uncore_pmu_cancel_hrtimer(box);
+ }
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ /*
+ * Drain the remaining delta count out of a event
+ * that we are disabling:
+ */
+ uncore_perf_event_update(box, event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int uncore_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ struct hw_perf_event *hwc = &event->hw;
+ int assign[UNCORE_PMC_IDX_MAX];
+ int i, n, ret;
+
+ if (!box)
+ return -ENODEV;
+
+ ret = n = uncore_collect_events(box, event, false);
+ if (ret < 0)
+ return ret;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ ret = uncore_assign_events(box, assign, n);
+ if (ret)
+ return ret;
+
+ /* save events moving to new counters */
+ for (i = 0; i < box->n_events; i++) {
+ event = box->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx == assign[i] &&
+ hwc->last_tag == box->tags[assign[i]])
+ continue;
+ /*
+ * Ensure we don't accidentally enable a stopped
+ * counter simply because we rescheduled.
+ */
+ if (hwc->state & PERF_HES_STOPPED)
+ hwc->state |= PERF_HES_ARCH;
+
+ uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+ }
+
+ /* reprogram moved events into new counters */
+ for (i = 0; i < n; i++) {
+ event = box->event_list[i];
+ hwc = &event->hw;
+
+ if (hwc->idx != assign[i] ||
+ hwc->last_tag != box->tags[assign[i]])
+ uncore_assign_hw_event(box, event, assign[i]);
+ else if (i < box->n_events)
+ continue;
+
+ if (hwc->state & PERF_HES_ARCH)
+ continue;
+
+ uncore_pmu_event_start(event, 0);
+ }
+ box->n_events = n;
+
+ return 0;
+}
+
+static void uncore_pmu_event_del(struct perf_event *event, int flags)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ int i;
+
+ uncore_pmu_event_stop(event, PERF_EF_UPDATE);
+
+ for (i = 0; i < box->n_events; i++) {
+ if (event == box->event_list[i]) {
+ uncore_put_event_constraint(box, event);
+
+ while (++i < box->n_events)
+ box->event_list[i - 1] = box->event_list[i];
+
+ --box->n_events;
+ break;
+ }
+ }
+
+ event->hw.idx = -1;
+ event->hw.last_tag = ~0ULL;
+}
+
+static void uncore_pmu_event_read(struct perf_event *event)
+{
+ struct intel_uncore_box *box = uncore_event_to_box(event);
+ uncore_perf_event_update(box, event);
+}
+
+/*
+ * validation ensures the group can be loaded onto the
+ * PMU if it was the only group available.
+ */
+static int uncore_validate_group(struct intel_uncore_pmu *pmu,
+ struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct intel_uncore_box *fake_box;
+ int ret = -EINVAL, n;
+
+ fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+ if (!fake_box)
+ return -ENOMEM;
+
+ fake_box->pmu = pmu;
+ /*
+ * the event is not yet connected with its
+ * siblings therefore we must first collect
+ * existing siblings, then add the new event
+ * before we can simulate the scheduling
+ */
+ n = uncore_collect_events(fake_box, leader, true);
+ if (n < 0)
+ goto out;
+
+ fake_box->n_events = n;
+ n = uncore_collect_events(fake_box, event, false);
+ if (n < 0)
+ goto out;
+
+ fake_box->n_events = n;
+
+ ret = uncore_assign_events(fake_box, NULL, n);
+out:
+ kfree(fake_box);
+ return ret;
+}
+
+int uncore_pmu_event_init(struct perf_event *event)
+{
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ struct hw_perf_event *hwc = &event->hw;
+ int ret;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ pmu = uncore_event_to_pmu(event);
+ /* no device found for this pmu */
+ if (pmu->func_id < 0)
+ return -ENOENT;
+
+ /*
+ * Uncore PMU does measure at all privilege level all the time.
+ * So it doesn't make sense to specify any exclude bits.
+ */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_hv || event->attr.exclude_idle)
+ return -EINVAL;
+
+ /* Sampling not supported yet */
+ if (hwc->sample_period)
+ return -EINVAL;
+
+ /*
+ * Place all uncore events for a particular physical package
+ * onto a single cpu
+ */
+ if (event->cpu < 0)
+ return -EINVAL;
+ box = uncore_pmu_to_box(pmu, event->cpu);
+ if (!box || box->cpu < 0)
+ return -EINVAL;
+ event->cpu = box->cpu;
+
+ event->hw.idx = -1;
+ event->hw.last_tag = ~0ULL;
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
+ if (event->attr.config == UNCORE_FIXED_EVENT) {
+ /* no fixed counter */
+ if (!pmu->type->fixed_ctl)
+ return -EINVAL;
+ /*
+ * if there is only one fixed counter, only the first pmu
+ * can access the fixed counter
+ */
+ if (pmu->type->single_fixed && pmu->pmu_idx > 0)
+ return -EINVAL;
+ hwc->config = ~0ULL;
+ } else {
+ hwc->config = event->attr.config & pmu->type->event_mask;
+ if (pmu->type->ops->hw_config) {
+ ret = pmu->type->ops->hw_config(box, event);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (event->group_leader != event)
+ ret = uncore_validate_group(pmu, event);
+ else
+ ret = 0;
+
+ return ret;
+}
+
+static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
+{
+ int ret;
+
+ pmu->pmu = (struct pmu) {
+ .attr_groups = pmu->type->attr_groups,
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = uncore_pmu_event_init,
+ .add = uncore_pmu_event_add,
+ .del = uncore_pmu_event_del,
+ .start = uncore_pmu_event_start,
+ .stop = uncore_pmu_event_stop,
+ .read = uncore_pmu_event_read,
+ };
+
+ if (pmu->type->num_boxes == 1) {
+ if (strlen(pmu->type->name) > 0)
+ sprintf(pmu->name, "uncore_%s", pmu->type->name);
+ else
+ sprintf(pmu->name, "uncore");
+ } else {
+ sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
+ pmu->pmu_idx);
+ }
+
+ ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
+ return ret;
+}
+
+static void __init uncore_type_exit(struct intel_uncore_type *type)
+{
+ int i;
+
+ for (i = 0; i < type->num_boxes; i++)
+ free_percpu(type->pmus[i].box);
+ kfree(type->pmus);
+ type->pmus = NULL;
+ kfree(type->attr_groups[1]);
+ type->attr_groups[1] = NULL;
+}
+
+static void uncore_types_exit(struct intel_uncore_type **types)
+{
+ int i;
+ for (i = 0; types[i]; i++)
+ uncore_type_exit(types[i]);
+}
+
+static int __init uncore_type_init(struct intel_uncore_type *type)
+{
+ struct intel_uncore_pmu *pmus;
+ struct attribute_group *events_group;
+ struct attribute **attrs;
+ int i, j;
+
+ pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
+ if (!pmus)
+ return -ENOMEM;
+
+ type->unconstrainted = (struct event_constraint)
+ __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
+ 0, type->num_counters, 0);
+
+ for (i = 0; i < type->num_boxes; i++) {
+ pmus[i].func_id = -1;
+ pmus[i].pmu_idx = i;
+ pmus[i].type = type;
+ INIT_LIST_HEAD(&pmus[i].box_list);
+ pmus[i].box = alloc_percpu(struct intel_uncore_box *);
+ if (!pmus[i].box)
+ goto fail;
+ }
+
+ if (type->event_descs) {
+ i = 0;
+ while (type->event_descs[i].attr.attr.name)
+ i++;
+
+ events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
+ sizeof(*events_group), GFP_KERNEL);
+ if (!events_group)
+ goto fail;
+
+ attrs = (struct attribute **)(events_group + 1);
+ events_group->name = "events";
+ events_group->attrs = attrs;
+
+ for (j = 0; j < i; j++)
+ attrs[j] = &type->event_descs[j].attr.attr;
+
+ type->attr_groups[1] = events_group;
+ }
+
+ type->pmus = pmus;
+ return 0;
+fail:
+ uncore_type_exit(type);
+ return -ENOMEM;
+}
+
+static int __init uncore_types_init(struct intel_uncore_type **types)
+{
+ int i, ret;
+
+ for (i = 0; types[i]; i++) {
+ ret = uncore_type_init(types[i]);
+ if (ret)
+ goto fail;
+ }
+ return 0;
+fail:
+ while (--i >= 0)
+ uncore_type_exit(types[i]);
+ return ret;
+}
+
+static struct pci_driver *uncore_pci_driver;
+static bool pcidrv_registered;
+
+/*
+ * add a pci uncore device
+ */
+static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
+{
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, phys_id;
+
+ phys_id = pcibus_to_physid[pdev->bus->number];
+ if (phys_id < 0)
+ return -ENODEV;
+
+ box = uncore_alloc_box(type, 0);
+ if (!box)
+ return -ENOMEM;
+
+ /*
+ * for performance monitoring unit with multiple boxes,
+ * each box has a different function id.
+ */
+ for (i = 0; i < type->num_boxes; i++) {
+ pmu = &type->pmus[i];
+ if (pmu->func_id == pdev->devfn)
+ break;
+ if (pmu->func_id < 0) {
+ pmu->func_id = pdev->devfn;
+ break;
+ }
+ pmu = NULL;
+ }
+
+ if (!pmu) {
+ kfree(box);
+ return -EINVAL;
+ }
+
+ box->phys_id = phys_id;
+ box->pci_dev = pdev;
+ box->pmu = pmu;
+ uncore_box_init(box);
+ pci_set_drvdata(pdev, box);
+
+ raw_spin_lock(&uncore_box_lock);
+ list_add_tail(&box->list, &pmu->box_list);
+ raw_spin_unlock(&uncore_box_lock);
+
+ return 0;
+}
+
+static void uncore_pci_remove(struct pci_dev *pdev)
+{
+ struct intel_uncore_box *box = pci_get_drvdata(pdev);
+ struct intel_uncore_pmu *pmu = box->pmu;
+ int cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+
+ if (WARN_ON_ONCE(phys_id != box->phys_id))
+ return;
+
+ raw_spin_lock(&uncore_box_lock);
+ list_del(&box->list);
+ raw_spin_unlock(&uncore_box_lock);
+
+ for_each_possible_cpu(cpu) {
+ if (*per_cpu_ptr(pmu->box, cpu) == box) {
+ *per_cpu_ptr(pmu->box, cpu) = NULL;
+ atomic_dec(&box->refcnt);
+ }
+ }
+
+ WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
+ kfree(box);
+}
+
+static int __devinit uncore_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct intel_uncore_type *type;
+
+ type = (struct intel_uncore_type *)id->driver_data;
+
+ return uncore_pci_add(type, pdev);
+}
+
+static int __init uncore_pci_init(void)
+{
+ int ret;
+
+ switch (boot_cpu_data.x86_model) {
+ case 45: /* Sandy Bridge-EP */
+ pci_uncores = snbep_pci_uncores;
+ uncore_pci_driver = &snbep_uncore_pci_driver;
+ snbep_pci2phy_map_init();
+ break;
+ default:
+ return 0;
+ }
+
+ ret = uncore_types_init(pci_uncores);
+ if (ret)
+ return ret;
+
+ uncore_pci_driver->probe = uncore_pci_probe;
+ uncore_pci_driver->remove = uncore_pci_remove;
+
+ ret = pci_register_driver(uncore_pci_driver);
+ if (ret == 0)
+ pcidrv_registered = true;
+ else
+ uncore_types_exit(pci_uncores);
+
+ return ret;
+}
+
+static void __init uncore_pci_exit(void)
+{
+ if (pcidrv_registered) {
+ pcidrv_registered = false;
+ pci_unregister_driver(uncore_pci_driver);
+ uncore_types_exit(pci_uncores);
+ }
+}
+
+static void __cpuinit uncore_cpu_dying(int cpu)
+{
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, j;
+
+ for (i = 0; msr_uncores[i]; i++) {
+ type = msr_uncores[i];
+ for (j = 0; j < type->num_boxes; j++) {
+ pmu = &type->pmus[j];
+ box = *per_cpu_ptr(pmu->box, cpu);
+ *per_cpu_ptr(pmu->box, cpu) = NULL;
+ if (box && atomic_dec_and_test(&box->refcnt))
+ kfree(box);
+ }
+ }
+}
+
+static int __cpuinit uncore_cpu_starting(int cpu)
+{
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box, *exist;
+ int i, j, k, phys_id;
+
+ phys_id = topology_physical_package_id(cpu);
+
+ for (i = 0; msr_uncores[i]; i++) {
+ type = msr_uncores[i];
+ for (j = 0; j < type->num_boxes; j++) {
+ pmu = &type->pmus[j];
+ box = *per_cpu_ptr(pmu->box, cpu);
+ /* called by uncore_cpu_init? */
+ if (box && box->phys_id >= 0) {
+ uncore_box_init(box);
+ continue;
+ }
+
+ for_each_online_cpu(k) {
+ exist = *per_cpu_ptr(pmu->box, k);
+ if (exist && exist->phys_id == phys_id) {
+ atomic_inc(&exist->refcnt);
+ *per_cpu_ptr(pmu->box, cpu) = exist;
+ kfree(box);
+ box = NULL;
+ break;
+ }
+ }
+
+ if (box) {
+ box->phys_id = phys_id;
+ uncore_box_init(box);
+ }
+ }
+ }
+ return 0;
+}
+
+static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
+{
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, j;
+
+ for (i = 0; msr_uncores[i]; i++) {
+ type = msr_uncores[i];
+ for (j = 0; j < type->num_boxes; j++) {
+ pmu = &type->pmus[j];
+ if (pmu->func_id < 0)
+ pmu->func_id = j;
+
+ box = uncore_alloc_box(type, cpu);
+ if (!box)
+ return -ENOMEM;
+
+ box->pmu = pmu;
+ box->phys_id = phys_id;
+ *per_cpu_ptr(pmu->box, cpu) = box;
+ }
+ }
+ return 0;
+}
+
+static void __cpuinit
+uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
+{
+ struct intel_uncore_type *type;
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_box *box;
+ int i, j;
+
+ for (i = 0; uncores[i]; i++) {
+ type = uncores[i];
+ for (j = 0; j < type->num_boxes; j++) {
+ pmu = &type->pmus[j];
+ if (old_cpu < 0)
+ box = uncore_pmu_to_box(pmu, new_cpu);
+ else
+ box = uncore_pmu_to_box(pmu, old_cpu);
+ if (!box)
+ continue;
+
+ if (old_cpu < 0) {
+ WARN_ON_ONCE(box->cpu != -1);
+ box->cpu = new_cpu;
+ continue;
+ }
+
+ WARN_ON_ONCE(box->cpu != old_cpu);
+ if (new_cpu >= 0) {
+ uncore_pmu_cancel_hrtimer(box);
+ perf_pmu_migrate_context(&pmu->pmu,
+ old_cpu, new_cpu);
+ box->cpu = new_cpu;
+ } else {
+ box->cpu = -1;
+ }
+ }
+ }
+}
+
+static void __cpuinit uncore_event_exit_cpu(int cpu)
+{
+ int i, phys_id, target;
+
+ /* if exiting cpu is used for collecting uncore events */
+ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
+ return;
+
+ /* find a new cpu to collect uncore events */
+ phys_id = topology_physical_package_id(cpu);
+ target = -1;
+ for_each_online_cpu(i) {
+ if (i == cpu)
+ continue;
+ if (phys_id == topology_physical_package_id(i)) {
+ target = i;
+ break;
+ }
+ }
+
+ /* migrate uncore events to the new cpu */
+ if (target >= 0)
+ cpumask_set_cpu(target, &uncore_cpu_mask);
+
+ uncore_change_context(msr_uncores, cpu, target);
+ uncore_change_context(pci_uncores, cpu, target);
+}
+
+static void __cpuinit uncore_event_init_cpu(int cpu)
+{
+ int i, phys_id;
+
+ phys_id = topology_physical_package_id(cpu);
+ for_each_cpu(i, &uncore_cpu_mask) {
+ if (phys_id == topology_physical_package_id(i))
+ return;
+ }
+
+ cpumask_set_cpu(cpu, &uncore_cpu_mask);
+
+ uncore_change_context(msr_uncores, -1, cpu);
+ uncore_change_context(pci_uncores, -1, cpu);
+}
+
+static int
+ __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long)hcpu;
+
+ /* allocate/free data structure for uncore box */
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_UP_PREPARE:
+ uncore_cpu_prepare(cpu, -1);
+ break;
+ case CPU_STARTING:
+ uncore_cpu_starting(cpu);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DYING:
+ uncore_cpu_dying(cpu);
+ break;
+ default:
+ break;
+ }
+
+ /* select the cpu that collects uncore events */
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_FAILED:
+ case CPU_STARTING:
+ uncore_event_init_cpu(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ uncore_event_exit_cpu(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block uncore_cpu_nb __cpuinitdata = {
+ .notifier_call = uncore_cpu_notifier,
+ /*
+ * to migrate uncore events, our notifier should be executed
+ * before perf core's notifier.
+ */
+ .priority = CPU_PRI_PERF + 1,
+};
+
+static void __init uncore_cpu_setup(void *dummy)
+{
+ uncore_cpu_starting(smp_processor_id());
+}
+
+static int __init uncore_cpu_init(void)
+{
+ int ret, cpu, max_cores;
+
+ max_cores = boot_cpu_data.x86_max_cores;
+ switch (boot_cpu_data.x86_model) {
+ case 26: /* Nehalem */
+ case 30:
+ case 37: /* Westmere */
+ case 44:
+ msr_uncores = nhm_msr_uncores;
+ break;
+ case 42: /* Sandy Bridge */
+ if (snb_uncore_cbox.num_boxes > max_cores)
+ snb_uncore_cbox.num_boxes = max_cores;
+ msr_uncores = snb_msr_uncores;
+ break;
+ case 45: /* Sandy Birdge-EP */
+ if (snbep_uncore_cbox.num_boxes > max_cores)
+ snbep_uncore_cbox.num_boxes = max_cores;
+ msr_uncores = snbep_msr_uncores;
+ break;
+ case 46:
+ msr_uncores = nhmex_msr_uncores;
+ break;
+ default:
+ return 0;
+ }
+
+ ret = uncore_types_init(msr_uncores);
+ if (ret)
+ return ret;
+
+ get_online_cpus();
+
+ for_each_online_cpu(cpu) {
+ int i, phys_id = topology_physical_package_id(cpu);
+
+ for_each_cpu(i, &uncore_cpu_mask) {
+ if (phys_id == topology_physical_package_id(i)) {
+ phys_id = -1;
+ break;
+ }
+ }
+ if (phys_id < 0)
+ continue;
+
+ uncore_cpu_prepare(cpu, phys_id);
+ uncore_event_init_cpu(cpu);
+ }
+ on_each_cpu(uncore_cpu_setup, NULL, 1);
+
+ register_cpu_notifier(&uncore_cpu_nb);
+
+ put_online_cpus();
+
+ return 0;
+}
+
+static int __init uncore_pmus_register(void)
+{
+ struct intel_uncore_pmu *pmu;
+ struct intel_uncore_type *type;
+ int i, j;
+
+ for (i = 0; msr_uncores[i]; i++) {
+ type = msr_uncores[i];
+ for (j = 0; j < type->num_boxes; j++) {
+ pmu = &type->pmus[j];
+ uncore_pmu_register(pmu);
+ }
+ }
+
+ for (i = 0; pci_uncores[i]; i++) {
+ type = pci_uncores[i];
+ for (j = 0; j < type->num_boxes; j++) {
+ pmu = &type->pmus[j];
+ uncore_pmu_register(pmu);
+ }
+ }
+
+ return 0;
+}
+
+static int __init intel_uncore_init(void)
+{
+ int ret;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return -ENODEV;
+
+ ret = uncore_pci_init();
+ if (ret)
+ goto fail;
+ ret = uncore_cpu_init();
+ if (ret) {
+ uncore_pci_exit();
+ goto fail;
+ }
+
+ uncore_pmus_register();
+ return 0;
+fail:
+ return ret;
+}
+device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
new file mode 100644
index 00000000000..c9e5dc56630
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -0,0 +1,621 @@
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+#include "perf_event.h"
+
+#define UNCORE_PMU_NAME_LEN 32
+#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
+
+#define UNCORE_FIXED_EVENT 0xff
+#define UNCORE_PMC_IDX_MAX_GENERIC 8
+#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
+#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+
+#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+
+/* SNB event control */
+#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
+#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
+#define SNB_UNC_CTL_EDGE_DET (1 << 18)
+#define SNB_UNC_CTL_EN (1 << 22)
+#define SNB_UNC_CTL_INVERT (1 << 23)
+#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
+#define NHM_UNC_CTL_CMASK_MASK 0xff000000
+#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
+
+#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ SNB_UNC_CTL_UMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET | \
+ SNB_UNC_CTL_INVERT | \
+ SNB_UNC_CTL_CMASK_MASK)
+
+#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
+ SNB_UNC_CTL_UMASK_MASK | \
+ SNB_UNC_CTL_EDGE_DET | \
+ SNB_UNC_CTL_INVERT | \
+ NHM_UNC_CTL_CMASK_MASK)
+
+/* SNB global control register */
+#define SNB_UNC_PERF_GLOBAL_CTL 0x391
+#define SNB_UNC_FIXED_CTR_CTRL 0x394
+#define SNB_UNC_FIXED_CTR 0x395
+
+/* SNB uncore global control */
+#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
+#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
+
+/* SNB Cbo register */
+#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
+#define SNB_UNC_CBO_0_PER_CTR0 0x706
+#define SNB_UNC_CBO_MSR_OFFSET 0x10
+
+/* NHM global control register */
+#define NHM_UNC_PERF_GLOBAL_CTL 0x391
+#define NHM_UNC_FIXED_CTR 0x394
+#define NHM_UNC_FIXED_CTR_CTRL 0x395
+
+/* NHM uncore global control */
+#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
+#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
+
+/* NHM uncore register */
+#define NHM_UNC_PERFEVTSEL0 0x3c0
+#define NHM_UNC_UNCORE_PMC0 0x3b0
+
+/* SNB-EP Box level control */
+#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
+#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1)
+#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8)
+#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16)
+#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
+ SNBEP_PMON_BOX_CTL_RST_CTRS | \
+ SNBEP_PMON_BOX_CTL_FRZ_EN)
+/* SNB-EP event control */
+#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff
+#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00
+#define SNBEP_PMON_CTL_RST (1 << 17)
+#define SNBEP_PMON_CTL_EDGE_DET (1 << 18)
+#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) /* only for QPI */
+#define SNBEP_PMON_CTL_EN (1 << 22)
+#define SNBEP_PMON_CTL_INVERT (1 << 23)
+#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000
+#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_PMON_CTL_TRESH_MASK)
+
+/* SNB-EP Ubox event control */
+#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000
+#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PMON_CTL_UMASK_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
+
+#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19)
+#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_CBO_PMON_CTL_TID_EN)
+
+/* SNB-EP PCU event control */
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000
+#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30)
+#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31)
+#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_CTL_EV_SEL_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
+ SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
+ SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
+
+#define SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK \
+ (SNBEP_PMON_RAW_EVENT_MASK | \
+ SNBEP_PMON_CTL_EV_SEL_EXT)
+
+/* SNB-EP pci control register */
+#define SNBEP_PCI_PMON_BOX_CTL 0xf4
+#define SNBEP_PCI_PMON_CTL0 0xd8
+/* SNB-EP pci counter register */
+#define SNBEP_PCI_PMON_CTR0 0xa0
+
+/* SNB-EP home agent register */
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40
+#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44
+#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48
+/* SNB-EP memory controller register */
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0
+#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0
+/* SNB-EP QPI register */
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228
+#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238
+#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c
+
+/* SNB-EP Ubox register */
+#define SNBEP_U_MSR_PMON_CTR0 0xc16
+#define SNBEP_U_MSR_PMON_CTL0 0xc10
+
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08
+#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09
+
+/* SNB-EP Cbo register */
+#define SNBEP_C0_MSR_PMON_CTR0 0xd16
+#define SNBEP_C0_MSR_PMON_CTL0 0xd10
+#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04
+#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
+#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK 0xfffffc1f
+#define SNBEP_CBO_MSR_OFFSET 0x20
+
+/* SNB-EP PCU register */
+#define SNBEP_PCU_MSR_PMON_CTR0 0xc36
+#define SNBEP_PCU_MSR_PMON_CTL0 0xc30
+#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
+#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff
+#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
+#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd
+
+/* NHM-EX event control */
+#define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff
+#define NHMEX_PMON_CTL_UMASK_MASK 0x0000ff00
+#define NHMEX_PMON_CTL_EN_BIT0 (1 << 0)
+#define NHMEX_PMON_CTL_EDGE_DET (1 << 18)
+#define NHMEX_PMON_CTL_PMI_EN (1 << 20)
+#define NHMEX_PMON_CTL_EN_BIT22 (1 << 22)
+#define NHMEX_PMON_CTL_INVERT (1 << 23)
+#define NHMEX_PMON_CTL_TRESH_MASK 0xff000000
+#define NHMEX_PMON_RAW_EVENT_MASK (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_UMASK_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET | \
+ NHMEX_PMON_CTL_INVERT | \
+ NHMEX_PMON_CTL_TRESH_MASK)
+
+/* NHM-EX Ubox */
+#define NHMEX_U_MSR_PMON_GLOBAL_CTL 0xc00
+#define NHMEX_U_MSR_PMON_CTR 0xc11
+#define NHMEX_U_MSR_PMON_EV_SEL 0xc10
+
+#define NHMEX_U_PMON_GLOBAL_EN (1 << 0)
+#define NHMEX_U_PMON_GLOBAL_PMI_CORE_SEL 0x0000001e
+#define NHMEX_U_PMON_GLOBAL_EN_ALL (1 << 28)
+#define NHMEX_U_PMON_GLOBAL_RST_ALL (1 << 29)
+#define NHMEX_U_PMON_GLOBAL_FRZ_ALL (1 << 31)
+
+#define NHMEX_U_PMON_RAW_EVENT_MASK \
+ (NHMEX_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_PMON_CTL_EDGE_DET)
+
+/* NHM-EX Cbox */
+#define NHMEX_C0_MSR_PMON_GLOBAL_CTL 0xd00
+#define NHMEX_C0_MSR_PMON_CTR0 0xd11
+#define NHMEX_C0_MSR_PMON_EV_SEL0 0xd10
+#define NHMEX_C_MSR_OFFSET 0x20
+
+/* NHM-EX Bbox */
+#define NHMEX_B0_MSR_PMON_GLOBAL_CTL 0xc20
+#define NHMEX_B0_MSR_PMON_CTR0 0xc31
+#define NHMEX_B0_MSR_PMON_CTL0 0xc30
+#define NHMEX_B_MSR_OFFSET 0x40
+#define NHMEX_B0_MSR_MATCH 0xe45
+#define NHMEX_B0_MSR_MASK 0xe46
+#define NHMEX_B1_MSR_MATCH 0xe4d
+#define NHMEX_B1_MSR_MASK 0xe4e
+
+#define NHMEX_B_PMON_CTL_EN (1 << 0)
+#define NHMEX_B_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_B_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_B_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_B_PMON_CTR_SHIFT 6
+#define NHMEX_B_PMON_CTR_MASK \
+ (0x3 << NHMEX_B_PMON_CTR_SHIFT)
+#define NHMEX_B_PMON_RAW_EVENT_MASK \
+ (NHMEX_B_PMON_CTL_EV_SEL_MASK | \
+ NHMEX_B_PMON_CTR_MASK)
+
+/* NHM-EX Sbox */
+#define NHMEX_S0_MSR_PMON_GLOBAL_CTL 0xc40
+#define NHMEX_S0_MSR_PMON_CTR0 0xc51
+#define NHMEX_S0_MSR_PMON_CTL0 0xc50
+#define NHMEX_S_MSR_OFFSET 0x80
+#define NHMEX_S0_MSR_MM_CFG 0xe48
+#define NHMEX_S0_MSR_MATCH 0xe49
+#define NHMEX_S0_MSR_MASK 0xe4a
+#define NHMEX_S1_MSR_MM_CFG 0xe58
+#define NHMEX_S1_MSR_MATCH 0xe59
+#define NHMEX_S1_MSR_MASK 0xe5a
+
+#define NHMEX_S_PMON_MM_CFG_EN (0x1ULL << 63)
+
+/* NHM-EX Mbox */
+#define NHMEX_M0_MSR_GLOBAL_CTL 0xca0
+#define NHMEX_M0_MSR_PMU_DSP 0xca5
+#define NHMEX_M0_MSR_PMU_ISS 0xca6
+#define NHMEX_M0_MSR_PMU_MAP 0xca7
+#define NHMEX_M0_MSR_PMU_MSC_THR 0xca8
+#define NHMEX_M0_MSR_PMU_PGT 0xca9
+#define NHMEX_M0_MSR_PMU_PLD 0xcaa
+#define NHMEX_M0_MSR_PMU_ZDP_CTL_FVC 0xcab
+#define NHMEX_M0_MSR_PMU_CTL0 0xcb0
+#define NHMEX_M0_MSR_PMU_CNT0 0xcb1
+#define NHMEX_M_MSR_OFFSET 0x40
+#define NHMEX_M0_MSR_PMU_MM_CFG 0xe54
+#define NHMEX_M1_MSR_PMU_MM_CFG 0xe5c
+
+#define NHMEX_M_PMON_MM_CFG_EN (1ULL << 63)
+#define NHMEX_M_PMON_ADDR_MATCH_MASK 0x3ffffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_MASK 0x7ffffffULL
+#define NHMEX_M_PMON_ADDR_MASK_SHIFT 34
+
+#define NHMEX_M_PMON_CTL_EN (1 << 0)
+#define NHMEX_M_PMON_CTL_PMI_EN (1 << 1)
+#define NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT 2
+#define NHMEX_M_PMON_CTL_COUNT_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_COUNT_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT 4
+#define NHMEX_M_PMON_CTL_STORAGE_MODE_MASK \
+ (0x3 << NHMEX_M_PMON_CTL_STORAGE_MODE_SHIFT)
+#define NHMEX_M_PMON_CTL_WRAP_MODE (1 << 6)
+#define NHMEX_M_PMON_CTL_FLAG_MODE (1 << 7)
+#define NHMEX_M_PMON_CTL_INC_SEL_SHIFT 9
+#define NHMEX_M_PMON_CTL_INC_SEL_MASK \
+ (0x1f << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT 19
+#define NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK \
+ (0x7 << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT)
+#define NHMEX_M_PMON_RAW_EVENT_MASK \
+ (NHMEX_M_PMON_CTL_COUNT_MODE_MASK | \
+ NHMEX_M_PMON_CTL_STORAGE_MODE_MASK | \
+ NHMEX_M_PMON_CTL_WRAP_MODE | \
+ NHMEX_M_PMON_CTL_FLAG_MODE | \
+ NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
+
+
+#define NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK 0x1f
+#define NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK (0x7 << 5)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK (0x7 << 8)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR (1 << 23)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK \
+ (NHMEX_M_PMON_ZDP_CTL_FVC_FVID_MASK | \
+ NHMEX_M_PMON_ZDP_CTL_FVC_BCMD_MASK | \
+ NHMEX_M_PMON_ZDP_CTL_FVC_RSP_MASK | \
+ NHMEX_M_PMON_ZDP_CTL_FVC_PBOX_INIT_ERR)
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n)))
+
+/*
+ * use the 9~13 bits to select event If the 7th bit is not set,
+ * otherwise use the 19~21 bits to select event.
+ */
+#define MBOX_INC_SEL(x) ((x) << NHMEX_M_PMON_CTL_INC_SEL_SHIFT)
+#define MBOX_SET_FLAG_SEL(x) (((x) << NHMEX_M_PMON_CTL_SET_FLAG_SEL_SHIFT) | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_MASK (NHMEX_M_PMON_CTL_INC_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_SET_FLAG_SEL_MASK (NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK | \
+ NHMEX_M_PMON_CTL_FLAG_MODE)
+#define MBOX_INC_SEL_EXTAR_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r)
+#define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \
+ EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \
+ MBOX_SET_FLAG_SEL_MASK, \
+ (u64)-1, NHMEX_M_##r)
+
+/* NHM-EX Rbox */
+#define NHMEX_R_MSR_GLOBAL_CTL 0xe00
+#define NHMEX_R_MSR_PMON_CTL0 0xe10
+#define NHMEX_R_MSR_PMON_CNT0 0xe11
+#define NHMEX_R_MSR_OFFSET 0x20
+
+#define NHMEX_R_MSR_PORTN_QLX_CFG(n) \
+ ((n) < 4 ? (0xe0c + (n)) : (0xe2c + (n) - 4))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG0(n) (0xe04 + (n))
+#define NHMEX_R_MSR_PORTN_IPERF_CFG1(n) (0xe24 + (n))
+#define NHMEX_R_MSR_PORTN_XBR_OFFSET(n) \
+ (((n) < 4 ? 0 : 0x10) + (n) * 4)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) \
+ (0xe60 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET1_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(n) + 2)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) \
+ (0xe70 + NHMEX_R_MSR_PORTN_XBR_OFFSET(n))
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 1)
+#define NHMEX_R_MSR_PORTN_XBR_SET2_MASK(n) \
+ (NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(n) + 2)
+
+#define NHMEX_R_PMON_CTL_EN (1 << 0)
+#define NHMEX_R_PMON_CTL_EV_SEL_SHIFT 1
+#define NHMEX_R_PMON_CTL_EV_SEL_MASK \
+ (0x1f << NHMEX_R_PMON_CTL_EV_SEL_SHIFT)
+#define NHMEX_R_PMON_CTL_PMI_EN (1 << 6)
+#define NHMEX_R_PMON_RAW_EVENT_MASK NHMEX_R_PMON_CTL_EV_SEL_MASK
+
+/* NHM-EX Wbox */
+#define NHMEX_W_MSR_GLOBAL_CTL 0xc80
+#define NHMEX_W_MSR_PMON_CNT0 0xc90
+#define NHMEX_W_MSR_PMON_EVT_SEL0 0xc91
+#define NHMEX_W_MSR_PMON_FIXED_CTR 0x394
+#define NHMEX_W_MSR_PMON_FIXED_CTL 0x395
+
+#define NHMEX_W_PMON_GLOBAL_FIXED_EN (1ULL << 31)
+
+struct intel_uncore_ops;
+struct intel_uncore_pmu;
+struct intel_uncore_box;
+struct uncore_event_desc;
+
+struct intel_uncore_type {
+ const char *name;
+ int num_counters;
+ int num_boxes;
+ int perf_ctr_bits;
+ int fixed_ctr_bits;
+ unsigned perf_ctr;
+ unsigned event_ctl;
+ unsigned event_mask;
+ unsigned fixed_ctr;
+ unsigned fixed_ctl;
+ unsigned box_ctl;
+ unsigned msr_offset;
+ unsigned num_shared_regs:8;
+ unsigned single_fixed:1;
+ unsigned pair_ctr_ctl:1;
+ struct event_constraint unconstrainted;
+ struct event_constraint *constraints;
+ struct intel_uncore_pmu *pmus;
+ struct intel_uncore_ops *ops;
+ struct uncore_event_desc *event_descs;
+ const struct attribute_group *attr_groups[3];
+};
+
+#define format_group attr_groups[0]
+
+struct intel_uncore_ops {
+ void (*init_box)(struct intel_uncore_box *);
+ void (*disable_box)(struct intel_uncore_box *);
+ void (*enable_box)(struct intel_uncore_box *);
+ void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
+ void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
+ u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
+ int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
+ struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
+ struct perf_event *);
+ void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
+};
+
+struct intel_uncore_pmu {
+ struct pmu pmu;
+ char name[UNCORE_PMU_NAME_LEN];
+ int pmu_idx;
+ int func_id;
+ struct intel_uncore_type *type;
+ struct intel_uncore_box ** __percpu box;
+ struct list_head box_list;
+};
+
+struct intel_uncore_extra_reg {
+ raw_spinlock_t lock;
+ u64 config, config1, config2;
+ atomic_t ref;
+};
+
+struct intel_uncore_box {
+ int phys_id;
+ int n_active; /* number of active events */
+ int n_events;
+ int cpu; /* cpu to collect events */
+ unsigned long flags;
+ atomic_t refcnt;
+ struct perf_event *events[UNCORE_PMC_IDX_MAX];
+ struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
+ unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
+ u64 tags[UNCORE_PMC_IDX_MAX];
+ struct pci_dev *pci_dev;
+ struct intel_uncore_pmu *pmu;
+ struct hrtimer hrtimer;
+ struct list_head list;
+ struct intel_uncore_extra_reg shared_regs[0];
+};
+
+#define UNCORE_BOX_FLAG_INITIATED 0
+
+struct uncore_event_desc {
+ struct kobj_attribute attr;
+ const char *config;
+};
+
+#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
+{ \
+ .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
+ .config = _config, \
+}
+
+#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
+
+
+static ssize_t uncore_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct uncore_event_desc *event =
+ container_of(attr, struct uncore_event_desc, attr);
+ return sprintf(buf, "%s", event->config);
+}
+
+static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
+{
+ return box->pmu->type->box_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctl;
+}
+
+static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctr;
+}
+
+static inline
+unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
+{
+ return idx * 4 + box->pmu->type->event_ctl;
+}
+
+static inline
+unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+ return idx * 8 + box->pmu->type->perf_ctr;
+}
+
+static inline
+unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
+{
+ if (!box->pmu->type->box_ctl)
+ return 0;
+ return box->pmu->type->box_ctl +
+ box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
+{
+ if (!box->pmu->type->fixed_ctl)
+ return 0;
+ return box->pmu->type->fixed_ctl +
+ box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctr +
+ box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
+{
+ return box->pmu->type->event_ctl +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+ return box->pmu->type->perf_ctr +
+ (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+ box->pmu->type->msr_offset * box->pmu->pmu_idx;
+}
+
+static inline
+unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
+{
+ if (box->pci_dev)
+ return uncore_pci_fixed_ctl(box);
+ else
+ return uncore_msr_fixed_ctl(box);
+}
+
+static inline
+unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
+{
+ if (box->pci_dev)
+ return uncore_pci_fixed_ctr(box);
+ else
+ return uncore_msr_fixed_ctr(box);
+}
+
+static inline
+unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
+{
+ if (box->pci_dev)
+ return uncore_pci_event_ctl(box, idx);
+ else
+ return uncore_msr_event_ctl(box, idx);
+}
+
+static inline
+unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
+{
+ if (box->pci_dev)
+ return uncore_pci_perf_ctr(box, idx);
+ else
+ return uncore_msr_perf_ctr(box, idx);
+}
+
+static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
+{
+ return box->pmu->type->perf_ctr_bits;
+}
+
+static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
+{
+ return box->pmu->type->fixed_ctr_bits;
+}
+
+static inline int uncore_num_counters(struct intel_uncore_box *box)
+{
+ return box->pmu->type->num_counters;
+}
+
+static inline void uncore_disable_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->type->ops->disable_box)
+ box->pmu->type->ops->disable_box(box);
+}
+
+static inline void uncore_enable_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->type->ops->enable_box)
+ box->pmu->type->ops->enable_box(box);
+}
+
+static inline void uncore_disable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ box->pmu->type->ops->disable_event(box, event);
+}
+
+static inline void uncore_enable_event(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ box->pmu->type->ops->enable_event(box, event);
+}
+
+static inline u64 uncore_read_counter(struct intel_uncore_box *box,
+ struct perf_event *event)
+{
+ return box->pmu->type->ops->read_counter(box, event);
+}
+
+static inline void uncore_box_init(struct intel_uncore_box *box)
+{
+ if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
+ if (box->pmu->type->ops->init_box)
+ box->pmu->type->ops->init_box(box);
+ }
+}
+
+static inline bool uncore_box_is_fake(struct intel_uncore_box *box)
+{
+ return (box->phys_id < 0);
+}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 47124a73dd7..92c7e39a079 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)
* So at moment let leave metrics turned on forever -- it's
* ok for now but need to be revisited!
*
- * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
- * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+ * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0);
+ * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
*/
}
@@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* state we need to clear P4_CCCR_OVF, otherwise interrupt get
* asserted again and again
*/
- (void)checking_wrmsrl(hwc->config_base,
+ (void)wrmsrl_safe(hwc->config_base,
(u64)(p4_config_unpack_cccr(hwc->config)) &
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
@@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config)
bind = &p4_pebs_bind_map[idx];
- (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
- (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
+ (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
+ (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}
static void p4_pmu_enable_event(struct perf_event *event)
@@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event)
*/
p4_pmu_enable_pebs(hwc->config);
- (void)checking_wrmsrl(escr_addr, escr_conf);
- (void)checking_wrmsrl(hwc->config_base,
+ (void)wrmsrl_safe(escr_addr, escr_conf);
+ (void)wrmsrl_safe(hwc->config_base,
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
@@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void)
unsigned int low, high;
/* If we get stripped -- indexing fails */
- BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);
+ BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
rdmsr(MSR_IA32_MISC_ENABLE, low, high);
if (!(low & (1 << 7))) {
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 32bcfc7dd23..e4dd0f7a045 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base, val);
+ (void)wrmsrl_safe(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
@@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base, val);
+ (void)wrmsrl_safe(hwc->config_base, val);
}
PMU_FORMAT_ATTR(event, "config:0-7" );
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index addf9e82a7f..ee8e9abc859 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
- { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 },
+ { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
{ X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
deleted file mode 100644
index a640ae5ad20..00000000000
--- a/arch/x86/kernel/cpu/sched.c
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <linux/sched.h>
-#include <linux/math64.h>
-#include <linux/percpu.h>
-#include <linux/irqflags.h>
-
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-
-#ifdef CONFIG_SMP
-
-static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);
-
-static unsigned long scale_aperfmperf(void)
-{
- struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
- unsigned long ratio, flags;
-
- local_irq_save(flags);
- get_aperfmperf(&val);
- local_irq_restore(flags);
-
- ratio = calc_aperfmperf_ratio(old, &val);
- *old = val;
-
- return ratio;
-}
-
-unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
-{
- /*
- * do aperf/mperf on the cpu level because it includes things
- * like turbo mode, which are relevant to full cores.
- */
- if (boot_cpu_has(X86_FEATURE_APERFMPERF))
- return scale_aperfmperf();
-
- /*
- * maybe have something cpufreq here
- */
-
- return default_scale_freq_power(sd, cpu);
-}
-
-unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
-{
- /*
- * aperf/mperf already includes the smt gain
- */
- if (boot_cpu_has(X86_FEATURE_APERFMPERF))
- return SCHED_LOAD_SCALE;
-
- return default_scale_smt_power(sd, cpu);
-}
-
-#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 571246d81ed..ae42418bc50 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -27,8 +27,8 @@ static int die_counter;
void printk_address(unsigned long address, int reliable)
{
- printk(" [<%p>] %s%pB\n", (void *) address,
- reliable ? "" : "? ", (void *) address);
+ pr_cont(" [<%p>] %s%pB\n",
+ (void *)address, reliable ? "" : "? ", (void *)address);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;
+ print_modules();
show_regs(regs);
#ifdef CONFIG_X86_32
if (user_mode_vm(regs)) {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e0b1d783daa..1038a417ea5 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (kstack_end(stack))
break;
if (i && ((i % STACKSLOTS_PER_LINE) == 0))
- printk(KERN_CONT "\n");
- printk(KERN_CONT " %08lx", *stack++);
+ pr_cont("\n");
+ pr_cont(" %08lx", *stack++);
touch_nmi_watchdog();
}
- printk(KERN_CONT "\n");
+ pr_cont("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs)
{
int i;
- print_modules();
__show_regs(regs, !user_mode_vm(regs));
- printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
- TASK_COMM_LEN, current->comm, task_pid_nr(current),
- current_thread_info(), current, task_thread_info(current));
+ pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
+ TASK_COMM_LEN, current->comm, task_pid_nr(current),
+ current_thread_info(), current, task_thread_info(current));
/*
* When in-kernel, we also print out the stack and code at the
* time of the fault..
@@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs)
unsigned char c;
u8 *ip;
- printk(KERN_EMERG "Stack:\n");
+ pr_emerg("Stack:\n");
show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
- printk(KERN_EMERG "Code: ");
+ pr_emerg("Code:");
ip = (u8 *)regs->ip - code_prologue;
if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
@@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs)
for (i = 0; i < code_len; i++, ip++) {
if (ip < (u8 *)PAGE_OFFSET ||
probe_kernel_address(ip, c)) {
- printk(KERN_CONT " Bad EIP value.");
+ pr_cont(" Bad EIP value.");
break;
}
if (ip == (u8 *)regs->ip)
- printk(KERN_CONT "<%02x> ", c);
+ pr_cont(" <%02x>", c);
else
- printk(KERN_CONT "%02x ", c);
+ pr_cont(" %02x", c);
}
}
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 791b76122aa..b653675d528 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (stack >= irq_stack && stack <= irq_stack_end) {
if (stack == irq_stack_end) {
stack = (unsigned long *) (irq_stack_end[-1]);
- printk(KERN_CONT " <EOI> ");
+ pr_cont(" <EOI> ");
}
} else {
if (((long) stack & (THREAD_SIZE-1)) == 0)
break;
}
if (i && ((i % STACKSLOTS_PER_LINE) == 0))
- printk(KERN_CONT "\n");
- printk(KERN_CONT " %016lx", *stack++);
+ pr_cont("\n");
+ pr_cont(" %016lx", *stack++);
touch_nmi_watchdog();
}
preempt_enable();
- printk(KERN_CONT "\n");
+ pr_cont("\n");
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs)
sp = regs->sp;
printk("CPU %d ", cpu);
- print_modules();
__show_regs(regs, 1);
- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
- cur->comm, cur->pid, task_thread_info(cur), cur);
+ printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n",
+ cur->comm, cur->pid, task_thread_info(cur), cur);
/*
* When in-kernel, we also print out the stack and code at the
@@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs)
for (i = 0; i < code_len; i++, ip++) {
if (ip < (u8 *)PAGE_OFFSET ||
probe_kernel_address(ip, c)) {
- printk(KERN_CONT " Bad RIP value.");
+ pr_cont(" Bad RIP value.");
break;
}
if (ip == (u8 *)regs->ip)
- printk(KERN_CONT "<%02x> ", c);
+ pr_cont("<%02x> ", c);
else
- printk(KERN_CONT "%02x ", c);
+ pr_cont("%02x ", c);
}
}
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 41857970517..ed858e9e9a7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -944,7 +944,7 @@ void __init e820_reserve_resources(void)
for (i = 0; i < e820_saved.nr_map; i++) {
struct e820entry *entry = &e820_saved.map[i];
firmware_map_add_early(entry->addr,
- entry->addr + entry->size - 1,
+ entry->addr + entry->size,
e820_type_to_string(entry->type));
}
}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7d65133b51b..69babd8c834 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1048,24 +1048,6 @@ apicinterrupt LOCAL_TIMER_VECTOR \
apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
-#ifdef CONFIG_SMP
- ALIGN
- INTR_FRAME
-.irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
- 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
-.if NUM_INVALIDATE_TLB_VECTORS > \idx
-ENTRY(invalidate_interrupt\idx)
- pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx)
- jmp .Lcommon_invalidate_interrupt0
- CFI_ADJUST_CFA_OFFSET -8
-END(invalidate_interrupt\idx)
-.endif
-.endr
- CFI_ENDPROC
-apicinterrupt INVALIDATE_TLB_VECTOR_START, \
- invalidate_interrupt0, smp_invalidate_interrupt
-#endif
-
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
@@ -1758,10 +1740,30 @@ end_repeat_nmi:
*/
call save_paranoid
DEFAULT_FRAME 0
+
+ /*
+ * Save off the CR2 register. If we take a page fault in the NMI then
+ * it could corrupt the CR2 value. If the NMI preempts a page fault
+ * handler before it was able to read the CR2 register, and then the
+ * NMI itself takes a page fault, the page fault that was preempted
+ * will read the information from the NMI page fault and not the
+ * origin fault. Save it off and restore it if it changes.
+ * Use the r12 callee-saved register.
+ */
+ movq %cr2, %r12
+
/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
movq %rsp,%rdi
movq $-1,%rsi
call do_nmi
+
+ /* Did the NMI take a page fault? Restore cr2 if it did */
+ movq %cr2, %rcx
+ cmpq %rcx, %r12
+ je 1f
+ movq %r12, %cr2
+1:
+
testl %ebx,%ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3dafc6003b7..7ad683d7864 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -294,9 +294,9 @@ void fixup_irqs(void)
raw_spin_unlock(&desc->lock);
if (break_affinity && set_affinity)
- printk("Broke affinity for irq %i\n", irq);
+ pr_notice("Broke affinity for irq %i\n", irq);
else if (!set_affinity)
- printk("Cannot set affinity for irq %i\n", irq);
+ pr_notice("Cannot set affinity for irq %i\n", irq);
}
/*
@@ -328,6 +328,7 @@ void fixup_irqs(void)
chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
+ __this_cpu_write(vector_irq[vector], -1);
}
}
#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 252981afd6c..6e03b0d6913 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -171,79 +171,6 @@ static void __init smp_intr_init(void)
*/
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
- /* IPIs for invalidation */
-#define ALLOC_INVTLB_VEC(NR) \
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
- invalidate_interrupt##NR)
-
- switch (NUM_INVALIDATE_TLB_VECTORS) {
- default:
- ALLOC_INVTLB_VEC(31);
- case 31:
- ALLOC_INVTLB_VEC(30);
- case 30:
- ALLOC_INVTLB_VEC(29);
- case 29:
- ALLOC_INVTLB_VEC(28);
- case 28:
- ALLOC_INVTLB_VEC(27);
- case 27:
- ALLOC_INVTLB_VEC(26);
- case 26:
- ALLOC_INVTLB_VEC(25);
- case 25:
- ALLOC_INVTLB_VEC(24);
- case 24:
- ALLOC_INVTLB_VEC(23);
- case 23:
- ALLOC_INVTLB_VEC(22);
- case 22:
- ALLOC_INVTLB_VEC(21);
- case 21:
- ALLOC_INVTLB_VEC(20);
- case 20:
- ALLOC_INVTLB_VEC(19);
- case 19:
- ALLOC_INVTLB_VEC(18);
- case 18:
- ALLOC_INVTLB_VEC(17);
- case 17:
- ALLOC_INVTLB_VEC(16);
- case 16:
- ALLOC_INVTLB_VEC(15);
- case 15:
- ALLOC_INVTLB_VEC(14);
- case 14:
- ALLOC_INVTLB_VEC(13);
- case 13:
- ALLOC_INVTLB_VEC(12);
- case 12:
- ALLOC_INVTLB_VEC(11);
- case 11:
- ALLOC_INVTLB_VEC(10);
- case 10:
- ALLOC_INVTLB_VEC(9);
- case 9:
- ALLOC_INVTLB_VEC(8);
- case 8:
- ALLOC_INVTLB_VEC(7);
- case 7:
- ALLOC_INVTLB_VEC(6);
- case 6:
- ALLOC_INVTLB_VEC(5);
- case 5:
- ALLOC_INVTLB_VEC(4);
- case 4:
- ALLOC_INVTLB_VEC(3);
- case 3:
- ALLOC_INVTLB_VEC(2);
- case 2:
- ALLOC_INVTLB_VEC(1);
- case 1:
- ALLOC_INVTLB_VEC(0);
- break;
- }
-
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 1d5d31ea686..dc1404bf8e4 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -107,7 +107,7 @@ static int __init create_setup_data_nodes(struct dentry *parent)
{
struct setup_data_node *node;
struct setup_data *data;
- int error = -ENOMEM;
+ int error;
struct dentry *d;
struct page *pg;
u64 pa_data;
@@ -121,8 +121,10 @@ static int __init create_setup_data_nodes(struct dentry *parent)
while (pa_data) {
node = kmalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
+ if (!node) {
+ error = -ENOMEM;
goto err_dir;
+ }
pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
if (PageHighMem(pg)) {
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8bfb6146f75..3f61904365c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -444,12 +444,12 @@ void kgdb_roundup_cpus(unsigned long flags)
/**
* kgdb_arch_handle_exception - Handle architecture specific GDB packets.
- * @vector: The error vector of the exception that happened.
+ * @e_vector: The error vector of the exception that happened.
* @signo: The signal number of the exception that happened.
* @err_code: The error code of the exception that happened.
- * @remcom_in_buffer: The buffer of the packet we have read.
- * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
- * @regs: The &struct pt_regs of the current process.
+ * @remcomInBuffer: The buffer of the packet we have read.
+ * @remcomOutBuffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @linux_regs: The &struct pt_regs of the current process.
*
* This function MUST handle the 'c' and 's' command packets,
* as well packets to set / remove a hardware breakpoint, if used.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e554e5ad2fe..c1d61ee4b4f 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,9 @@
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/idle.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/hypervisor.h>
static int kvmapf = 1;
@@ -283,6 +286,22 @@ static void kvm_register_steal_time(void)
cpu, __pa(st));
}
+static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
+
+static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
+{
+ /**
+ * This relies on __test_and_clear_bit to modify the memory
+ * in a way that is atomic with respect to the local CPU.
+ * The hypervisor only accesses this memory from the local CPU so
+ * there's no need for lock or memory barriers.
+ * An optimization barrier is implied in apic write.
+ */
+ if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
+ return;
+ apic_write(APIC_EOI, APIC_EOI_ACK);
+}
+
void __cpuinit kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
@@ -300,11 +319,20 @@ void __cpuinit kvm_guest_cpu_init(void)
smp_processor_id());
}
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+ unsigned long pa;
+ /* Size alignment is implied but just to make it explicit. */
+ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
+ __get_cpu_var(kvm_apic_eoi) = 0;
+ pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
+ wrmsrl(MSR_KVM_PV_EOI_EN, pa);
+ }
+
if (has_steal_clock)
kvm_register_steal_time();
}
-static void kvm_pv_disable_apf(void *unused)
+static void kvm_pv_disable_apf(void)
{
if (!__get_cpu_var(apf_reason).enabled)
return;
@@ -316,11 +344,23 @@ static void kvm_pv_disable_apf(void *unused)
smp_processor_id());
}
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+ /*
+ * We disable PV EOI before we load a new kernel by kexec,
+ * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
+ * New kernel can re-enable when it boots.
+ */
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
+}
+
static int kvm_pv_reboot_notify(struct notifier_block *nb,
unsigned long code, void *unused)
{
if (code == SYS_RESTART)
- on_each_cpu(kvm_pv_disable_apf, NULL, 1);
+ on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
return NOTIFY_DONE;
}
@@ -371,7 +411,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
static void kvm_guest_cpu_offline(void *dummy)
{
kvm_disable_steal_time();
- kvm_pv_disable_apf(NULL);
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
apf_task_wake_all();
}
@@ -424,6 +466,9 @@ void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ apic_set_eoi_write(kvm_guest_apic_eoi_write);
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier);
@@ -432,6 +477,19 @@ void __init kvm_guest_init(void)
#endif
}
+static bool __init kvm_detect(void)
+{
+ if (!kvm_para_available())
+ return false;
+ return true;
+}
+
+const struct hypervisor_x86 x86_hyper_kvm __refconst = {
+ .name = "KVM",
+ .detect = kvm_detect,
+};
+EXPORT_SYMBOL_GPL(x86_hyper_kvm);
+
static __init int activate_jump_labels(void)
{
if (has_steal_clock) {
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 086eb58c6e8..f1b42b3a186 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -120,11 +120,6 @@ bool kvm_check_and_clear_guest_paused(void)
bool ret = false;
struct pvclock_vcpu_time_info *src;
- /*
- * per_cpu() is safe here because this function is only called from
- * timer functions where preemption is already disabled.
- */
- WARN_ON(!in_atomic());
src = &__get_cpu_var(hv_clock);
if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fbdfc691718..4873e62db6a 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -87,6 +87,7 @@
#include <asm/microcode.h>
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
+#include <asm/perf_event.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu)
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
int err = 0;
- mutex_lock(&microcode_mutex);
if (uci->valid) {
enum ucode_state ustate;
@@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu)
if (ustate == UCODE_ERROR)
err = -EINVAL;
}
- mutex_unlock(&microcode_mutex);
return err;
}
@@ -298,19 +297,31 @@ static ssize_t reload_store(struct device *dev,
const char *buf, size_t size)
{
unsigned long val;
- int cpu = dev->id;
- ssize_t ret = 0;
+ int cpu;
+ ssize_t ret = 0, tmp_ret;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
- if (val == 1) {
- get_online_cpus();
- if (cpu_online(cpu))
- ret = reload_for_cpu(cpu);
- put_online_cpus();
+ if (val != 1)
+ return size;
+
+ get_online_cpus();
+ mutex_lock(&microcode_mutex);
+ for_each_online_cpu(cpu) {
+ tmp_ret = reload_for_cpu(cpu);
+ if (tmp_ret != 0)
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+
+ /* save retval of the first encountered reload error */
+ if (!ret)
+ ret = tmp_ret;
}
+ if (!ret)
+ perf_check_microcode();
+ mutex_unlock(&microcode_mutex);
+ put_online_cpus();
if (!ret)
ret = size;
@@ -339,7 +350,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL);
static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
static struct attribute *mc_default_attrs[] = {
- &dev_attr_reload.attr,
&dev_attr_version.attr,
&dev_attr_processor_flags.attr,
NULL
@@ -504,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = {
#ifdef MODULE
/* Autoload on Intel and AMD systems */
-static const struct x86_cpu_id microcode_id[] = {
+static const struct x86_cpu_id __initconst microcode_id[] = {
#ifdef CONFIG_MICROCODE_INTEL
{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
#endif
@@ -516,6 +526,16 @@ static const struct x86_cpu_id microcode_id[] = {
MODULE_DEVICE_TABLE(x86cpu, microcode_id);
#endif
+static struct attribute *cpu_root_microcode_attrs[] = {
+ &dev_attr_reload.attr,
+ NULL
+};
+
+static struct attribute_group cpu_root_microcode_group = {
+ .name = "microcode",
+ .attrs = cpu_root_microcode_attrs,
+};
+
static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
@@ -540,16 +560,25 @@ static int __init microcode_init(void)
mutex_lock(&microcode_mutex);
error = subsys_interface_register(&mc_cpu_interface);
-
+ if (!error)
+ perf_check_microcode();
mutex_unlock(&microcode_mutex);
put_online_cpus();
if (error)
goto out_pdev;
+ error = sysfs_create_group(&cpu_subsys.dev_root->kobj,
+ &cpu_root_microcode_group);
+
+ if (error) {
+ pr_err("Error creating microcode group!\n");
+ goto out_driver;
+ }
+
error = microcode_dev_init();
if (error)
- goto out_driver;
+ goto out_ucode_group;
register_syscore_ops(&mc_syscore_ops);
register_hotcpu_notifier(&mc_cpu_notifier);
@@ -559,7 +588,11 @@ static int __init microcode_init(void)
return 0;
-out_driver:
+ out_ucode_group:
+ sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+ &cpu_root_microcode_group);
+
+ out_driver:
get_online_cpus();
mutex_lock(&microcode_mutex);
@@ -568,7 +601,7 @@ out_driver:
mutex_unlock(&microcode_mutex);
put_online_cpus();
-out_pdev:
+ out_pdev:
platform_device_unregister(microcode_pdev);
return error;
@@ -584,6 +617,9 @@ static void __exit microcode_exit(void)
unregister_hotcpu_notifier(&mc_cpu_notifier);
unregister_syscore_ops(&mc_syscore_ops);
+ sysfs_remove_group(&cpu_subsys.dev_root->kobj,
+ &cpu_root_microcode_group);
+
get_online_cpus();
mutex_lock(&microcode_mutex);
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f21fd94ac89..216a4d754b0 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -15,6 +15,9 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/moduleloader.h>
#include <linux/elf.h>
#include <linux/vmalloc.h>
@@ -30,9 +33,14 @@
#include <asm/pgtable.h>
#if 0
-#define DEBUGP printk
+#define DEBUGP(fmt, ...) \
+ printk(KERN_DEBUG fmt, ##__VA_ARGS__)
#else
-#define DEBUGP(fmt...)
+#define DEBUGP(fmt, ...) \
+do { \
+ if (0) \
+ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+} while (0)
#endif
void *module_alloc(unsigned long size)
@@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs,
Elf32_Sym *sym;
uint32_t *location;
- DEBUGP("Applying relocate section %u to %u\n", relsec,
- sechdrs[relsec].sh_info);
+ DEBUGP("Applying relocate section %u to %u\n",
+ relsec, sechdrs[relsec].sh_info);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -73,11 +81,11 @@ int apply_relocate(Elf32_Shdr *sechdrs,
*location += sym->st_value;
break;
case R_386_PC32:
- /* Add the value, subtract its postition */
+ /* Add the value, subtract its position */
*location += sym->st_value - (uint32_t)location;
break;
default:
- printk(KERN_ERR "module %s: Unknown relocation: %u\n",
+ pr_err("%s: Unknown relocation: %u\n",
me->name, ELF32_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
@@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
void *loc;
u64 val;
- DEBUGP("Applying relocate section %u to %u\n", relsec,
- sechdrs[relsec].sh_info);
+ DEBUGP("Applying relocate section %u to %u\n",
+ relsec, sechdrs[relsec].sh_info);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
/* This is where to make the change */
loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
+ ELF64_R_SYM(rel[i].r_info);
DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
- (int)ELF64_R_TYPE(rel[i].r_info),
- sym->st_value, rel[i].r_addend, (u64)loc);
+ (int)ELF64_R_TYPE(rel[i].r_info),
+ sym->st_value, rel[i].r_addend, (u64)loc);
val = sym->st_value + rel[i].r_addend;
@@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
#endif
break;
default:
- printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
+ pr_err("%s: Unknown rela relocation: %llu\n",
me->name, ELF64_R_TYPE(rel[i].r_info));
return -ENOEXEC;
}
@@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return 0;
overflow:
- printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
+ pr_err("overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
- printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
+ pr_err("`%s' likely not compiled with -mcmodel=kernel\n",
me->name);
return -ENOEXEC;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index a0b2f84457b..f84f5c57de3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
#ifdef CONFIG_X86_32
/*
* For i386, NMIs use the same stack as the kernel, and we can
- * add a workaround to the iret problem in C. Simply have 3 states
- * the NMI can be in.
+ * add a workaround to the iret problem in C (preventing nested
+ * NMIs if an NMI takes a trap). Simply have 3 states the NMI
+ * can be in:
*
* 1) not running
* 2) executing
@@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
* If an NMI hits a breakpoint that executes an iret, another
* NMI can preempt it. We do not want to allow this new NMI
* to run, but we want to execute it when the first one finishes.
- * We set the state to "latched", and the first NMI will perform
- * an cmpxchg on the state, and if it doesn't successfully
- * reset the state to "not running" it will restart the next
- * NMI.
+ * We set the state to "latched", and the exit of the first NMI will
+ * perform a dec_return, if the result is zero (NOT_RUNNING), then
+ * it will simply exit the NMI handler. If not, the dec_return
+ * would have set the state to NMI_EXECUTING (what we want it to
+ * be when we are running). In this case, we simply jump back
+ * to rerun the NMI handler again, and restart the 'latched' NMI.
+ *
+ * No trap (breakpoint or page fault) should be hit before nmi_restart,
+ * thus there is no race between the first check of state for NOT_RUNNING
+ * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
+ * at this point.
+ *
+ * In case the NMI takes a page fault, we need to save off the CR2
+ * because the NMI could have preempted another page fault and corrupt
+ * the CR2 that is about to be read. As nested NMIs must be restarted
+ * and they can not take breakpoints or page faults, the update of the
+ * CR2 must be done before converting the nmi state back to NOT_RUNNING.
+ * Otherwise, there would be a race of another nested NMI coming in
+ * after setting state to NOT_RUNNING but before updating the nmi_cr2.
*/
enum nmi_states {
- NMI_NOT_RUNNING,
+ NMI_NOT_RUNNING = 0,
NMI_EXECUTING,
NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
+static DEFINE_PER_CPU(unsigned long, nmi_cr2);
#define nmi_nesting_preprocess(regs) \
do { \
- if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \
- __get_cpu_var(nmi_state) = NMI_LATCHED; \
+ if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \
+ this_cpu_write(nmi_state, NMI_LATCHED); \
return; \
} \
- nmi_restart: \
- __get_cpu_var(nmi_state) = NMI_EXECUTING; \
- } while (0)
+ this_cpu_write(nmi_state, NMI_EXECUTING); \
+ this_cpu_write(nmi_cr2, read_cr2()); \
+ } while (0); \
+ nmi_restart:
#define nmi_nesting_postprocess() \
do { \
- if (cmpxchg(&__get_cpu_var(nmi_state), \
- NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \
+ if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \
+ write_cr2(this_cpu_read(nmi_cr2)); \
+ if (this_cpu_dec_return(nmi_state)) \
goto nmi_restart; \
} while (0)
#else /* x86_64 */
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 149b8d9c6ad..6d9582ec032 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
static void __init init_nmi_testsuite(void)
{
/* trap all the unknown NMIs we may generate */
- register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+ register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk",
+ __initdata);
}
static void __init cleanup_nmi_testsuite(void)
@@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask)
{
unsigned long timeout;
- if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
- NMI_FLAG_FIRST, "nmi_selftest")) {
+ if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+ NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {
nmi_fail = FAILURE;
return;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 9ce885996fd..17fff18a103 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = {
#endif
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
- .rdmsr_regs = native_rdmsr_safe_regs,
.write_msr = native_write_msr_safe,
- .wrmsr_regs = native_wrmsr_safe_regs,
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
.read_tscp = native_read_tscp,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index b72838bae64..299d49302e7 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -22,6 +22,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#define pr_fmt(fmt) "Calgary: " fmt
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
npages, 0, boundary_size, 0);
if (offset == ~0UL) {
- printk(KERN_WARNING "Calgary: IOMMU full.\n");
+ pr_warn("IOMMU full\n");
spin_unlock_irqrestore(&tbl->it_lock, flags);
if (panic_on_overflow)
panic("Calgary: fix the allocator.\n");
@@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
entry = iommu_range_alloc(dev, tbl, npages);
if (unlikely(entry == DMA_ERROR_CODE)) {
- printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
- "iommu %p\n", npages, tbl);
+ pr_warn("failed to allocate %u pages in iommu %p\n",
+ npages, tbl);
return DMA_ERROR_CODE;
}
@@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl)
i++;
} while ((val & 0xff) != 0xff && i < 100);
if (i == 100)
- printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
- "continuing anyway\n");
+ pr_warn("PCI bus not quiesced, continuing anyway\n");
/* invalidate TCE cache */
target = calgary_reg(bbar, tar_offset(tbl->it_busno));
@@ -604,8 +605,7 @@ begin:
i++;
} while ((val64 & 0xff) != 0xff && i < 100);
if (i == 100)
- printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, "
- "continuing anyway\n");
+ pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");
/* 3. poll Page Migration DEBUG for SoftStopFault */
target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
@@ -617,8 +617,7 @@ begin:
if (++count < 100)
goto begin;
else {
- printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, "
- "aborting TCE cache flush sequence!\n");
+ pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");
return; /* pray for the best */
}
}
@@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl)
plssr = be32_to_cpu(readl(target));
/* If no error, the agent ID in the CSR is not valid */
- printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, "
- "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr);
+ pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n",
+ tbl->it_busno, csr, plssr);
}
static void calioc2_dump_error_regs(struct iommu_table *tbl)
@@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl)
target = calgary_reg(bbar, phboff | 0x800);
mck = be32_to_cpu(readl(target));
- printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n",
- tbl->it_busno);
+ pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno);
- printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
- csr, plssr, csmr, mck);
+ pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
+ csr, plssr, csmr, mck);
/* dump rest of error regs */
- printk(KERN_EMERG "Calgary: ");
+ pr_emerg("");
for (i = 0; i < ARRAY_SIZE(errregs); i++) {
/* err regs are at 0x810 - 0x870 */
erroff = (0x810 + (i * 0x10));
target = calgary_reg(bbar, phboff | erroff);
errregs[i] = be32_to_cpu(readl(target));
- printk("0x%08x@0x%lx ", errregs[i], erroff);
+ pr_cont("0x%08x@0x%lx ", errregs[i], erroff);
}
- printk("\n");
+ pr_cont("\n");
/* root complex status */
target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 62c9457ccd2..de2b7ad7027 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -45,15 +45,6 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
-/*
- * Group multi-function PCI devices into a single device-group for the
- * iommu_device_group interface. This tells the iommu driver to pretend
- * it cannot distinguish between functions of a device, exposing only one
- * group for the device. Useful for disallowing use of individual PCI
- * functions from userspace drivers.
- */
-int iommu_group_mf __read_mostly;
-
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
/* Dummy device used for NULL arguments (normally ISA). */
@@ -100,7 +91,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
struct dma_attrs *attrs)
{
unsigned long dma_mask;
- struct page *page = NULL;
+ struct page *page;
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
dma_addr_t addr;
@@ -108,6 +99,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
flag |= __GFP_ZERO;
again:
+ page = NULL;
if (!(flag & GFP_ATOMIC))
page = dma_alloc_from_contiguous(dev, count, get_order(size));
if (!page)
@@ -193,8 +185,6 @@ static __init int iommu_setup(char *p)
#endif
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
- if (!strncmp(p, "group_mf", 8))
- iommu_group_mf = 1;
gart_parse_options(p);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 735279e54e5..ef6a8456f71 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -145,16 +147,14 @@ void show_regs_common(void)
/* Board Name is optional */
board = dmi_get_system_info(DMI_BOARD_NAME);
- printk(KERN_CONT "\n");
- printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version);
- printk(KERN_CONT " %s %s", vendor, product);
- if (board)
- printk(KERN_CONT "/%s", board);
- printk(KERN_CONT "\n");
+ printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version,
+ vendor, product,
+ board ? "/" : "",
+ board ? board : "");
}
void flush_thread(void)
@@ -645,7 +645,7 @@ static void amd_e400_idle(void)
amd_e400_c1e_detected = true;
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
- printk(KERN_INFO "System has AMD C1E enabled\n");
+ pr_info("System has AMD C1E enabled\n");
}
}
@@ -659,8 +659,7 @@ static void amd_e400_idle(void)
*/
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
&cpu);
- printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
- cpu);
+ pr_info("Switch to broadcast mode on CPU%d\n", cpu);
}
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
@@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
- " performance may degrade.\n");
+ pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
}
#endif
if (pm_idle)
@@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
/*
* One CPU supports mwait => All CPUs supports mwait
*/
- printk(KERN_INFO "using mwait in idle threads.\n");
+ pr_info("using mwait in idle threads\n");
pm_idle = mwait_idle;
} else if (cpu_has_amd_erratum(amd_erratum_400)) {
/* E400: APIC timer interrupt does not wake up CPU from C1e */
- printk(KERN_INFO "using AMD E400 aware idle routine\n");
+ pr_info("using AMD E400 aware idle routine\n");
pm_idle = amd_e400_idle;
} else
pm_idle = default_idle;
@@ -715,7 +713,7 @@ static int __init idle_setup(char *str)
return -EINVAL;
if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
+ pr_info("using polling idle threads\n");
pm_idle = poll_idle;
boot_option_idle_override = IDLE_POLL;
} else if (!strcmp(str, "mwait")) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 61cdf7fdf09..0a980c9d7cb 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
if (dead_task->mm->context.size) {
- printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt,
- dead_task->mm->context.size);
+ pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n",
+ dead_task->comm,
+ dead_task->mm->context.ldt,
+ dead_task->mm->context.size);
BUG();
}
}
@@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
task->thread.gs = addr;
if (doit) {
load_gs_index(0);
- ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
}
}
put_cpu();
@@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
/* set the selector to 0 to not confuse
__switch_to */
loadsegment(fs, 0);
- ret = checking_wrmsrl(MSR_FS_BASE, addr);
+ ret = wrmsrl_safe(MSR_FS_BASE, addr);
}
}
put_cpu();
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 03920a15a63..1b27de56356 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
/* Set correct numa_node information for AMD NB functions */
-static void __init quirk_amd_nb_node(struct pci_dev *dev)
+static void __devinit quirk_amd_nb_node(struct pci_dev *dev)
{
struct pci_dev *nb_ht;
unsigned int devfn;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 25b48edb847..52190a938b4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/init.h>
@@ -20,14 +22,12 @@
#include <asm/virtext.h>
#include <asm/cpu.h>
#include <asm/nmi.h>
+#include <asm/smp.h>
-#ifdef CONFIG_X86_32
-# include <linux/ctype.h>
-# include <linux/mc146818rtc.h>
-# include <asm/realmode.h>
-#else
-# include <asm/x86_init.h>
-#endif
+#include <linux/ctype.h>
+#include <linux/mc146818rtc.h>
+#include <asm/realmode.h>
+#include <asm/x86_init.h>
/*
* Power off function, if any
@@ -49,7 +49,7 @@ int reboot_force;
*/
static int reboot_default = 1;
-#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
static int reboot_cpu = -1;
#endif
@@ -67,8 +67,8 @@ bool port_cf9_safe = false;
* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
* warm Don't set the cold reboot flag
* cold Set the cold reboot flag
- * bios Reboot by jumping through the BIOS (only for X86_32)
- * smp Reboot by executing reset on BSP or other CPU (only for X86_32)
+ * bios Reboot by jumping through the BIOS
+ * smp Reboot by executing reset on BSP or other CPU
* triple Force a triple fault (init)
* kbd Use the keyboard controller. cold reset (default)
* acpi Use the RESET_REG in the FADT
@@ -95,7 +95,6 @@ static int __init reboot_setup(char *str)
reboot_mode = 0;
break;
-#ifdef CONFIG_X86_32
#ifdef CONFIG_SMP
case 's':
if (isdigit(*(str+1))) {
@@ -112,7 +111,6 @@ static int __init reboot_setup(char *str)
#endif /* CONFIG_SMP */
case 'b':
-#endif
case 'a':
case 'k':
case 't':
@@ -138,7 +136,6 @@ static int __init reboot_setup(char *str)
__setup("reboot=", reboot_setup);
-#ifdef CONFIG_X86_32
/*
* Reboot options and system auto-detection code provided by
* Dell Inc. so their systems "just work". :-)
@@ -152,16 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
{
if (reboot_type != BOOT_BIOS) {
reboot_type = BOOT_BIOS;
- printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident);
+ pr_info("%s series board detected. Selecting %s-method for reboots.\n",
+ "BIOS", d->ident);
}
return 0;
}
-void machine_real_restart(unsigned int type)
+void __noreturn machine_real_restart(unsigned int type)
{
- void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int))
- real_mode_header->machine_real_restart_asm;
-
local_irq_disable();
/*
@@ -181,25 +176,28 @@ void machine_real_restart(unsigned int type)
/*
* Switch back to the initial page table.
*/
+#ifdef CONFIG_X86_32
load_cr3(initial_page_table);
-
- /*
- * Write 0x1234 to absolute memory location 0x472. The BIOS reads
- * this on booting to tell it to "Bypass memory test (also warm
- * boot)". This seems like a fairly standard thing that gets set by
- * REBOOT.COM programs, and the previous reset routine did this
- * too. */
- *((unsigned short *)0x472) = reboot_mode;
+#else
+ write_cr3(real_mode_header->trampoline_pgd);
+#endif
/* Jump to the identity-mapped low memory code */
- restart_lowmem(type);
+#ifdef CONFIG_X86_32
+ asm volatile("jmpl *%0" : :
+ "rm" (real_mode_header->machine_real_restart_asm),
+ "a" (type));
+#else
+ asm volatile("ljmpl *%0" : :
+ "m" (real_mode_header->machine_real_restart_asm),
+ "D" (type));
+#endif
+ unreachable();
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(machine_real_restart);
#endif
-#endif /* CONFIG_X86_32 */
-
/*
* Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
*/
@@ -207,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
{
if (reboot_type != BOOT_CF9) {
reboot_type = BOOT_CF9;
- printk(KERN_INFO "%s series board detected. "
- "Selecting PCI-method for reboots.\n", d->ident);
+ pr_info("%s series board detected. Selecting %s-method for reboots.\n",
+ "PCI", d->ident);
}
return 0;
}
@@ -217,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
{
if (reboot_type != BOOT_KBD) {
reboot_type = BOOT_KBD;
- printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident);
+ pr_info("%s series board detected. Selecting %s-method for reboot.\n",
+ "KBD", d->ident);
}
return 0;
}
/*
- * This is a single dmi_table handling all reboot quirks. Note that
- * REBOOT_BIOS is only available for 32bit
+ * This is a single dmi_table handling all reboot quirks.
*/
static struct dmi_system_id __initdata reboot_dmi_table[] = {
-#ifdef CONFIG_X86_32
{ /* Handle problems with rebooting on Dell E520's */
.callback = set_bios_reboot,
.ident = "Dell E520",
@@ -377,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
-#endif /* CONFIG_X86_32 */
{ /* Handle reboot issue on Acer Aspire one */
.callback = set_kbd_reboot,
@@ -451,6 +447,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
},
},
+ { /* Handle problems with rebooting on the Precision M6600. */
+ .callback = set_pci_reboot,
+ .ident = "Dell OptiPlex 990",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
+ },
+ },
{ }
};
@@ -576,13 +580,11 @@ static void native_machine_emergency_restart(void)
reboot_type = BOOT_KBD;
break;
-#ifdef CONFIG_X86_32
case BOOT_BIOS:
machine_real_restart(MRR_BIOS);
reboot_type = BOOT_KBD;
break;
-#endif
case BOOT_ACPI:
acpi_reboot();
@@ -624,12 +626,10 @@ void native_machine_shutdown(void)
/* The boot cpu is always logical cpu 0 */
int reboot_cpu_id = 0;
-#ifdef CONFIG_X86_32
/* See if there has been given a command line override */
if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
cpu_online(reboot_cpu))
reboot_cpu_id = reboot_cpu;
-#endif
/* Make certain the cpu I'm about to reboot on is online */
if (!cpu_online(reboot_cpu_id))
@@ -670,7 +670,7 @@ static void __machine_emergency_restart(int emergency)
static void native_machine_restart(char *__unused)
{
- printk("machine restart\n");
+ pr_notice("machine restart\n");
if (!reboot_force)
machine_shutdown();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 16be6dc14db..f4b9b80e1b9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1031,8 +1031,6 @@ void __init setup_arch(char **cmdline_p)
x86_init.timers.wallclock_init();
- x86_platform.wallclock_init();
-
mcheck_init();
arch_init_ideal_nops();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5a98aa27218..5cdff035774 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -21,7 +21,7 @@
#include <asm/cpu.h>
#include <asm/stackprotector.h>
-DEFINE_PER_CPU(int, cpu_number);
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 21af737053a..b280908a376 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,6 +6,9 @@
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
* 2000-2002 x86-64 support by Andi Kleen
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/smp.h>
@@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
me->comm, me->pid, where, frame,
regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
force_sig(SIGSEGV, me);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3fab55bea29..7c5a8c314c0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1,4 +1,4 @@
-/*
+ /*
* x86 SMP booting functions
*
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
@@ -39,6 +39,8 @@
* Glauber Costa : i386 and x86_64 integration
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/module.h>
@@ -104,17 +106,17 @@ int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
/* representing HT siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
-DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void)
* boards)
*/
- pr_debug("CALLIN, before setup_local_APIC().\n");
+ pr_debug("CALLIN, before setup_local_APIC()\n");
if (apic->smp_callin_clear_local_apic)
apic->smp_callin_clear_local_apic();
setup_local_APIC();
@@ -255,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused)
check_tsc_sync_target();
/*
- * We need to hold call_lock, so there is no inconsistency
- * between the time smp_call_function() determines number of
- * IPI recipients, and the time when the determination is made
- * for which cpus receive the IPI. Holding this
- * lock helps us to not include this cpu in a currently in progress
- * smp_call_function().
- *
* We need to hold vector_lock so there the set of online cpus
* does not change while we are assigning vectors to cpus. Holding
* this lock ensures we don't half assign or remove an irq from a cpu.
*/
- ipi_call_lock();
lock_vector_lock();
set_cpu_online(smp_processor_id(), true);
unlock_vector_lock();
- ipi_call_unlock();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
x86_platform.nmi_init();
@@ -349,9 +342,12 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (c->phys_proc_id == o->phys_proc_id)
- return topology_sane(c, o, "mc");
+ if (c->phys_proc_id == o->phys_proc_id) {
+ if (cpu_has(c, X86_FEATURE_AMD_DCM))
+ return true;
+ return topology_sane(c, o, "mc");
+ }
return false;
}
@@ -429,17 +425,16 @@ static void impress_friends(void)
/*
* Allow the user to impress friends.
*/
- pr_debug("Before bogomips.\n");
+ pr_debug("Before bogomips\n");
for_each_possible_cpu(cpu)
if (cpumask_test_cpu(cpu, cpu_callout_mask))
bogosum += cpu_data(cpu).loops_per_jiffy;
- printk(KERN_INFO
- "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
num_online_cpus(),
bogosum/(500000/HZ),
(bogosum/(5000/HZ))%100);
- pr_debug("Before bogocount - setting activated=1.\n");
+ pr_debug("Before bogocount - setting activated=1\n");
}
void __inquire_remote_apic(int apicid)
@@ -449,18 +444,17 @@ void __inquire_remote_apic(int apicid)
int timeout;
u32 status;
- printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
+ pr_info("Inquiring remote APIC 0x%x...\n", apicid);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
+ pr_info("... APIC 0x%x %s: ", apicid, names[i]);
/*
* Wait for idle.
*/
status = safe_apic_wait_icr_idle();
if (status)
- printk(KERN_CONT
- "a previous APIC delivery may have failed\n");
+ pr_cont("a previous APIC delivery may have failed\n");
apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
@@ -473,10 +467,10 @@ void __inquire_remote_apic(int apicid)
switch (status) {
case APIC_ICR_RR_VALID:
status = apic_read(APIC_RRR);
- printk(KERN_CONT "%08x\n", status);
+ pr_cont("%08x\n", status);
break;
default:
- printk(KERN_CONT "failed\n");
+ pr_cont("failed\n");
}
}
}
@@ -510,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
apic_write(APIC_ESR, 0);
accept_status = (apic_read(APIC_ESR) & 0xEF);
}
- pr_debug("NMI sent.\n");
+ pr_debug("NMI sent\n");
if (send_status)
- printk(KERN_ERR "APIC never delivered???\n");
+ pr_err("APIC never delivered???\n");
if (accept_status)
- printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+ pr_err("APIC delivery error (%lx)\n", accept_status);
return (send_status | accept_status);
}
@@ -537,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
apic_read(APIC_ESR);
}
- pr_debug("Asserting INIT.\n");
+ pr_debug("Asserting INIT\n");
/*
* Turn INIT on target chip
@@ -553,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
mdelay(10);
- pr_debug("Deasserting INIT.\n");
+ pr_debug("Deasserting INIT\n");
/* Target chip */
/* Send IPI */
@@ -586,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Run STARTUP IPI loop.
*/
- pr_debug("#startup loops: %d.\n", num_starts);
+ pr_debug("#startup loops: %d\n", num_starts);
for (j = 1; j <= num_starts; j++) {
- pr_debug("Sending STARTUP #%d.\n", j);
+ pr_debug("Sending STARTUP #%d\n", j);
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- pr_debug("After apic_write.\n");
+ pr_debug("After apic_write\n");
/*
* STARTUP IPI
@@ -610,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
*/
udelay(300);
- pr_debug("Startup point 1.\n");
+ pr_debug("Startup point 1\n");
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -625,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
if (send_status || accept_status)
break;
}
- pr_debug("After Startup.\n");
+ pr_debug("After Startup\n");
if (send_status)
- printk(KERN_ERR "APIC never delivered???\n");
+ pr_err("APIC never delivered???\n");
if (accept_status)
- printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
+ pr_err("APIC delivery error (%lx)\n", accept_status);
return (send_status | accept_status);
}
@@ -644,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
if (current_node > (-1))
- pr_cont(" Ok.\n");
+ pr_cont(" OK\n");
current_node = node;
pr_info("Booting Node %3d, Processors ", node);
}
- pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : "");
+ pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
return;
} else
pr_info("Booting Node %d Processor %d APIC 0x%x\n",
@@ -728,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
/*
* allow APs to start initializing.
*/
- pr_debug("Before Callout %d.\n", cpu);
+ pr_debug("Before Callout %d\n", cpu);
cpumask_set_cpu(cpu, cpu_callout_mask);
- pr_debug("After Callout %d.\n", cpu);
+ pr_debug("After Callout %d\n", cpu);
/*
* Wait 5s total for a response
@@ -758,7 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
pr_err("CPU%d: Stuck ??\n", cpu);
else
/* trampoline code not run */
- pr_err("CPU%d: Not responding.\n", cpu);
+ pr_err("CPU%d: Not responding\n", cpu);
if (apic->inquire_remote_apic)
apic->inquire_remote_apic(apicid);
}
@@ -803,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
!physid_isset(apicid, phys_cpu_present_map) ||
!apic->apic_id_valid(apicid)) {
- printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
+ pr_err("%s: bad cpu %d\n", __func__, cpu);
return -EINVAL;
}
@@ -884,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
unsigned int cpu;
unsigned nr;
- printk(KERN_WARNING
- "More than 8 CPUs detected - skipping them.\n"
- "Use CONFIG_X86_BIGSMP.\n");
+ pr_warn("More than 8 CPUs detected - skipping them\n"
+ "Use CONFIG_X86_BIGSMP\n");
nr = 0;
for_each_present_cpu(cpu) {
@@ -907,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
#endif
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
- printk(KERN_WARNING
- "weird, boot CPU (#%d) not listed by the BIOS.\n",
+ pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
hard_smp_processor_id());
physid_set(hard_smp_processor_id(), phys_cpu_present_map);
@@ -920,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
*/
if (!smp_found_config && !acpi_lapic) {
preempt_enable();
- printk(KERN_NOTICE "SMP motherboard not detected.\n");
+ pr_notice("SMP motherboard not detected\n");
disable_smp();
if (APIC_init_uniprocessor())
- printk(KERN_NOTICE "Local APIC not detected."
- " Using dummy APIC emulation.\n");
+ pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
return -1;
}
@@ -933,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
* CPU too, but we do it for the sake of robustness anyway.
*/
if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
- printk(KERN_NOTICE
- "weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
+ pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
+ boot_cpu_physical_apicid);
physid_set(hard_smp_processor_id(), phys_cpu_present_map);
}
preempt_enable();
@@ -948,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
if (!disable_apic) {
pr_err("BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
- pr_err("... forcing use of dummy APIC emulation."
- "(tell your hw vendor)\n");
+ pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
}
smpboot_clear_io_apic();
disable_ioapic_support();
@@ -962,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
- printk(KERN_INFO "SMP mode deactivated.\n");
+ pr_info("SMP mode deactivated\n");
smpboot_clear_io_apic();
connect_bsp_APIC();
@@ -1014,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
if (smp_sanity_check(max_cpus) < 0) {
- printk(KERN_INFO "SMP disabled\n");
+ pr_info("SMP disabled\n");
disable_smp();
goto out;
}
@@ -1052,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
* Set up local APIC timer on boot CPU.
*/
- printk(KERN_INFO "CPU%d: ", 0);
+ pr_info("CPU%d: ", 0);
print_cpu_info(&cpu_data(0));
x86_init.timers.setup_percpu_clockev();
@@ -1102,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void)
void __init native_smp_cpus_done(unsigned int max_cpus)
{
- pr_debug("Boot done.\n");
+ pr_debug("Boot done\n");
nmi_selftest();
impress_friends();
@@ -1163,8 +1152,7 @@ __init void prefill_possible_map(void)
/* nr_cpu_ids could be reduced via nr_cpus= */
if (possible > nr_cpu_ids) {
- printk(KERN_WARNING
- "%d Processors exceeds NR_CPUS limit of %d\n",
+ pr_warn("%d Processors exceeds NR_CPUS limit of %d\n",
possible, nr_cpu_ids);
possible = nr_cpu_ids;
}
@@ -1173,13 +1161,12 @@ __init void prefill_possible_map(void)
if (!setup_max_cpus)
#endif
if (possible > i) {
- printk(KERN_WARNING
- "%d Processors exceeds max_cpus limit of %u\n",
+ pr_warn("%d Processors exceeds max_cpus limit of %u\n",
possible, setup_max_cpus);
possible = i;
}
- printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+ pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
possible, max_t(int, possible - num_processors, 0));
for (i = 0; i < possible; i++)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 05b31d92f69..b481341c936 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -9,6 +9,9 @@
/*
* Handle hardware traps and faults.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/spinlock.h>
@@ -143,12 +146,11 @@ trap_signal:
#ifdef CONFIG_X86_64
if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
printk_ratelimit()) {
- printk(KERN_INFO
- "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
- tsk->comm, tsk->pid, str,
- regs->ip, regs->sp, error_code);
+ pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
+ tsk->comm, tsk->pid, str,
+ regs->ip, regs->sp, error_code);
print_vma_addr(" in ", regs->ip);
- printk("\n");
+ pr_cont("\n");
}
#endif
@@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) {
- printk(KERN_INFO
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
+ pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
tsk->comm, task_pid_nr(tsk),
regs->ip, regs->sp, error_code);
print_vma_addr(" in ", regs->ip);
- printk("\n");
+ pr_cont("\n");
}
force_sig(SIGSEGV, tsk);
@@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
conditional_sti(regs);
#if 0
/* No need to warn about this any longer. */
- printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+ pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index fc0a147e372..cfa5d4f7ca5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
@@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
- printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC completely.\n");
+ pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
tsc_disabled = 1;
return 1;
}
@@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void)
goto success;
}
}
- printk("Fast TSC calibration failed\n");
+ pr_err("Fast TSC calibration failed\n");
return 0;
success:
@@ -392,7 +393,7 @@ success:
*/
delta *= PIT_TICK_RATE;
do_div(delta, i*256*1000);
- printk("Fast TSC calibration using PIT\n");
+ pr_info("Fast TSC calibration using PIT\n");
return delta;
}
@@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void)
* use the reference value, as it is more precise.
*/
if (delta >= 90 && delta <= 110) {
- printk(KERN_INFO
- "TSC: PIT calibration matches %s. %d loops\n",
- hpet ? "HPET" : "PMTIMER", i + 1);
+ pr_info("PIT calibration