aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/boot/bitops.h3
-rw-r--r--arch/x86/boot/compressed/Makefile8
-rw-r--r--arch/x86/boot/compressed/eboot.c557
-rw-r--r--arch/x86/boot/compressed/eboot.h12
-rw-r--r--arch/x86/boot/compressed/kaslr.c98
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c73
-rw-r--r--arch/x86/boot/string.c5
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S3
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aegis128l-aesni-asm.S3
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aegis256-aesni-asm.S3
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c12
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S8
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S4
-rw-r--r--arch/x86/crypto/morus1280-avx2-asm.S3
-rw-r--r--arch/x86/crypto/morus1280-avx2-glue.c10
-rw-r--r--arch/x86/crypto/morus1280-sse2-asm.S3
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c10
-rw-r--r--arch/x86/crypto/morus640-sse2-asm.S3
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c10
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S2
-rw-r--r--arch/x86/entry/entry_64.S23
-rw-r--r--arch/x86/entry/vdso/Makefile4
-rw-r--r--arch/x86/events/amd/ibs.c6
-rw-r--r--arch/x86/events/intel/core.c34
-rw-r--r--arch/x86/events/intel/ds.c82
-rw-r--r--arch/x86/events/intel/lbr.c56
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c10
-rw-r--r--arch/x86/events/perf_event.h6
-rw-r--r--arch/x86/hyperv/hv_apic.c5
-rw-r--r--arch/x86/hyperv/hv_init.c5
-rw-r--r--arch/x86/include/asm/apm.h6
-rw-r--r--arch/x86/include/asm/asm.h59
-rw-r--r--arch/x86/include/asm/atomic.h32
-rw-r--r--arch/x86/include/asm/atomic64_32.h61
-rw-r--r--arch/x86/include/asm/atomic64_64.h50
-rw-r--r--arch/x86/include/asm/cmpxchg.h2
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h4
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h7
-rw-r--r--arch/x86/include/asm/intel_ds.h3
-rw-r--r--arch/x86/include/asm/irqflags.h2
-rw-r--r--arch/x86/include/asm/kprobes.h5
-rw-r--r--arch/x86/include/asm/mshyperv.h5
-rw-r--r--arch/x86/include/asm/orc_types.h2
-rw-r--r--arch/x86/include/asm/percpu.h7
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h2
-rw-r--r--arch/x86/include/asm/refcount.h1
-rw-r--r--arch/x86/include/asm/uaccess_64.h7
-rw-r--r--arch/x86/include/asm/unwind_hints.h16
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/apic/apic.c5
-rw-r--r--arch/x86/kernel/apic/vector.c19
-rw-r--r--arch/x86/kernel/apm_32.c5
-rw-r--r--arch/x86/kernel/cpu/amd.c4
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c205
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c3
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c131
-rw-r--r--arch/x86/kernel/irqflags.S26
-rw-r--r--arch/x86/kernel/kprobes/common.h10
-rw-r--r--arch/x86/kernel/kprobes/core.c124
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c49
-rw-r--r--arch/x86/kernel/kprobes/opt.c1
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/kvmclock.c12
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/pci-iommu_table.c2
-rw-r--r--arch/x86/kernel/pcspeaker.c2
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/kernel/stacktrace.c42
-rw-r--r--arch/x86/kernel/unwind_orc.c52
-rw-r--r--arch/x86/kvm/Kconfig2
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c2
-rw-r--r--arch/x86/kvm/vmx.c83
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/memcpy_64.S2
-rw-r--r--arch/x86/net/bpf_jit_comp32.c8
-rw-r--r--arch/x86/platform/efi/efi_64.c103
-rw-r--r--arch/x86/platform/efi/quirks.c14
-rw-r--r--arch/x86/power/hibernate_asm_64.S2
-rw-r--r--arch/x86/purgatory/Makefile2
-rw-r--r--arch/x86/um/mem_32.c2
-rw-r--r--arch/x86/um/vdso/.gitignore1
-rw-r--r--arch/x86/um/vdso/Makefile16
-rw-r--r--arch/x86/xen/enlighten_pv.c25
-rw-r--r--arch/x86/xen/irq.c4
92 files changed, 1162 insertions, 1191 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1dbb4ee19d7..6d4774f203d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -63,7 +63,7 @@ config X86
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
- select ARCH_HAS_UACCESS_MCSAFE if X86_64
+ select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
@@ -180,7 +180,7 @@ config X86
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_STACKPROTECTOR if CC_HAS_SANE_STACKPROTECTOR
select HAVE_STACK_VALIDATION if X86_64
select HAVE_RSEQ
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a08e82856563..7e3c07d6ad42 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -80,11 +80,6 @@ ifeq ($(CONFIG_X86_32),y)
# alignment instructions.
KBUILD_CFLAGS += $(call cc-option,$(cc_stack_align4))
- # Disable unit-at-a-time mode on pre-gcc-4.0 compilers, it makes gcc use
- # a lot more stack due to the lack of sharing of stacklots:
- KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0400, \
- $(call cc-option,-fno-unit-at-a-time))
-
# CPU-specific tuning. Anything which can be shared with UML should go here.
include arch/x86/Makefile_32.cpu
KBUILD_CFLAGS += $(cflags-y)
diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h
index 0d41d68131cc..2e1382486e91 100644
--- a/arch/x86/boot/bitops.h
+++ b/arch/x86/boot/bitops.h
@@ -17,6 +17,7 @@
#define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */
#include <linux/types.h>
+#include <asm/asm.h>
static inline bool constant_test_bit(int nr, const void *addr)
{
@@ -28,7 +29,7 @@ static inline bool variable_test_bit(int nr, const void *addr)
bool v;
const u32 *p = (const u32 *)addr;
- asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr));
+ asm("btl %2,%1" CC_SET(c) : CC_OUT(c) (v) : "m" (*p), "Ir" (nr));
return v;
}
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fa42f895fdde..169c2feda14a 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -106,9 +106,13 @@ define cmd_check_data_rel
done
endef
+# We need to run two commands under "if_changed", so merge them into a
+# single invocation.
+quiet_cmd_check-and-link-vmlinux = LD $@
+ cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld)
+
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
- $(call if_changed,check_data_rel)
- $(call if_changed,ld)
+ $(call if_changed,check-and-link-vmlinux)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index e57665b4ba1c..1458b1700fc7 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -34,74 +34,13 @@ static void setup_boot_services##bits(struct efi_config *c) \
\
table = (typeof(table))sys_table; \
\
- c->runtime_services = table->runtime; \
- c->boot_services = table->boottime; \
- c->text_output = table->con_out; \
+ c->runtime_services = table->runtime; \
+ c->boot_services = table->boottime; \
+ c->text_output = table->con_out; \
}
BOOT_SERVICES(32);
BOOT_SERVICES(64);
-static inline efi_status_t __open_volume32(void *__image, void **__fh)
-{
- efi_file_io_interface_t *io;
- efi_loaded_image_32_t *image = __image;
- efi_file_handle_32_t *fh;
- efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
- efi_status_t status;
- void *handle = (void *)(unsigned long)image->device_handle;
- unsigned long func;
-
- status = efi_call_early(handle_protocol, handle,
- &fs_proto, (void **)&io);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to handle fs_proto\n");
- return status;
- }
-
- func = (unsigned long)io->open_volume;
- status = efi_early->call(func, io, &fh);
- if (status != EFI_SUCCESS)
- efi_printk(sys_table, "Failed to open volume\n");
-
- *__fh = fh;
- return status;
-}
-
-static inline efi_status_t __open_volume64(void *__image, void **__fh)
-{
- efi_file_io_interface_t *io;
- efi_loaded_image_64_t *image = __image;
- efi_file_handle_64_t *fh;
- efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
- efi_status_t status;
- void *handle = (void *)(unsigned long)image->device_handle;
- unsigned long func;
-
- status = efi_call_early(handle_protocol, handle,
- &fs_proto, (void **)&io);
- if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to handle fs_proto\n");
- return status;
- }
-
- func = (unsigned long)io->open_volume;
- status = efi_early->call(func, io, &fh);
- if (status != EFI_SUCCESS)
- efi_printk(sys_table, "Failed to open volume\n");
-
- *__fh = fh;
- return status;
-}
-
-efi_status_t
-efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh)
-{
- if (efi_early->is64)
- return __open_volume64(__image, __fh);
-
- return __open_volume32(__image, __fh);
-}
-
void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
{
efi_call_proto(efi_simple_text_output_protocol, output_string,
@@ -109,23 +48,17 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
}
static efi_status_t
-__setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
+preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
{
struct pci_setup_rom *rom = NULL;
efi_status_t status;
unsigned long size;
- uint64_t attributes, romsize;
+ uint64_t romsize;
void *romimage;
- status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
- EfiPciIoAttributeOperationGet, 0ULL,
- &attributes);
- if (status != EFI_SUCCESS)
- return status;
-
/*
- * Some firmware images contain EFI function pointers at the place where the
- * romimage and romsize fields are supposed to be. Typically the EFI
+ * Some firmware images contain EFI function pointers at the place where
+ * the romimage and romsize fields are supposed to be. Typically the EFI
* code is mapped at high addresses, translating to an unrealistically
* large romsize. The UEFI spec limits the size of option ROMs to 16
* MiB so we reject any ROMs over 16 MiB in size to catch this.
@@ -140,16 +73,16 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for rom\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'rom'\n");
return status;
}
memset(rom, 0, sizeof(*rom));
- rom->data.type = SETUP_PCI;
- rom->data.len = size - sizeof(struct setup_data);
- rom->data.next = 0;
- rom->pcilen = pci->romsize;
+ rom->data.type = SETUP_PCI;
+ rom->data.len = size - sizeof(struct setup_data);
+ rom->data.next = 0;
+ rom->pcilen = pci->romsize;
*__rom = rom;
status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
@@ -185,96 +118,6 @@ free_struct:
return status;
}
-static void
-setup_efi_pci32(struct boot_params *params, void **pci_handle,
- unsigned long size)
-{
- efi_pci_io_protocol_t *pci = NULL;
- efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u32 *handles = (u32 *)(unsigned long)pci_handle;
- efi_status_t status;
- unsigned long nr_pci;
- struct setup_data *data;
- int i;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- nr_pci = size / sizeof(u32);
- for (i = 0; i < nr_pci; i++) {
- struct pci_setup_rom *rom = NULL;
- u32 h = handles[i];
-
- status = efi_call_early(handle_protocol, h,
- &pci_proto, (void **)&pci);
-
- if (status != EFI_SUCCESS)
- continue;
-
- if (!pci)
- continue;
-
- status = __setup_efi_pci(pci, &rom);
- if (status != EFI_SUCCESS)
- continue;
-
- if (data)
- data->next = (unsigned long)rom;
- else
- params->hdr.setup_data = (unsigned long)rom;
-
- data = (struct setup_data *)rom;
-
- }
-}
-
-static void
-setup_efi_pci64(struct boot_params *params, void **pci_handle,
- unsigned long size)
-{
- efi_pci_io_protocol_t *pci = NULL;
- efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u64 *handles = (u64 *)(unsigned long)pci_handle;
- efi_status_t status;
- unsigned long nr_pci;
- struct setup_data *data;
- int i;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- nr_pci = size / sizeof(u64);
- for (i = 0; i < nr_pci; i++) {
- struct pci_setup_rom *rom = NULL;
- u64 h = handles[i];
-
- status = efi_call_early(handle_protocol, h,
- &pci_proto, (void **)&pci);
-
- if (status != EFI_SUCCESS)
- continue;
-
- if (!pci)
- continue;
-
- status = __setup_efi_pci(pci, &rom);
- if (status != EFI_SUCCESS)
- continue;
-
- if (data)
- data->next = (unsigned long)rom;
- else
- params->hdr.setup_data = (unsigned long)rom;
-
- data = (struct setup_data *)rom;
-
- }
-}
-
/*
* There's no way to return an informative status from this function,
* because any analysis (and printing of error messages) needs to be
@@ -290,6 +133,9 @@ static void setup_efi_pci(struct boot_params *params)
void **pci_handle = NULL;
efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
unsigned long size = 0;
+ unsigned long nr_pci;
+ struct setup_data *data;
+ int i;
status = efi_call_early(locate_handle,
EFI_LOCATE_BY_PROTOCOL,
@@ -301,7 +147,7 @@ static void setup_efi_pci(struct boot_params *params)
size, (void **)&pci_handle);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for pci_handle\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'pci_handle'\n");
return;
}
@@ -313,10 +159,34 @@ static void setup_efi_pci(struct boot_params *params)
if (status != EFI_SUCCESS)
goto free_handle;
- if (efi_early->is64)
- setup_efi_pci64(params, pci_handle, size);
- else
- setup_efi_pci32(params, pci_handle, size);
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ nr_pci = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
+ for (i = 0; i < nr_pci; i++) {
+ efi_pci_io_protocol_t *pci = NULL;
+ struct pci_setup_rom *rom;
+
+ status = efi_call_early(handle_protocol,
+ efi_is_64bit() ? ((u64 *)pci_handle)[i]
+ : ((u32 *)pci_handle)[i],
+ &pci_proto, (void **)&pci);
+ if (status != EFI_SUCCESS || !pci)
+ continue;
+
+ status = preserve_pci_rom_image(pci, &rom);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ if (data)
+ data->next = (unsigned long)rom;
+ else
+ params->hdr.setup_data = (unsigned long)rom;
+
+ data = (struct setup_data *)rom;
+ }
free_handle:
efi_call_early(free_pool, pci_handle);
@@ -347,8 +217,7 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
size + sizeof(struct setup_data), &new);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table,
- "Failed to alloc mem for properties\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'properties'\n");
return;
}
@@ -364,9 +233,9 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
new->next = 0;
data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
- if (!data)
+ if (!data) {
boot_params->hdr.setup_data = (unsigned long)new;
- else {
+ } else {
while (data->next)
data = (struct setup_data *)(unsigned long)data->next;
data->next = (unsigned long)new;
@@ -386,81 +255,55 @@ static void setup_quirks(struct boot_params *boot_params)
}
}
+/*
+ * See if we have Universal Graphics Adapter (UGA) protocol
+ */
static efi_status_t
-setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
+setup_uga(struct screen_info *si, efi_guid_t *uga_proto, unsigned long size)
{
- struct efi_uga_draw_protocol *uga = NULL, *first_uga;
- efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
+ efi_status_t status;
+ u32 width, height;
+ void **uga_handle = NULL;
+ efi_uga_draw_protocol_t *uga = NULL, *first_uga;
unsigned long nr_ugas;
- u32 *handles = (u32 *)uga_handle;
- efi_status_t status = EFI_INVALID_PARAMETER;
int i;
- first_uga = NULL;
- nr_ugas = size / sizeof(u32);
- for (i = 0; i < nr_ugas; i++) {
- efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
- u32 w, h, depth, refresh;
- void *pciio;
- u32 handle = handles[i];
-
- status = efi_call_early(handle_protocol, handle,
- &uga_proto, (void **)&uga);
- if (status != EFI_SUCCESS)
- continue;
-
- efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
-
- status = efi_early->call((unsigned long)uga->get_mode, uga,
- &w, &h, &depth, &refresh);
- if (status == EFI_SUCCESS && (!first_uga || pciio)) {
- *width = w;
- *height = h;
-
- /*
- * Once we've found a UGA supporting PCIIO,
- * don't bother looking any further.
- */
- if (pciio)
- break;
-
- first_uga = uga;
- }
- }
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ size, (void **)&uga_handle);
+ if (status != EFI_SUCCESS)
+ return status;
- return status;
-}
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL,
+ uga_proto, NULL, &size, uga_handle);
+ if (status != EFI_SUCCESS)
+ goto free_handle;
-static efi_status_t
-setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
-{
- struct efi_uga_draw_protocol *uga = NULL, *first_uga;
- efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
- unsigned long nr_ugas;
- u64 *handles = (u64 *)uga_handle;
- efi_status_t status = EFI_INVALID_PARAMETER;
- int i;
+ height = 0;
+ width = 0;
first_uga = NULL;
- nr_ugas = size / sizeof(u64);
+ nr_ugas = size / (efi_is_64bit() ? sizeof(u64) : sizeof(u32));
for (i = 0; i < nr_ugas; i++) {
efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
u32 w, h, depth, refresh;
void *pciio;
- u64 handle = handles[i];
+ unsigned long handle = efi_is_64bit() ? ((u64 *)uga_handle)[i]
+ : ((u32 *)uga_handle)[i];
status = efi_call_early(handle_protocol, handle,
- &uga_proto, (void **)&uga);
+ uga_proto, (void **)&uga);
if (status != EFI_SUCCESS)
continue;
+ pciio = NULL;
efi_call_early(handle_protocol, handle, &pciio_proto, &pciio);
- status = efi_early->call((unsigned long)uga->get_mode, uga,
- &w, &h, &depth, &refresh);
+ status = efi_call_proto(efi_uga_draw_protocol, get_mode, uga,
+ &w, &h, &depth, &refresh);
if (status == EFI_SUCCESS && (!first_uga || pciio)) {
- *width = w;
- *height = h;
+ width = w;
+ height = h;
/*
* Once we've found a UGA supporting PCIIO,
@@ -473,59 +316,28 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
}
}
- return status;
-}
-
-/*
- * See if we have Universal Graphics Adapter (UGA) protocol
- */
-static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto,
- unsigned long size)
-{
- efi_status_t status;
- u32 width, height;
- void **uga_handle = NULL;
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)&uga_handle);
- if (status != EFI_SUCCESS)
- return status;
-
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- uga_proto, NULL, &size, uga_handle);
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- height = 0;
- width = 0;
-
- if (efi_early->is64)
- status = setup_uga64(uga_handle, size, &width, &height);
- else
- status = setup_uga32(uga_handle, size, &width, &height);
-
if (!width && !height)
goto free_handle;
/* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
+ si->orig_video_isVGA = VIDEO_TYPE_EFI;
- si->lfb_depth = 32;
- si->lfb_width = width;
- si->lfb_height = height;
+ si->lfb_depth = 32;
+ si->lfb_width = width;
+ si->lfb_height = height;
- si->red_size = 8;
- si->red_pos = 16;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 0;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
+ si->red_size = 8;
+ si->red_pos = 16;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 0;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
free_handle:
efi_call_early(free_pool, uga_handle);
+
return status;
}
@@ -592,7 +404,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
return NULL;
- if (efi_early->is64)
+ if (efi_is_64bit())
setup_boot_services64(efi_early);
else
setup_boot_services32(efi_early);
@@ -607,7 +419,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
status = efi_low_alloc(sys_table, 0x4000, 1,
(unsigned long *)&boot_params);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc lowmem for boot params\n");
+ efi_printk(sys_table, "Failed to allocate lowmem for boot params\n");
return NULL;
}
@@ -623,9 +435,9 @@ struct boot_params *make_boot_params(struct efi_config *c)
* Fill out some of the header fields ourselves because the
* EFI firmware loader doesn't load the first sector.
*/
- hdr->root_flags = 1;
- hdr->vid_mode = 0xffff;
- hdr->boot_flag = 0xAA55;
+ hdr->root_flags = 1;
+ hdr->vid_mode = 0xffff;
+ hdr->boot_flag = 0xAA55;
hdr->type_of_loader = 0x21;
@@ -633,6 +445,7 @@ struct boot_params *make_boot_params(struct efi_config *c)
cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size);
if (!cmdline_ptr)
goto fail;
+
hdr->cmd_line_ptr = (unsigned long)cmdline_ptr;
/* Fill in upper bits of command line address, NOP on 32 bit */
boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32;
@@ -669,10 +482,12 @@ struct boot_params *make_boot_params(struct efi_config *c)
boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32;
return boot_params;
+
fail2:
efi_free(sys_table, options_size, hdr->cmd_line_ptr);
fail:
efi_free(sys_table, 0x4000, (unsigned long)boot_params);
+
return NULL;
}
@@ -684,7 +499,7 @@ static void add_e820ext(struct boot_params *params,
unsigned long size;
e820ext->type = SETUP_E820_EXT;
- e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
+ e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
e820ext->next = 0;
data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
@@ -698,8 +513,8 @@ static void add_e820ext(struct boot_params *params,
params->hdr.setup_data = (unsigned long)e820ext;
}
-static efi_status_t setup_e820(struct boot_params *params,
- struct setup_data *e820ext, u32 e820ext_size)
+static efi_status_t
+setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_size)
{
struct boot_e820_entry *entry = params->e820_table;
struct efi_info *efi = &params->efi_info;
@@ -820,11 +635,10 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
}
struct exit_boot_struct {
- struct boot_params *boot_params;
- struct efi_info *efi;
- struct setup_data *e820ext;
- __u32 e820ext_size;
- bool is64;
+ struct boot_params *boot_params;
+ struct efi_info *efi;
+ struct setup_data *e820ext;
+ __u32 e820ext_size;
};
static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
@@ -851,25 +665,25 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
first = false;
}
- signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
+ signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
+ : EFI32_LOADER_SIGNATURE;
memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
- p->efi->efi_systab = (unsigned long)sys_table_arg;
- p->efi->efi_memdesc_size = *map->desc_size;
- p->efi->efi_memdesc_version = *map->desc_ver;
- p->efi->efi_memmap = (unsigned long)*map->map;
- p->efi->efi_memmap_size = *map->map_size;
+ p->efi->efi_systab = (unsigned long)sys_table_arg;
+ p->efi->efi_memdesc_size = *map->desc_size;
+ p->efi->efi_memdesc_version = *map->desc_ver;
+ p->efi->efi_memmap = (unsigned long)*map->map;
+ p->efi->efi_memmap_size = *map->map_size;
#ifdef CONFIG_X86_64
- p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
- p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
+ p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
+ p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
#endif
return EFI_SUCCESS;
}
-static efi_status_t exit_boot(struct boot_params *boot_params,
- void *handle, bool is64)
+static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
{
unsigned long map_sz, key, desc_size, buff_size;
efi_memory_desc_t *mem_map;
@@ -880,17 +694,16 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
struct efi_boot_memmap map;
struct exit_boot_struct priv;
- map.map = &mem_map;
- map.map_size = &map_sz;
- map.desc_size = &desc_size;
- map.desc_ver = &desc_version;
- map.key_ptr = &key;
- map.buff_size = &buff_size;
- priv.boot_params = boot_params;
- priv.efi = &boot_params->efi_info;
- priv.e820ext = NULL;
- priv.e820ext_size = 0;
- priv.is64 = is64;
+ map.map = &mem_map;
+ map.map_size = &map_sz;
+ map.desc_size = &desc_size;
+ map.desc_ver = &desc_version;
+ map.key_ptr = &key;
+ map.buff_size = &buff_size;
+ priv.boot_params = boot_params;
+ priv.efi = &boot_params->efi_info;
+ priv.e820ext = NULL;
+ priv.e820ext_size = 0;
/* Might as well exit boot services now */
status = efi_exit_boot_services(sys_table, handle, &map, &priv,
@@ -898,10 +711,11 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
if (status != EFI_SUCCESS)
return status;
- e820ext = priv.e820ext;
- e820ext_size = priv.e820ext_size;
+ e820ext = priv.e820ext;
+ e820ext_size = priv.e820ext_size;
+
/* Historic? */
- boot_params->alt_mem_k = 32 * 1024;
+ boot_params->alt_mem_k = 32 * 1024;
status = setup_e820(boot_params, e820ext, e820ext_size);
if (status != EFI_SUCCESS)
@@ -914,8 +728,8 @@ static efi_status_t exit_boot(struct boot_params *boot_params,
* On success we return a pointer to a boot_params structure, and NULL
* on failure.
*/
-struct boot_params *efi_main(struct efi_config *c,
- struct boot_params *boot_params)
+struct boot_params *
+efi_main(struct efi_config *c, struct boot_params *boot_params)
{
struct desc_ptr *gdt = NULL;
efi_loaded_image_t *image;
@@ -924,13 +738,11 @@ struct boot_params *efi_main(struct efi_config *c,
struct desc_struct *desc;
void *handle;
efi_system_table_t *_table;
- bool is64;
efi_early = c;
_table = (efi_system_table_t *)(unsigned long)efi_early->table;
handle = (void *)(unsigned long)efi_early->image_handle;
- is64 = efi_early->is64;
sys_table = _table;
@@ -938,7 +750,7 @@ struct boot_params *efi_main(struct efi_config *c,
if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
goto fail;
- if (is64)
+ if (efi_is_64bit())
setup_boot_services64(efi_early);
else
setup_boot_services32(efi_early);
@@ -963,7 +775,7 @@ struct boot_params *efi_main(struct efi_config *c,
status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
sizeof(*gdt), (void **)&gdt);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for gdt structure\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'gdt' structure\n");
goto fail;
}
@@ -971,7 +783,7 @@ struct boot_params *efi_main(struct efi_config *c,
status = efi_low_alloc(sys_table, gdt->size, 8,
(unsigned long *)&gdt->address);
if (status != EFI_SUCCESS) {
- efi_printk(sys_table, "Failed to alloc mem for gdt\n");
+ efi_printk(sys_table, "Failed to allocate memory for 'gdt'\n");
goto fail;
}
@@ -994,7 +806,7 @@ struct boot_params *efi_main(struct efi_config *c,
hdr->code32_start = bzimage_addr;
}
- status = exit_boot(boot_params, handle, is64);
+ status = exit_boot(boot_params, handle);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "exit_boot() failed!\n");
goto fail;
@@ -1008,19 +820,20 @@ struct boot_params *efi_main(struct efi_config *c,
if (IS_ENABLED(CONFIG_X86_64)) {
/* __KERNEL32_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
+
desc++;
} else {
/* Second entry is unused on 32-bit */
@@ -1028,15 +841,16 @@ struct boot_params *efi_main(struct efi_config *c,
}
/* __KERNEL_CS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+
if (IS_ENABLED(CONFIG_X86_64)) {
desc->l = 1;
desc->d = 0;
@@ -1044,41 +858,41 @@ struct boot_params *efi_main(struct efi_config *c,
desc->l = 0;
desc->d = SEG_OP_SIZE_32BIT;
}
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
desc++;
/* __KERNEL_DS */
- desc->limit0 = 0xffff;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
- desc->s = DESC_TYPE_CODE_DATA;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0xf;
- desc->avl = 0;
- desc->l = 0;
- desc->d = SEG_OP_SIZE_32BIT;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->limit0 = 0xffff;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE;
+ desc->s = DESC_TYPE_CODE_DATA;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0xf;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = SEG_OP_SIZE_32BIT;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
desc++;
if (IS_ENABLED(CONFIG_X86_64)) {
/* Task segment value */
- desc->limit0 = 0x0000;
- desc->base0 = 0x0000;
- desc->base1 = 0x0000;
- desc->type = SEG_TYPE_TSS;
- desc->s = 0;
- desc->dpl = 0;
- desc->p = 1;
- desc->limit1 = 0x0;
- desc->avl = 0;
- desc->l = 0;
- desc->d = 0;
- desc->g = SEG_GRANULARITY_4KB;
- desc->base2 = 0x00;
+ desc->limit0 = 0x0000;
+ desc->base0 = 0x0000;
+ desc->base1 = 0x0000;
+ desc->type = SEG_TYPE_TSS;
+ desc->s = 0;
+ desc->dpl = 0;
+ desc->p = 1;
+ desc->limit1 = 0x0;
+ desc->avl = 0;
+ desc->l = 0;
+ desc->d = 0;
+ desc->g = SEG_GRANULARITY_4KB;
+ desc->base2 = 0x00;
desc++;
}
@@ -1088,5 +902,6 @@ struct boot_params *efi_main(struct efi_config *c,
return boot_params;
fail:
efi_printk(sys_table, "efi_main() failed!\n");
+
return NULL;
}
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index e799dc5c6448..8297387c4676 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -12,22 +12,22 @@
#define DESC_TYPE_CODE_DATA (1 << 0)
-struct efi_uga_draw_protocol_32 {
+typedef struct {
u32 get_mode;
u32 set_mode;
u32 blt;
-};
+} efi_uga_draw_protocol_32_t;
-struct efi_uga_draw_protocol_64 {
+typedef struct {
u64 get_mode;
u64 set_mode;
u64 blt;
-};
+} efi_uga_draw_protocol_64_t;
-struct efi_uga_draw_protocol {
+typedef struct {
void *get_mode;
void *set_mode;
void *blt;
-};
+} efi_uga_draw_protocol_t;
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index b87a7582853d..302517929932 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -102,7 +102,7 @@ static bool memmap_too_large;
/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
-unsigned long long mem_limit = ULLONG_MAX;
+static unsigned long long mem_limit = ULLONG_MAX;
enum mem_avoid_index {
@@ -215,7 +215,36 @@ static void mem_avoid_memmap(char *str)
memmap_too_large = true;
}
-static int handle_mem_memmap(void)
+/* Store the number of 1GB huge pages which users specified: */
+static unsigned long max_gb_huge_pages;
+
+static void parse_gb_huge_pages(char *param, char *val)
+{
+ static bool gbpage_sz;
+ char *p;
+
+ if (!strcmp(param, "hugepagesz")) {
+ p = val;
+ if (memparse(p, &p) != PUD_SIZE) {
+ gbpage_sz = false;
+ return;
+ }
+
+ if (gbpage_sz)
+ warn("Repeatedly set hugeTLB page size of 1G!\n");
+ gbpage_sz = true;
+ return;
+ }
+
+ if (!strcmp(param, "hugepages") && gbpage_sz) {
+ p = val;
+ max_gb_huge_pages = simple_strtoull(p, &p, 0);
+ return;
+ }
+}
+
+
+static int handle_mem_options(void)
{
char *args = (char *)get_cmd_line_ptr();
size_t len = strlen((char *)args);
@@ -223,7 +252,8 @@ static int handle_mem_memmap(void)
char *param, *val;
u64 mem_size;
- if (!strstr(args, "memmap=") && !strstr(args, "mem="))
+ if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
+ !strstr(args, "hugepages"))
return 0;
tmp_cmdline = malloc(len + 1);
@@ -248,6 +278,8 @@ static int handle_mem_memmap(void)
if (!strcmp(param, "memmap")) {
mem_avoid_memmap(val);
+ } else if (strstr(param, "hugepages")) {
+ parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
char *p = val;
@@ -387,7 +419,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
/* We don't need to set a mapping for setup_data. */
/* Mark the memmap regions we need to avoid */
- handle_mem_memmap();
+ handle_mem_options();
#ifdef CONFIG_X86_VERBOSE_BOOTUP
/* Make sure video RAM can be used. */
@@ -466,6 +498,60 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
}
}
+/*
+ * Skip as many 1GB huge pages as possible in the passed region
+ * according to the number which users specified:
+ */
+static void
+process_gb_huge_pages(struct mem_vector *region, unsigned long image_size)
+{
+ unsigned long addr, size = 0;
+ struct mem_vector tmp;
+ int i = 0;
+
+ if (!max_gb_huge_pages) {
+ store_slot_info(region, image_size);
+ return;
+ }
+
+ addr = ALIGN(region->start, PUD_SIZE);
+ /* Did we raise the address above the passed in memory entry? */
+ if (addr < region->start + region->size)
+ size = region->size - (addr - region->start);
+
+ /* Check how many 1GB huge pages can be filtered out: */
+ while (size > PUD_SIZE && max_gb_huge_pages) {
+ size -= PUD_SIZE;
+ max_gb_huge_pages--;
+ i++;
+ }
+
+ /* No good 1GB huge pages found: */
+ if (!i) {
+ store_slot_info(region, image_size);
+ return;
+ }
+
+ /*
+ * Skip those 'i'*1GB good huge pages, and continue checking and
+ * processing the remaining head or tail part of the passed region
+ * if available.
+ */
+
+ if (addr >= region->start + image_size) {
+ tmp.start = region->start;
+ tmp.size = addr - region->start;
+ store_slot_info(&tmp, image_size);
+ }
+
+ size = region->size - (addr - region->start) - i * PUD_SIZE;
+ if (size >= image_size) {
+ tmp.start = addr + i * PUD_SIZE;
+ tmp.size = size;
+ store_slot_info(&tmp, image_size);
+ }
+}
+
static unsigned long slots_fetch_random(void)
{
unsigned long slot;
@@ -546,7 +632,7 @@ static void process_mem_region(struct mem_vector *entry,
/* If nothing overlaps, store the region and return. */
if (!mem_avoid_overlap(&region, &overlap)) {
- store_slot_info(&region, image_size);
+ process_gb_huge_pages(&region, image_size);
return;
}
@@ -556,7 +642,7 @@ static void process_mem_region(struct mem_vector *entry,
beginning.start = region.start;
beginning.size = overlap.start - region.start;
- store_slot_info(&beginning, image_size);
+ process_gb_huge_pages(&beginning, image_size);
}
/* Return if overlap extends to or past end of region. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 8c5107545251..9e2157371491 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,3 +1,4 @@
+#include <asm/e820/types.h>
#include <asm/processor.h>
#include "pgtable.h"
#include "../string.h"
@@ -34,10 +35,62 @@ unsigned long *trampoline_32bit __section(.data);
extern struct boot_params *boot_params;
int cmdline_find_option_bool(const char *option);
+static unsigned long find_trampoline_placement(void)
+{
+ unsigned long bios_start, ebda_start;
+ unsigned long trampoline_start;
+ struct boot_e820_entry *entry;
+ int i;
+
+ /*
+ * Find a suitable spot for the trampoline.
+ * This code is based on reserve_bios_regions().
+ */
+
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
+
+ if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
+ bios_start = BIOS_START_MAX;
+
+ if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
+ bios_start = ebda_start;
+
+ bios_start = round_down(bios_start, PAGE_SIZE);
+
+ /* Find the first usable memory region under bios_start. */
+ for (i = boot_params->e820_entries - 1; i >= 0; i--) {
+ entry = &boot_params->e820_table[i];
+
+ /* Skip all entries above bios_start. */
+ if (bios_start <= entry->addr)
+ continue;
+
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+
+ /* Adjust bios_start to the end of the entry if needed. */
+ if (bios_start > entry->addr + entry->size)
+ bios_start = entry->addr + entry->size;
+
+ /* Keep bios_start page-aligned. */
+ bios_start = round_down(bios_start, PAGE_SIZE);
+
+ /* Skip the entry if it's too small. */
+ if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr)
+ continue;
+
+ break;
+ }
+
+ /* Place the trampoline just below the end of low memory */
+ return bios_start - TRAMPOLINE_32BIT_SIZE;
+}
+
struct paging_config paging_prepare(void *rmode)
{
struct paging_config paging_config = {};
- unsigned long bios_start, ebda_start;
/* Initialize boot_params. Required for cmdline_find_option_bool(). */
boot_params = rmode;
@@ -61,23 +114,7 @@ struct paging_config paging_prepare(void *rmode)
paging_config.l5_required = 1;
}
- /*
- * Find a suitable spot for the trampoline.
- * This code is based on reserve_bios_regions().
- */
-
- ebda_start = *(unsigned short *)0x40e << 4;
- bios_start = *(unsigned short *)0x413 << 10;
-
- if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
- bios_start = BIOS_START_MAX;
-
- if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
- bios_start = ebda_start;
-
- /* Place the trampoline just below the end of low memory, aligned to 4k */
- paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
- paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
+ paging_config.trampoline_start = find_trampoline_placement();
trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 16f49123d747..c4428a176973 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -13,6 +13,7 @@
*/
#include <linux/types.h>
+#include <asm/asm.h>
#include "ctype.h"
#include "string.h"
@@ -28,8 +29,8 @@
int memcmp(const void *s1, const void *s2, size_t len)
{
bool diff;
- asm("repe; cmpsb; setnz %0"
- : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
+ asm("repe; cmpsb" CC_SET(nz)
+ : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 9254e0b6cc06..5f7e43d4f64a 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -75,7 +75,7 @@
* %r9
*/
__load_partial:
- xor %r9, %r9
+ xor %r9d, %r9d
pxor MSG, MSG
mov LEN, %r8
@@ -535,6 +535,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail)
movdqu STATE3, 0x40(STATEP)
FRAME_END
+ ret
ENDPROC(crypto_aegis128_aesni_enc_tail)
.macro decrypt_block a s0 s1 s2 s3 s4 i
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 5de7c0d46edf..acd11b3bf639 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128_aesni_alg[] = {
}
};
-static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
static int __init crypto_aegis128_aesni_module_init(void)
{
- if (!x86_match_cpu(aesni_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128_aesni_alg,
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 9263c344f2c7..491dd61c845c 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -66,7 +66,7 @@
* %r9
*/
__load_partial:
- xor %r9, %r9
+ xor %r9d, %r9d
pxor MSG0, MSG0
pxor MSG1, MSG1
@@ -645,6 +645,7 @@ ENTRY(crypto_aegis128l_aesni_enc_tail)
state_store0
FRAME_END
+ ret
ENDPROC(crypto_aegis128l_aesni_enc_tail)
/*
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 876e4866e633..2071c3d1ae07 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis128l_aesni_alg[] = {
}
};
-static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
static int __init crypto_aegis128l_aesni_module_init(void)
{
- if (!x86_match_cpu(aesni_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis128l_aesni_alg,
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 1d977d515bf9..8870c7c5d9a4 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -59,7 +59,7 @@
* %r9
*/
__load_partial:
- xor %r9, %r9
+ xor %r9d, %r9d
pxor MSG, MSG
mov LEN, %r8
@@ -543,6 +543,7 @@ ENTRY(crypto_aegis256_aesni_enc_tail)
state_store0
FRAME_END
+ ret
ENDPROC(crypto_aegis256_aesni_enc_tail)
/*
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 2b5dd3af8f4d..b5f2a8fd5a71 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -375,16 +375,12 @@ static struct aead_alg crypto_aegis256_aesni_alg[] = {
}
};
-static const struct x86_cpu_id aesni_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AES),
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-
static int __init crypto_aegis256_aesni_module_init(void)
{
- if (!x86_match_cpu(aesni_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_aegis256_aesni_alg,
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index e762ef417562..9bd139569b41 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -258,7 +258,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
.macro GCM_INIT Iv SUBKEY AAD AADLEN
mov \AADLEN, %r11
mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
- xor %r11, %r11
+ xor %r11d, %r11d
mov %r11, InLen(%arg2) # ctx_data.in_length = 0
mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
@@ -286,7 +286,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu HashKey(%arg2), %xmm13
add %arg5, InLen(%arg2)
- xor %r11, %r11 # initialise the data pointer offset as zero
+ xor %r11d, %r11d # initialise the data pointer offset as zero
PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
sub %r11, %arg5 # sub partial block data used
@@ -702,7 +702,7 @@ _no_extra_mask_1_\@:
# GHASH computation for the last <16 Byte block
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
- xor %rax,%rax
+ xor %eax, %eax
mov %rax, PBlockLen(%arg2)
jmp _dec_done_\@
@@ -737,7 +737,7 @@ _no_extra_mask_2_\@:
# GHASH computation for the last <16 Byte block
GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
- xor %rax,%rax
+ xor %eax, %eax
mov %rax, PBlockLen(%arg2)
jmp _encode_done_\@
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index faecb1518bf8..1985ea0b551b 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -463,7 +463,7 @@ _get_AAD_rest_final\@:
_get_AAD_done\@:
# initialize the data pointer offset as zero
- xor %r11, %r11
+ xor %r11d, %r11d
# start AES for num_initial_blocks blocks
mov arg5, %rax # rax = *Y0
@@ -1770,7 +1770,7 @@ _get_AAD_rest_final\@:
_get_AAD_done\@:
# initialize the data pointer offset as zero
- xor %r11, %r11
+ xor %r11d, %r11d
# start AES for num_initial_blocks blocks
mov arg5, %rax # rax = *Y0
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
index 37d422e77931..de182c460f82 100644
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -113,7 +113,7 @@ ENDPROC(__morus1280_update_zero)
* %r9
*/
__load_partial:
- xor %r9, %r9
+ xor %r9d, %r9d
vpxor MSG, MSG, MSG
mov %rcx, %r8
@@ -453,6 +453,7 @@ ENTRY(crypto_morus1280_avx2_enc_tail)
vmovdqu STATE4, (4 * 32)(%rdi)
FRAME_END
+ ret
ENDPROC(crypto_morus1280_avx2_enc_tail)
/*
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
index f111f36d26dc..6634907d6ccd 100644
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
-static const struct x86_cpu_id avx2_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_AVX2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
-
static int __init crypto_morus1280_avx2_module_init(void)
{
- if (!x86_match_cpu(avx2_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_avx2_algs,
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
index 1fe637c7be9d..da5d2905db60 100644
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -235,7 +235,7 @@ ENDPROC(__morus1280_update_zero)
* %r9
*/
__load_partial:
- xor %r9, %r9
+ xor %r9d, %r9d
pxor MSG_LO, MSG_LO
pxor MSG_HI, MSG_HI
@@ -652,6 +652,7 @@ ENTRY(crypto_morus1280_sse2_enc_tail)
movdqu STATE4_HI, (9 * 16)(%rdi)
FRAME_END
+ ret
ENDPROC(crypto_morus1280_sse2_enc_tail)
/*
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index 839270aa713c..95cf857d2cbb 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
-static const struct x86_cpu_id sse2_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
static int __init crypto_morus1280_sse2_module_init(void)
{
- if (!x86_match_cpu(sse2_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus1280_sse2_algs,
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
index 71c72a0a0862..414db480250e 100644
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -113,7 +113,7 @@ ENDPROC(__morus640_update_zero)
* %r9
*/
__load_partial:
- xor %r9, %r9
+ xor %r9d, %r9d
pxor MSG, MSG
mov %rcx, %r8
@@ -437,6 +437,7 @@ ENTRY(crypto_morus640_sse2_enc_tail)
movdqu STATE4, (4 * 16)(%rdi)
FRAME_END
+ ret
ENDPROC(crypto_morus640_sse2_enc_tail)
/*
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 26b47e2db8d2..615fb7bc9a32 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -37,15 +37,11 @@ asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
-static const struct x86_cpu_id sse2_cpu_id[] = {
- X86_FEATURE_MATCH(X86_FEATURE_XMM2),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
-
static int __init crypto_morus640_sse2_module_init(void)
{
- if (!x86_match_cpu(sse2_cpu_id))
+ if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
+ !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
return crypto_register_aeads(crypto_morus640_sse2_algs,
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 6204bd53528c..613d0bfc3d84 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -96,7 +96,7 @@
# cleanup workspace
mov $8, %ecx
mov %rsp, %rdi
- xor %rax, %rax
+ xor %eax, %eax
rep stosq
mov %rbp, %rsp # deallocate workspace
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73a522d53b53..957dfb693ecc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -92,7 +92,7 @@ END(native_usergs_sysret64)
.endm
.macro TRACE_IRQS_IRETQ_DEBUG
- bt $9, EFLAGS(%rsp) /* interrupts off? */
+ btl $9, EFLAGS(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON_DEBUG
1:
@@ -408,6 +408,7 @@ ENTRY(ret_from_fork)
1:
/* kernel thread */
+ UNWIND_HINT_EMPTY
movq %r12, %rdi
CALL_NOSPEC %rbx
/*
@@ -701,7 +702,7 @@ retint_kernel:
#ifdef CONFIG_PREEMPT
/* Interrupts are off */
/* Check if we need preemption */
- bt $9, EFLAGS(%rsp) /* were interrupts off? */
+ btl $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
0: cmpl $0, PER_CPU_VAR(__preempt_count)
jnz 1f
@@ -981,7 +982,7 @@ ENTRY(\sym)
call \do_sym
- jmp error_exit /* %ebx: no swapgs flag */
+ jmp error_exit
.endif
END(\sym)
.endm
@@ -1222,7 +1223,6 @@ END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch GS if needed.
- * Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
UNWIND_HINT_FUNC
@@ -1269,7 +1269,6 @@ ENTRY(error_entry)
* for these here too.
*/
.Lerror_kernelspace:
- incl %ebx
leaq native_irq_return_iret(%rip), %rcx
cmpq %rcx, RIP+8(%rsp)
je .Lerror_bad_iret
@@ -1303,28 +1302,20 @@ ENTRY(error_entry)
/*
* Pretend that the exception came from user mode: set up pt_regs
- * as if we faulted immediately after IRET and clear EBX so that
- * error_exit knows that we will be returning to user mode.
+ * as if we faulted immediately after IRET.
*/
mov %rsp, %rdi
call fixup_bad_iret
mov %rax, %rsp
- decl %ebx
jmp .Lerror_entry_from_usermode_after_swapgs
END(error_entry)
-
-/*
- * On entry, EBX is a "return to kernel mode" flag:
- * 1: already in kernel mode, don't need SWAPGS
- * 0: user gsbase is loaded, we need SWAPGS and standard preparation for return to usermode
- */
ENTRY(error_exit)
UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- testl %ebx, %ebx
- jnz retint_kernel
+ testb $3, CS(%rsp)
+ jz retint_kernel
jmp retint_user
END(error_exit)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 261802b1cc50..b9ed1aa53a26 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -58,9 +58,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(src
hostprogs-y += vdso2c
quiet_cmd_vdso2c = VDSO2C $@
-define cmd_vdso2c
- $(obj)/vdso2c $< $(<:%.dbg=%) $@
-endef
+ cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@
$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
$(call if_changed,vdso2c)
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 4b98101209a1..d50bb4dc0650 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -579,7 +579,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
struct perf_event *event = pcpu->event;
- struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event *hwc;
struct perf_sample_data data;
struct perf_raw_record raw;
struct pt_regs regs;
@@ -602,6 +602,10 @@ fail:
return 0;
}
+ if (WARN_ON_ONCE(!event))
+ goto fail;
+
+ hwc = &event->hw;
msr = hwc->config_base;
buf = ibs_data.regs;
rdmsrl(msr, *buf);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 707b2a96e516..035c37481f57 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2041,15 +2041,15 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_disable(event);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
}
x86_pmu_disable_event(event);
-
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
}
static void intel_pmu_del_event(struct perf_event *event)
@@ -2068,17 +2068,19 @@ static void intel_pmu_read_event(struct perf_event *event)
x86_perf_event_update(event);
}
-static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+static void intel_pmu_enable_fixed(struct perf_event *event)
{
+ struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
- u64 ctrl_val, bits, mask;
+ u64 ctrl_val, mask, bits = 0;
/*
- * Enable IRQ generation (0x8),
+ * Enable IRQ generation (0x8), if not PEBS,
* and enable ring-3 counting (0x2) and ring-0 counting (0x1)
* if requested:
*/
- bits = 0x8ULL;
+ if (!event->attr.precise_ip)
+ bits |= 0x8;
if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
bits |= 0x2;
if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
@@ -2120,14 +2122,14 @@ static void intel_pmu_enable_event(struct perf_event *event)
if (unlikely(event_is_checkpointed(event)))
cpuc->intel_cp_status |= (1ull << hwc->idx);
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_enable(event);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
- intel_pmu_enable_fixed(hwc);
+ intel_pmu_enable_fixed(event);
return;
}
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_enable(event);
-
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
@@ -2280,7 +2282,10 @@ again:
* counters from the GLOBAL_STATUS mask and we always process PEBS
* events via drain_pebs().
*/
- status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ status &= ~cpuc->pebs_enabled;
+ else
+ status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
/*
* PEBS overflow sets bit 62 in the global status register
@@ -2997,6 +3002,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
+
+ if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+ event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
}
if (needs_branch_stack(event)) {
@@ -4069,7 +4077,6 @@ __init int intel_pmu_init(void)
intel_pmu_lbr_init_skl();
x86_pmu.event_constraints = intel_slm_event_constraints;
- x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
x86_pmu.extra_regs = intel_glm_extra_regs;
/*
* It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
@@ -4079,6 +4086,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_prec_dist = true;
x86_pmu.lbr_pt_coexist = true;
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_PEBS_ALL;
x86_pmu.get_event_constraints = glp_get_event_constraints;
x86_pmu.cpu_events = glm_events_attrs;
/* Goldmont Plus has 4-wide pipeline */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8a10a045b57b..b7b01d762d32 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -408,9 +408,11 @@ static int alloc_bts_buffer(int cpu)
ds->bts_buffer_base = (unsigned long) cea;
ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
ds->bts_index = ds->bts_buffer_base;
- max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
- ds->bts_absolute_maximum = ds->bts_buffer_base + max;
- ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
+ max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+ ds->bts_absolute_maximum = ds->bts_buffer_base +
+ max * BTS_RECORD_SIZE;
+ ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+ (max / 16) * BTS_RECORD_SIZE;
return 0;
}
@@ -711,12 +713,6 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
-struct event_constraint intel_glp_pebs_event_constraints[] = {
- /* Allow all events as PEBS with no flags */
- INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
- EVENT_CONSTRAINT_END
-};
-
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
@@ -869,6 +865,13 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
}
}
+ /*
+ * Extended PEBS support
+ * Makes the PEBS code search the normal constraints.
+ */
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ return NULL;
+
return &emptyconstraint;
}
@@ -894,10 +897,16 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
{
struct debug_store *ds = cpuc->ds;
u64 threshold;
+ int reserved;
+
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+ reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
+ else
+ reserved = x86_pmu.max_pebs_events;
if (cpuc->n_pebs == cpuc->n_large_pebs) {
threshold = ds->pebs_absolute_maximum -
- x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+ reserved * x86_pmu.pebs_record_size;
} else {
threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
}
@@ -961,7 +970,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
* This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
*/
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
- ds->pebs_event_reset[hwc->idx] =
+ unsigned int idx = hwc->idx;
+
+ if (idx >= INTEL_PMC_IDX_FIXED)
+ idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+ ds->pebs_event_reset[idx] =
(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
} else {
ds->pebs_event_reset[hwc->idx] = 0;
@@ -1184,16 +1197,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
}
/*
+ * We must however always use iregs for the unwinder to stay sane; the
+ * record BP,SP,IP can point into thin air when the record is from a
+ * previous PMI context or an (I)RET happend between the record and
+ * PMI.
+ */
+ if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ data->callchain = perf_callchain(event, iregs);
+
+ /*
* We use the interrupt regs as a base because the PEBS record does not
* contain a full regs set, specifically it seems to lack segment
* descriptors, which get used by things like user_mode().
*
* In the simple case fix up only the IP for PERF_SAMPLE_IP.
- *
- * We must however always use BP,SP from iregs for the unwinder to stay
- * sane; the record BP,SP can point into thin air when the record is
- * from a previous PMI context or an (I)RET happend between the record
- * and PMI.
*/
*regs = *iregs;
@@ -1212,15 +1229,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
regs->si = pebs->si;
regs->di = pebs->di;
- /*
- * Per the above; only set BP,SP if we don't need callchains.
- *
- * XXX: does this make sense?
- */
- if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
- regs->bp = pebs->bp;
- regs->sp = pebs->sp;
- }
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
#ifndef CONFIG_X86_32
regs->r8 = pebs->r8;
@@ -1482,9 +1492,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
struct debug_store *ds = cpuc->ds;
struct perf_event *event;
void *base, *at, *top;
- short counts[MAX_PEBS_EVENTS] = {};
- short error[MAX_PEBS_EVENTS] = {};
- int bit, i;
+ short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ int bit, i, size;
+ u64 mask;
if (!x86_pmu.pebs_active)
return;
@@ -1494,6 +1505,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
ds->pebs_index = ds->pebs_buffer_base;
+ mask = (1ULL << x86_pmu.max_pebs_events) - 1;
+ size = x86_pmu.max_pebs_events;
+ if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
+ mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
+ size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+ }
+
if (unlikely(base >= top)) {
/*
* The drain_pebs() could be called twice in a short period
@@ -1503,7 +1521,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
* update the event->count for this case.
*/
for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
- x86_pmu.max_pebs_events) {
+ size) {
event = cpuc->events[bit];
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_save_and_restart_reload(event, 0);
@@ -1516,12 +1534,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
u64 pebs_status;
pebs_status = p->status & cpuc->pebs_enabled;
- pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+ pebs_status &= mask;
/* PEBS v3 has more accurate status bits */
if (x86_pmu.intel_cap.pebs_format >= 3) {
for_each_set_bit(bit, (unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events)
+ size)
counts[bit]++;
continue;
@@ -1569,7 +1587,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
counts[bit]++;
}
- for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+ for (bit = 0; bit < size; bit++) {
if ((counts[bit] == 0) && (error[bit] == 0))
continue;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index cf372b90557e..f3e006bed9a7 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -216,6 +216,8 @@ static void intel_pmu_lbr_reset_64(void)
void intel_pmu_lbr_reset(void)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
if (!x86_pmu.lbr_nr)
return;
@@ -223,6 +225,9 @@ void intel_pmu_lbr_reset(void)
intel_pmu_lbr_reset_32();
else
intel_pmu_lbr_reset_64();
+
+ cpuc->last_task_ctx = NULL;
+ cpuc->last_log_id = 0;
}
/*
@@ -334,6 +339,7 @@ static inline u64 rdlbr_to(unsigned int idx)
static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
unsigned lbr_idx, mask;
u64 tos;
@@ -344,9 +350,21 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
return;
}
- mask = x86_pmu.lbr_nr - 1;
tos = task_ctx->tos;
- for (i = 0; i < tos; i++) {
+ /*
+ * Does not restore the LBR registers, if
+ * - No one else touched them, and
+ * - Did not enter C6
+ */
+ if ((task_ctx == cpuc->last_task_ctx) &&
+ (task_ctx->log_id == cpuc->last_log_id) &&
+ rdlbr_from(tos)) {
+ task_ctx->lbr_stack_state = LBR_NONE;
+ return;
+ }
+
+ mask = x86_pmu.lbr_nr - 1;
+ for (i = 0; i < task_ctx->valid_lbrs; i++) {
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
wrlbr_to (lbr_idx, task_ctx->lbr_to[i]);
@@ -354,14 +372,24 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+
+ for (; i < x86_pmu.lbr_nr; i++) {
+ lbr_idx = (tos - i) & mask;
+ wrlbr_from(lbr_idx, 0);
+ wrlbr_to(lbr_idx, 0);
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+ wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
+ }
+
wrmsrl(x86_pmu.lbr_tos, tos);
task_ctx->lbr_stack_state = LBR_NONE;
}
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
unsigned lbr_idx, mask;
- u64 tos;
+ u64 tos, from;
int i;
if (task_ctx->lbr_callstack_users == 0) {
@@ -371,15 +399,22 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
mask = x86_pmu.lbr_nr - 1;
tos = intel_pmu_lbr_tos();
- for (i = 0; i < tos; i++) {
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
lbr_idx = (tos - i) & mask;
- task_ctx->lbr_from[i] = rdlbr_from(lbr_idx);
+ from = rdlbr_from(lbr_idx);
+ if (!from)
+ break;
+ task_ctx->lbr_from[i] = from;
task_ctx->lbr_to[i] = rdlbr_to(lbr_idx);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+ task_ctx->valid_lbrs = i;
task_ctx->tos = tos;
task_ctx->lbr_stack_state = LBR_VALID;
+
+ cpuc->last_task_ctx = task_ctx;
+ cpuc->last_log_id = ++task_ctx->log_id;
}
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
@@ -531,7 +566,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
*/
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
- bool need_info = false;
+ bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
@@ -542,7 +577,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (cpuc->lbr_sel) {
need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
if (cpuc->lbr_sel->config & LBR_CALL_STACK)
- num = tos;
+ call_stack = true;
}
for (i = 0; i < num; i++) {
@@ -555,6 +590,13 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
from = rdlbr_from(lbr_idx);
to = rdlbr_to(lbr_idx);
+ /*
+ * Read LBR call stack entries
+ * until invalid entry (0s) is detected.
+ */
+ if (call_stack && !from)
+ break;
+
if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index c9e1e0bef3c3..e17ab885b1e9 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -28,7 +28,7 @@
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
#define UNCORE_EXTRA_PCI_DEV 0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX 3
+#define UNCORE_EXTRA_PCI_DEV_MAX 4
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 87dc0263a2e1..51d7c117e3c7 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1029,6 +1029,7 @@ void snbep_uncore_cpu_init(void)
enum {
SNBEP_PCI_QPI_PORT0_FILTER,
SNBEP_PCI_QPI_PORT1_FILTER,
+ BDX_PCI_QPI_PORT2_FILTER,
HSWEP_PCI_PCU_3,
};
@@ -3286,15 +3287,18 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
},
{ /* QPI Port 0 filter */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
},
{ /* QPI Port 1 filter */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
},
{ /* QPI Port 2 filter */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
- .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ BDX_PCI_QPI_PORT2_FILTER),
},
{ /* PCU.3 (for Capability registers) */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 9f3711470ec1..156286335351 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -163,6 +163,7 @@ struct intel_excl_cntrs {
unsigned core_id; /* per-core: core id */
};
+struct x86_perf_task_context;
#define MAX_LBR_ENTRIES 32
enum {
@@ -214,6 +215,8 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
u64 br_sel;
+ struct x86_perf_task_context *last_task_ctx;
+ int last_log_id;
/*
* Intel host/guest exclude bits
@@ -648,8 +651,10 @@ struct x86_perf_task_context {
u64 lbr_to[MAX_LBR_ENTRIES];
u64 lbr_info[MAX_LBR_ENTRIES];
int tos;
+ int valid_lbrs;
int lbr_callstack_users;
int lbr_stack_state;
+ int log_id;
};
#define x86_add_quirk(func_) \
@@ -668,6 +673,7 @@ do { \
#define PMU_FL_HAS_RSP_1 0x2 /* has 2 equivalent offcore_rsp regs */
#define PMU_FL_EXCL_CNTRS 0x4 /* has exclusive counter requirements */
#define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */
+#define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index f68855499391..402338365651 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -114,6 +114,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
}
+ if (nr_bank < 0)
+ goto ipi_mask_ex_done;
if (!nr_bank)
ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
@@ -158,6 +160,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
for_each_cpu(cur_cpu, mask) {
vcpu = hv_cpu_number_to_vp_number(cur_cpu);
+ if (vcpu == VP_INVAL)
+ goto ipi_mask_done;
+
/*
* This particular version of the IPI hypercall can
* only target upto 64 CPUs.
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 4c431e1c1eff..1ff420217298 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -265,7 +265,7 @@ void __init hyperv_init(void)
{
u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
- int cpuhp;
+ int cpuhp, i;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
@@ -293,6 +293,9 @@ void __init hyperv_init(void)
if (!hv_vp_index)
return;
+ for (i = 0; i < num_possible_cpus(); i++)
+ hv_vp_index[i] = VP_INVAL;
+
hv_vp_assist_page = kcalloc(num_possible_cpus(),
sizeof(*hv_vp_assist_page), GFP_KERNEL);
if (!hv_vp_assist_page) {
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index c356098b6fb9..4d4015ddcf26 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,8 +7,6 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
-#include <asm/nospec-branch.h>
-
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -34,7 +32,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
- firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -47,7 +44,6 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
- firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -60,7 +56,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
- firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -73,7 +68,6 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
- firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..990770f9e76b 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -46,6 +46,65 @@
#define _ASM_SI __ASM_REG(si)
#define _ASM_DI __ASM_REG(di)
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1 _ASM_AX
+#define _ASM_ARG2 _ASM_DX
+#define _ASM_ARG3 _ASM_CX
+
+#define _ASM_ARG1L eax
+#define _ASM_ARG2L edx
+#define _ASM_ARG3L ecx
+
+#define _ASM_ARG1W ax
+#define _ASM_ARG2W dx
+#define _ASM_ARG3W cx
+
+#define _ASM_ARG1B al
+#define _ASM_ARG2B dl
+#define _ASM_ARG3B cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1 _ASM_DI
+#define _ASM_ARG2 _ASM_SI
+#define _ASM_ARG3 _ASM_DX
+#define _ASM_ARG4 _ASM_CX
+#define _ASM_ARG5 r8
+#define _ASM_ARG6 r9
+
+#define _ASM_ARG1Q rdi
+#define _ASM_ARG2Q rsi
+#define _ASM_ARG3Q rdx
+#define _ASM_ARG4Q rcx
+#define _ASM_ARG5Q r8
+#define _ASM_ARG6Q r9
+
+#define _ASM_ARG1L edi
+#define _ASM_ARG2L esi
+#define _ASM_ARG3L edx
+#define _ASM_ARG4L ecx
+#define _ASM_ARG5L r8d
+#define _ASM_ARG6L r9d
+
+#define _ASM_ARG1W di
+#define _ASM_ARG2W si
+#define _ASM_ARG3W dx
+#define _ASM_ARG4W cx
+#define _ASM_ARG5W r8w
+#define _ASM_ARG6W r9w
+
+#define _ASM_ARG1B dil
+#define _ASM_ARG2B sil
+#define _ASM_ARG3B dl
+#define _ASM_ARG4B cl
+#define _ASM_ARG5B r8b
+#define _ASM_ARG6B r9b
+
+#endif
+
/*
* Macros to generate condition code outputs from inline assembly,
* The output operand must be type "bool".
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 0db6bec95489..b143717b92b3 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,6 +80,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
@@ -91,6 +92,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
+#define arch_atomic_inc arch_atomic_inc
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
@@ -103,6 +105,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
+#define arch_atomic_dec arch_atomic_dec
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
@@ -117,6 +120,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
@@ -130,6 +134,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
@@ -144,6 +149,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
+#define arch_atomic_add_negative arch_atomic_add_negative
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
@@ -173,9 +179,6 @@ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
return arch_atomic_add_return(-i, v);
}
-#define arch_atomic_inc_return(v) (arch_atomic_add_return(1, v))
-#define arch_atomic_dec_return(v) (arch_atomic_sub_return(1, v))
-
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return xadd(&v->counter, i);
@@ -199,7 +202,7 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n
static inline int arch_atomic_xchg(atomic_t *v, int new)
{
- return xchg(&v->counter, new);
+ return arch_xchg(&v->counter, new);
}
static inline void arch_atomic_and(int i, atomic_t *v)
@@ -253,27 +256,6 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
return val;
}
-/**
- * __arch_atomic_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
- */
-static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u)
-{
- int c = arch_atomic_read(v);
-
- do {
- if (unlikely(c == u))
- break;
- } while (!arch_atomic_try_cmpxchg(v, &c, c + a));
-
- return c;
-}
-
#ifdef CONFIG_X86_32
# include <asm/atomic64_32.h>
#else
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 92212bf0484f..ef959f02d070 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -158,6 +158,7 @@ static inline long long arch_atomic64_inc_return(atomic64_t *v)
"S" (v) : "memory", "ecx");
return a;
}
+#define arch_atomic64_inc_return arch_atomic64_inc_return
static inline long long arch_atomic64_dec_return(atomic64_t *v)
{
@@ -166,6 +167,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
"S" (v) : "memory", "ecx");
return a;
}
+#define arch_atomic64_dec_return arch_atomic64_dec_return
/**
* arch_atomic64_add - add integer to atomic64 variable
@@ -198,25 +200,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
}
/**
- * arch_atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v)
-{
- return arch_atomic64_sub_return(i, v) == 0;
-}
-
-/**
* arch_atomic64_inc - increment atomic64 variable
* @v: pointer to type atomic64_t
*
* Atomically increments @v by 1.
*/
+#define arch_atomic64_inc arch_atomic64_inc
static inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
@@ -229,6 +218,7 @@ static inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
+#define arch_atomic64_dec arch_atomic64_dec
static inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
@@ -236,46 +226,6 @@ static inline void arch_atomic64_dec(atomic64_t *v)
}
/**
- * arch_atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int arch_atomic64_dec_and_test(atomic64_t *v)
-{
- return arch_atomic64_dec_return(v) == 0;
-}
-
-/**
- * atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_inc_and_test(atomic64_t *v)
-{
- return arch_atomic64_inc_return(v) == 0;
-}
-
-/**
- * arch_atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int arch_atomic64_add_negative(long long i, atomic64_t *v)
-{
- return arch_atomic64_add_return(i, v) < 0;
-}
-
-/**
* arch_atomic64_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
@@ -295,7 +245,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
return (int)a;
}
-
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
@@ -304,6 +254,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
return r;
}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6106b59d3260..4343d9b4f30e 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,6 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
@@ -82,6 +83,7 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
+#define arch_atomic64_inc arch_atomic64_inc
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
@@ -95,6 +97,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
+#define arch_atomic64_dec arch_atomic64_dec
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
@@ -110,6 +113,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
@@ -123,6 +127,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
@@ -137,6 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
+#define arch_atomic64_add_negative arch_atomic64_add_negative
static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
@@ -169,9 +175,6 @@ static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
return xadd(&v->counter, -i);
}
-#define arch_atomic64_inc_return(v) (arch_atomic64_add_return(1, (v)))
-#define arch_atomic64_dec_return(v) (arch_atomic64_sub_return(1, (v)))
-
static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
{
return arch_cmpxchg(&v->counter, old, new);
@@ -185,46 +188,7 @@ static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, l
static inline long arch_atomic64_xchg(atomic64_t *v, long new)
{
- return xchg(&v->counter, new);
-}
-
-/**
- * arch_atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
-{
- s64 c = arch_atomic64_read(v);
- do {
- if (unlikely(c == u))
- return false;
- } while (!arch_atomic64_try_cmpxchg(v, &c, c + a));
- return true;
-}
-
-#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0)
-
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
-{
- s64 dec, c = arch_atomic64_read(v);
- do {
- dec = c - 1;
- if (unlikely(dec < 0))
- break;
- } while (!arch_atomic64_try_cmpxchg(v, &c, dec));
- return dec;
+ return arch_xchg(&v->counter, new);
}
static inline void arch_atomic64_and(long i, atomic64_t *v)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index e3efd8a06066..a55d79b233d3 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -75,7 +75,7 @@ extern void __add_wrong_size(void)
* use "asm volatile" and "memory" clobbers to prevent gcc from moving
* information around.
*/
-#define xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
+#define arch_xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
/*
* Atomic compare and exchange. Compare OLD with MEM, if identical,
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index bfca3b346c74..072e5459fe2f 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -10,13 +10,13 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
+ arch_cmpxchg((ptr), (o), (n)); \
})
#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
+ arch_cmpxchg_local((ptr), (o), (n)); \
})
#define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16)
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index f59c39835a5a..a1f0e90d0818 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -49,11 +49,14 @@ static inline int hw_breakpoint_slots(int type)
return HBP_NUM;
}
+struct perf_event_attr;
struct perf_event;
struct pmu;
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 62a9f4966b42..ae26df1c2789 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -8,6 +8,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
+#define MAX_FIXED_PEBS_EVENTS 3
/*
* A debug store configuration.
@@ -23,7 +24,7 @@ struct debug_store {
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
- u64 pebs_event_reset[MAX_PEBS_EVENTS];
+ u64 pebs_event_reset[MAX_PEBS_EVENTS + MAX_FIXED_PEBS_EVENTS];
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 89f08955fff7..c4fc17220df9 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,7 +13,7 @@
* Interrupt control:
*/
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
{
unsigned long flags;
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 367d99cff426..c8cec1b39b88 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -78,7 +78,7 @@ struct arch_specific_insn {
* boostable = true: This instruction has been boosted: we have
* added a relative jump after the instruction copy in insn,
* so no single-step and fixup are needed (unless there's
- * a post_handler or break_handler).
+ * a post_handler).
*/
bool boostable;
bool if_modifier;
@@ -111,9 +111,6 @@ struct kprobe_ctlblk {
unsigned long kprobe_status;
unsigned long kprobe_old_flags;
unsigned long kprobe_saved_flags;
- unsigned long *jprobe_saved_sp;
- struct pt_regs jprobe_saved_regs;
- kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
struct prev_kprobe prev_kprobe;
};
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3cd14311edfa..5a7375ed5f7c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -9,6 +9,8 @@
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
+#define VP_INVAL U32_MAX
+
struct ms_hyperv_info {
u32 features;
u32 misc_features;
@@ -20,7 +22,6 @@ struct ms_hyperv_info {
extern struct ms_hyperv_info ms_hyperv;
-
/*
* Generate the guest ID.
*/
@@ -281,6 +282,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
*/
for_each_cpu(cpu, cpus) {
vcpu = hv_cpu_number_to_vp_number(cpu);
+ if (vcpu == VP_INVAL)
+ return -1;
vcpu_bank = vcpu / 64;
vcpu_offset = vcpu % 64;
__set_bit(vcpu_offset, (unsigned long *)
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 9c9dc579bd7d..46f516dd80ce 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -88,6 +88,7 @@ struct orc_entry {
unsigned sp_reg:4;
unsigned bp_reg:4;
unsigned type:2;
+ unsigned end:1;
} __packed;
/*
@@ -101,6 +102,7 @@ struct unwind_hint {
s16 sp_offset;
u8 sp_reg;
u8 type;
+ u8 end;
};
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a06b07399d17..e9202a0de8f0 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -450,9 +450,10 @@ do { \
bool __ret; \
typeof(pcp1) __o1 = (o1), __n1 = (n1); \
typeof(pcp2) __o2 = (o2), __n2 = (n2); \
- asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
- : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
- : "b" (__n1), "c" (__n2), "a" (__o1)); \
+ asm volatile("cmpxchg8b "__percpu_arg(1) \
+ CC_SET(z) \
+ : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
+ : "b" (__n1), "c" (__n2)); \
__ret; \
})
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 9ef5ee03d2d7..159622ee0674 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -43,7 +43,7 @@ asm (".pushsection .text;"
"push %rdx;"
"mov $0x1,%eax;"
"xor %edx,%edx;"
- "lock cmpxchg %dl,(%rdi);"
+ LOCK_PREFIX "cmpxchg %dl,(%rdi);"
"cmp $0x1,%al;"
"jne .slowpath;"
"pop %rdx;"
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4cf11d88d3b3..19b90521954c 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -5,6 +5,7 @@
* PaX/grsecurity.
*/
#include <linux/refcount.h>
+#include <asm/bug.h>
/*
* This is the first portion of the refcount error handling, which lives in
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 62acb613114b..a9d637bc301d 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -52,7 +52,12 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len)
unsigned long ret;
__uaccess_begin();
- ret = memcpy_mcsafe(to, from, len);
+ /*
+ * Note, __memcpy_mcsafe() is explicitly used since it can
+ * handle exceptions / faults. memcpy_mcsafe() may fall back to
+ * memcpy() which lacks this handling.
+ */
+ ret = __memcpy_mcsafe(to, from, len);
__uaccess_end();
return ret;
}
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index bae46fc6b9de..0bcdb1279361 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -26,7 +26,7 @@
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL
+.macro UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=0 type=ORC_TYPE_CALL end=0
#ifdef CONFIG_STACK_VALIDATION
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
@@ -35,12 +35,14 @@
.short \sp_offset
.byte \sp_reg
.byte \type
+ .byte \end
+ .balign 4
.popsection
#endif
.endm
.macro UNWIND_HINT_EMPTY
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED
+ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED end=1
.endm
.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 iret=0
@@ -86,19 +88,21 @@
#else /* !__ASSEMBLY__ */
-#define UNWIND_HINT(sp_reg, sp_offset, type) \
+#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
"987: \n\t" \
".pushsection .discard.unwind_hints\n\t" \
/* struct unwind_hint */ \
".long 987b - .\n\t" \
- ".short " __stringify(sp_offset) "\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
".byte " __stringify(sp_reg) "\n\t" \
".byte " __stringify(type) "\n\t" \
+ ".byte " __stringify(end) "\n\t" \
+ ".balign 4 \n\t" \
".popsection\n\t"
-#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE)
+#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0)
-#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE)
+#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0)
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 02d6f5cf4e70..8824d01c0c35 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_breakpoint.o
obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
+obj-y += irqflags.o
obj-y += process.o
obj-y += fpu/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2aabd4cb0e3f..07fa222f0c52 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,6 +573,9 @@ static u32 skx_deadline_rev(void)
case 0x04: return 0x02000014;
}
+ if (boot_cpu_data.x86_stepping > 4)
+ return 0;
+
return ~0U;
}
@@ -937,7 +940,7 @@ static int __init calibrate_APIC_clock(void)
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
pr_warning("APIC timer disabled due to verification failure\n");
- return -1;
+ return -1;
}
return 0;
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 35aaee4fc028..0954315842c0 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -218,7 +218,8 @@ static int reserve_irq_vector(struct irq_data *irqd)
return 0;
}
-static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
+static int
+assign_vector_locked(struct irq_data *irqd, const struct cpumask *dest)
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
bool resvd = apicd->has_reserved;
@@ -245,22 +246,12 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
return -EBUSY;
vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
- if (vector > 0)
- apic_update_vector(irqd, vector, cpu);
trace_vector_alloc(irqd->irq, vector, resvd, vector);
- return vector;
-}
-
-static int assign_vector_locked(struct irq_data *irqd,
- const struct cpumask *dest)
-{
- struct apic_chip_data *apicd = apic_chip_data(irqd);
- int vector = allocate_vector(irqd, dest);
-
if (vector < 0)
return vector;
+ apic_update_vector(irqd, vector, cpu);
+ apic_update_irq_cfg(irqd, vector, cpu);
- apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu);
return 0;
}
@@ -433,7 +424,7 @@ static int activate_managed(struct irq_data *irqd)
pr_err("Managed startup irq %u, no vector available\n",
irqd->irq);
}
- return ret;
+ return ret;
}
static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5d0de79fdab0..ec00d1ff5098 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -240,6 +240,7 @@
#include <asm/olpc.h>
#include <asm/paravirt.h>
#include <asm/reboot.h>
+#include <asm/nospec-branch.h>
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
extern int (*console_blank_hook)(int);
@@ -614,11 +615,13 @@ static long __apm_bios_call(void *_call)
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
+ firmware_restrict_branch_speculation_start();
APM_DO_SAVE_SEGS;
apm_bios_call_asm(call->func, call->ebx, call->ecx,
&call->eax, &call->ebx, &call->ecx, &call->edx,
&call->esi);
APM_DO_RESTORE_SEGS;
+ firmware_restrict_branch_speculation_end();
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
@@ -690,10 +693,12 @@ static long __apm_bios_call_simple(void *_call)
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
+ firmware_restrict_branch_speculation_start();
APM_DO_SAVE_SEGS;
error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx,
&call->eax);
APM_DO_RESTORE_SEGS;
+ firmware_restrict_branch_speculation_end();
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 082d7875cef8..38915fbfae73 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -543,7 +543,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
nodes_per_socket = ((value >> 3) & 7) + 1;
}
- if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+ if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
+ !boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
+ c->x86 >= 0x15 && c->x86 <= 0x17) {
unsigned int bit;
switch (c->x86) {
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 404df26b7de8..5c0ea39311fe 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -155,7 +155,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
/* SSBD controlled in MSR_SPEC_CTRL */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD))
hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
if (hostval != guestval) {
@@ -533,9 +534,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
* Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
* use a completely different MSR and bit dependent on family.
*/
- if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
+ !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
x86_amd_ssb_disable();
- else {
+ } else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c102ad51025e..4b767284b7f5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -123,8 +123,8 @@ void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
- /* We hope get_seconds stays lockless */
- m->time = get_seconds();
+ /* need the internal __ version to avoid deadlocks */
+ m->time = __ktime_get_real_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
m->cpuid = cpuid_eax(1);
m->socketid = cpu_data(m->extcpu).phys_proc_id;
@@ -1104,6 +1104,101 @@ static void mce_unmap_kpfn(unsigned long pfn)
}
#endif
+
+/*
+ * Cases where we avoid rendezvous handler timeout:
+ * 1) If this CPU is offline.
+ *
+ * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
+ * skip those CPUs which remain looping in the 1st kernel - see
+ * crash_nmi_callback().
+ *
+ * Note: there still is a small window between kexec-ing and the new,
+ * kdump kernel establishing a new #MC handler where a broadcasted MCE
+ * might not get handled properly.
+ */
+static bool __mc_check_crashing_cpu(int cpu)
+{
+ if (cpu_is_offline(cpu) ||
+ (crashing_cpu != -1 && crashing_cpu != cpu)) {
+ u64 mcgstatus;
+
+ mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ if (mcgstatus & MCG_STATUS_RIPV) {
+ mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ return true;
+ }
+ }
+ return false;
+}
+
+static void __mc_scan_banks(struct mce *m, struct mce *final,
+ unsigned long *toclear, unsigned long *valid_banks,
+ int no_way_out, int *worst)
+{
+ struct mca_config *cfg = &mca_cfg;
+ int severity, i;
+
+ for (i = 0; i < cfg->banks; i++) {
+ __clear_bit(i, toclear);
+ if (!test_bit(i, valid_banks))
+ continue;
+
+ if (!mce_banks[i].ctl)
+ continue;
+
+ m->misc = 0;
+ m->addr = 0;
+ m->bank = i;
+
+ m->status = mce_rdmsrl(msr_ops.status(i));
+ if (!(m->status & MCI_STATUS_VAL))
+ continue;
+
+ /*
+ * Corrected or non-signaled errors are handled by
+ * machine_check_poll(). Leave them alone, unless this panics.
+ */
+ if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+ !no_way_out)
+ continue;
+
+ /* Set taint even when machine check was not enabled. */
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+
+ severity = mce_severity(m, cfg->tolerant, NULL, true);
+
+ /*
+ * When machine check was for corrected/deferred handler don't
+ * touch, unless we're panicking.
+ */
+ if ((severity == MCE_KEEP_SEVERITY ||
+ severity == MCE_UCNA_SEVERITY) && !no_way_out)
+ continue;
+
+ __set_bit(i, toclear);
+
+ /* Machine check event was not enabled. Clear, but ignore. */
+ if (severity == MCE_NO_SEVERITY)
+ continue;
+
+ mce_read_aux(m, i);
+
+ /* assuming valid severity level != 0 */
+ m->severity = severity;
+
+ mce_log(m);
+
+ if (severity > *worst) {
+ *final = *m;
+ *worst = severity;
+ }
+ }
+
+ /* mce_clear_state will clear *final, save locally for use later */
+ *m = *final;
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1118,68 +1213,45 @@ static void mce_unmap_kpfn(unsigned long pfn)
*/
void do_machine_check(struct pt_regs *regs, long error_code)
{
+ DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
+ DECLARE_BITMAP(toclear, MAX_NR_BANKS);
struct mca_config *cfg = &mca_cfg;
+ int cpu = smp_processor_id();
+ char *msg = "Unknown";
struct mce m, *final;
- int i;
int worst = 0;
- int severity;
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
int order = -1;
+
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
int no_way_out = 0;
+
/*
* If kill_it gets set, there might be a way to recover from this
* error.
*/
int kill_it = 0;
- DECLARE_BITMAP(toclear, MAX_NR_BANKS);
- DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
- char *msg = "Unknown";
/*
* MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
* on Intel.
*/
int lmce = 1;
- int cpu = smp_processor_id();
-
- /*
- * Cases where we avoid rendezvous handler timeout:
- * 1) If this CPU is offline.
- *
- * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to
- * skip those CPUs which remain looping in the 1st kernel - see
- * crash_nmi_callback().
- *
- * Note: there still is a small window between kexec-ing and the new,
- * kdump kernel establishing a new #MC handler where a broadcasted MCE
- * might not get handled properly.
- */
- if (cpu_is_offline(cpu) ||
- (crashing_cpu != -1 && crashing_cpu != cpu)) {
- u64 mcgstatus;
- mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
- if (mcgstatus & MCG_STATUS_RIPV) {
- mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
- return;
- }
- }
+ if (__mc_check_crashing_cpu(cpu))
+ return;
ist_enter(regs);
this_cpu_inc(mce_exception_count);
- if (!cfg->banks)
- goto out;
-
mce_gather_info(&m, regs);
m.tsc = rdtsc();
@@ -1220,67 +1292,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
}
- for (i = 0; i < cfg->banks; i++) {
- __clear_bit(i, toclear);
- if (!test_bit(i, valid_banks))
- continue;
- if (!mce_banks[i].ctl)
- continue;
-
- m.misc = 0;
- m.addr = 0;
- m.bank = i;
-
- m.status = mce_rdmsrl(msr_ops.status(i));
- if ((m.status & MCI_STATUS_VAL) == 0)
- continue;
-
- /*
- * Non uncorrected or non signaled errors are handled by
- * machine_check_poll. Leave them alone, unless this panics.
- */
- if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
- !no_way_out)
- continue;
-
- /*
- * Set taint even when machine check was not enabled.
- */
- add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
-
- severity = mce_severity(&m, cfg->tolerant, NULL, true);
-
- /*
- * When machine check was for corrected/deferred handler don't
- * touch, unless we're panicing.
- */
- if ((severity == MCE_KEEP_SEVERITY ||
- severity == MCE_UCNA_SEVERITY) && !no_way_out)
- continue;
- __set_bit(i, toclear);
- if (severity == MCE_NO_SEVERITY) {
- /*
- * Machine check event was not enabled. Clear, but
- * ignore.
- */
- continue;
- }
-
- mce_read_aux(&m, i);
-
- /* assuming valid severity level != 0 */
- m.severity = severity;
-
- mce_log(&m);
-
- if (severity > worst) {
- *final = m;
- worst = severity;
- }
- }
-
- /* mce_clear_state will clear *final, save locally for use later */
- m = *final;
+ __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1319,7 +1331,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (worst > 0)
mce_report_event(regs);
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-out:
+
sync_core();
if (worst != MCE_AR_SEVERITY && !kill_it)
@@ -2165,9 +2177,6 @@ static ssize_t store_int_with_restart(struct device *s,
if (check_interval == old_check_interval)
return ret;
- if (check_interval < 1)
- check_interval = 1;
-
mutex_lock(&mce_sysfs_mutex);
mce_restart();
mutex_unlock(&mce_sysfs_mutex);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 4021d3859499..40eee6cc4124 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -106,7 +106,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
memset(line, 0, LINE_SIZE);
- length = strncpy_from_user(line, buf, LINE_SIZE - 1);
+ len = min_t(size_t, len, LINE_SIZE - 1);
+ length = strncpy_from_user(line, buf, len);
if (length < 0)
return length;
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 8344dd2f310a..15ebc2fc166e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
* address given in m16:64.
*/
pushq $.Lafter_lret # put return address on stack for unwinder
- xorq %rbp, %rbp # clear frame pointer
+ xorl %ebp, %ebp # clear frame pointer
movq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 8771766d46b6..34a5c1715148 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -169,28 +169,29 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
set_dr_addr_mask(0, i);
}
-/*
- * Check for virtual address in kernel space.
- */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+static int arch_bp_generic_len(int x86_len)
{
- unsigned int len;
- unsigned long va;
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
- va = info->address;
- len = bp->attr.bp_len;
-
- /*
- * We don't need to worry about va + len - 1 overflowing:
- * we already require that va is aligned to a multiple of len.
- */
- return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
+ switch (x86_len) {
+ case X86_BREAKPOINT_LEN_1:
+ return HW_BREAKPOINT_LEN_1;
+ case X86_BREAKPOINT_LEN_2:
+ return HW_BREAKPOINT_LEN_2;
+ case X86_BREAKPOINT_LEN_4:
+ return HW_BREAKPOINT_LEN_4;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ return HW_BREAKPOINT_LEN_8;
+#endif
+ default:
+ return -EINVAL;
+ }
}
int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type)
{
+ int len;
+
/* Type */
switch (x86_type) {
case X86_BREAKPOINT_EXECUTE:
@@ -211,42 +212,47 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
}
/* Len */
- switch (x86_len) {
- case X86_BREAKPOINT_LEN_1:
- *gen_len = HW_BREAKPOINT_LEN_1;
- break;
- case X86_BREAKPOINT_LEN_2:
- *gen_len = HW_BREAKPOINT_LEN_2;
- break;
- case X86_BREAKPOINT_LEN_4:
- *gen_len = HW_BREAKPOINT_LEN_4;
- break;
-#ifdef CONFIG_X86_64
- case X86_BREAKPOINT_LEN_8:
- *gen_len = HW_BREAKPOINT_LEN_8;
- break;
-#endif
- default:
+ len = arch_bp_generic_len(x86_len);
+ if (len < 0)
return -EINVAL;
- }
+ *gen_len = len;
return 0;
}
-
-static int arch_build_bp_info(struct perf_event *bp)
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long va;
+ int len;
- info->address = bp->attr.bp_addr;
+ va = hw->address;
+ len = arch_bp_generic_len(hw->len);
+ WARN_ON_ONCE(len < 0);
+
+ /*
+ * We don't need to worry about va + len - 1 overflowing:
+ * we already require that va is aligned to a multiple of len.
+ */
+ return (va >= TASK_SIZE_MAX) || ((va + len - 1) >= TASK_SIZE_MAX);
+}
+
+static int arch_build_bp_info(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
+{
+ hw->address = attr->bp_addr;
+ hw->mask = 0;
/* Type */
- switch (bp->attr.bp_type) {
+ switch (attr->bp_type) {
case HW_BREAKPOINT_W:
- info->type = X86_BREAKPOINT_WRITE;
+ hw->type = X86_BREAKPOINT_WRITE;
break;
case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- info->type = X86_BREAKPOINT_RW;
+ hw->type = X86_BREAKPOINT_RW;
break;
case HW_BREAKPOINT_X:
/*
@@ -254,23 +260,23 @@ static int arch_build_bp_info(struct perf_event *bp)
* acceptable for kprobes. On non-kprobes kernels, we don't
* allow kernel breakpoints at all.
*/
- if (bp->attr.bp_addr >= TASK_SIZE_MAX) {
+ if (attr->bp_addr >= TASK_SIZE_MAX) {
#ifdef CONFIG_KPROBES
- if (within_kprobe_blacklist(bp->attr.bp_addr))
+ if (within_kprobe_blacklist(attr->bp_addr))
return -EINVAL;
#else
return -EINVAL;
#endif
}
- info->type = X86_BREAKPOINT_EXECUTE;
+ hw->type = X86_BREAKPOINT_EXECUTE;
/*
* x86 inst breakpoints need to have a specific undefined len.
* But we still need to check userspace is not trying to setup
* an unsupported length, to get a range breakpoint for example.
*/
- if (bp->attr.bp_len == sizeof(long)) {
- info->len = X86_BREAKPOINT_LEN_X;
+ if (attr->bp_len == sizeof(long)) {
+ hw->len = X86_BREAKPOINT_LEN_X;
return 0;
}
default:
@@ -278,28 +284,26 @@ static int arch_build_bp_info(struct perf_event *bp)
}
/* Len */
- info->mask = 0;
-
- switch (bp->attr.bp_len) {
+ switch (attr->bp_len) {
case HW_BREAKPOINT_LEN_1:
- info->len = X86_BREAKPOINT_LEN_1;
+ hw->len = X86_BREAKPOINT_LEN_1;
break;
case HW_BREAKPOINT_LEN_2:
- info->len = X86_BREAKPOINT_LEN_2;
+ hw->len = X86_BREAKPOINT_LEN_2;
break;
case HW_BREAKPOINT_LEN_4:
- info->len = X86_BREAKPOINT_LEN_4;
+ hw->len = X86_BREAKPOINT_LEN_4;
break;
#ifdef CONFIG_X86_64
case HW_BREAKPOINT_LEN_8:
- info->len = X86_BREAKPOINT_LEN_8;
+ hw->len = X86_BREAKPOINT_LEN_8;
break;
#endif
default:
/* AMD range breakpoint */
- if (!is_power_of_2(bp->attr.bp_len))
+ if (!is_power_of_2(attr->bp_len))
return -EINVAL;
- if (bp->attr.bp_addr & (bp->attr.bp_len - 1))
+ if (attr->bp_addr & (attr->bp_len - 1))
return -EINVAL;
if (!boot_cpu_has(X86_FEATURE_BPEXT))
@@ -312,8 +316,8 @@ static int arch_build_bp_info(struct perf_event *bp)
* breakpoints, then we'll have to check for kprobe-blacklisted
* addresses anywhere in the range.
*/
- info->mask = bp->attr.bp_len - 1;
- info->len = X86_BREAKPOINT_LEN_1;
+ hw->mask = attr->bp_len - 1;
+ hw->len = X86_BREAKPOINT_LEN_1;
}
return 0;
@@ -322,22 +326,23 @@ static int arch_build_bp_info(struct perf_event *bp)
/*
* Validate the arch-specific HW Breakpoint register settings
*/
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
unsigned int align;
int ret;
- ret = arch_build_bp_info(bp);
+ ret = arch_build_bp_info(bp, attr, hw);
if (ret)
return ret;
- switch (info->len) {
+ switch (hw->len) {
case X86_BREAKPOINT_LEN_1:
align = 0;
- if (info->mask)
- align = info->mask;
+ if (hw->mask)
+ align = hw->mask;
break;
case X86_BREAKPOINT_LEN_2:
align = 1;
@@ -358,7 +363,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* Check that the low-order bits of the address are appropriate
* for the alignment implied by len.
*/
- if (info->address & align)
+ if (hw->address & align)
return -EINVAL;
return 0;
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
new file mode 100644
index 000000000000..ddeeaac8adda
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+ pushf
+ pop %_ASM_AX
+ ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+ push %_ASM_ARG1
+ popf
+ ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index ae38dccf0c8f..2b949f4fd4d8 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -105,14 +105,4 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
}
#endif
-#ifdef CONFIG_KPROBES_ON_FTRACE
-extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb);
-#else
-static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- return 0;
-}
-#endif
#endif
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 6f4d42377fe5..b0d1e81c96bb 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -66,8 +66,6 @@
#include "common.h"
-void jprobe_return_end(void);
-
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -395,8 +393,6 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
- (u8 *) real;
if ((s64) (s32) newdisp != newdisp) {
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
- pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
- src, real, insn->displacement.value);
return 0;
}
disp = (u8 *) dest + insn_offset_displacement(insn);
@@ -596,7 +592,6 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
* stepping.
*/
regs->ip = (unsigned long)p->ainsn.insn;
- preempt_enable_no_resched();
return;
}
#endif
@@ -640,8 +635,7 @@ static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
* Raise a BUG or we'll continue in an endless reentering loop
* and eventually a stack overflow.
*/
- printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n",
- p->addr);
+ pr_err("Unrecoverable kprobe detected.\n");
dump_kprobe(p);
BUG();
default:
@@ -669,12 +663,10 @@ int kprobe_int3_handler(struct pt_regs *regs)
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
/*
- * We don't want to be preempted for the entire
- * duration of kprobe processing. We conditionally
- * re-enable preemption at the end of this function,
- * and also in reenter_kprobe() and setup_singlestep().
+ * We don't want to be preempted for the entire duration of kprobe
+ * processing. Since int3 and debug trap disables irqs and we clear
+ * IF while singlestepping, it must be no preemptible.
*/
- preempt_disable();
kcb = get_kprobe_ctlblk();
p = get_kprobe(addr);
@@ -690,13 +682,14 @@ int kprobe_int3_handler(struct pt_regs *regs)
/*
* If we have no pre-handler or it returned 0, we
* continue with normal processing. If we have a
- * pre-handler and it returned non-zero, it prepped
- * for calling the break_handler below on re-entry
- * for jprobe processing, so get out doing nothing
- * more here.
+ * pre-handler and it returned non-zero, that means
+ * user handler setup registers to exit to another
+ * instruction, we must skip the single stepping.
*/
if (!p->pre_handler || !p->pre_handler(p, regs))
setup_singlestep(p, regs, kcb, 0);
+ else
+ reset_current_kprobe();
return 1;
}
} else if (*addr != BREAKPOINT_INSTRUCTION) {
@@ -710,18 +703,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
* the original instruction.
*/
regs->ip = (unsigned long)addr;
- preempt_enable_no_resched();
return 1;
- } else if (kprobe_running()) {
- p = __this_cpu_read(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- if (!skip_singlestep(p, regs, kcb))
- setup_singlestep(p, regs, kcb, 0);
- return 1;
- }
} /* else: not a kprobe fault; let the kernel handle it */
- preempt_enable_no_resched();
return 0;
}
NOKPROBE_SYMBOL(kprobe_int3_handler);
@@ -972,8 +956,6 @@ int kprobe_debug_handler(struct pt_regs *regs)
}
reset_current_kprobe();
out:
- preempt_enable_no_resched();
-
/*
* if somebody else is singlestepping across a probe point, flags
* will have TF set, in which case, continue the remaining processing
@@ -1020,7 +1002,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- preempt_enable_no_resched();
} else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/*
@@ -1083,93 +1064,6 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- unsigned long addr;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- kcb->jprobe_saved_regs = *regs;
- kcb->jprobe_saved_sp = stack_addr(regs);
- addr = (unsigned long)(kcb->jprobe_saved_sp);
-
- /*
- * As Linus pointed out, gcc assumes that the callee
- * owns the argument space and could overwrite it, e.g.
- * tailcall optimization. So, to be absolutely safe
- * we also save and restore enough stack bytes to cover
- * the argument area.
- * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy
- * raw stack chunk with redzones:
- */
- __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
- regs->ip = (unsigned long)(jp->entry);
-
- /*
- * jprobes use jprobe_return() which skips the normal return
- * path of the function, and this messes up the accounting of the
- * function graph tracer to get messed up.
- *
- * Pause function graph tracing while performing the jprobe function.
- */
- pause_graph_tracing();
- return 1;
-}
-NOKPROBE_SYMBOL(setjmp_pre_handler);
-
-void jprobe_return(void)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- /* Unpoison stack redzones in the frames we are going to jump over. */
- kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
-
- asm volatile (
-#ifdef CONFIG_X86_64
- " xchg %%rbx,%%rsp \n"
-#else
- " xchgl %%ebx,%%esp \n"
-#endif
- " int3 \n"
- " .globl jprobe_return_end\n"
- " jprobe_return_end: \n"
- " nop \n"::"b"
- (kcb->jprobe_saved_sp):"memory");
-}
-NOKPROBE_SYMBOL(jprobe_return);
-NOKPROBE_SYMBOL(jprobe_return_end);
-
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- u8 *addr = (u8 *) (regs->ip - 1);
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- void *saved_sp = kcb->jprobe_saved_sp;
-
- if ((addr > (u8 *) jprobe_return) &&
- (addr < (u8 *) jprobe_return_end)) {
- if (stack_addr(regs) != saved_sp) {
- struct pt_regs *saved_regs = &kcb->jprobe_saved_regs;
- printk(KERN_ERR
- "current sp %p does not match saved sp %p\n",
- stack_addr(regs), saved_sp);
- printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
- show_regs(saved_regs);
- printk(KERN_ERR "Current registers\n");
- show_regs(regs);
- BUG();
- }
- /* It's OK to start function graph tracing again */
- unpause_graph_tracing();
- *regs = kcb->jprobe_saved_regs;
- __memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-NOKPROBE_SYMBOL(longjmp_break_handler);
-
bool arch_within_kprobe_blacklist(unsigned long addr)
{
bool is_in_entry_trampoline_section = false;
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 8dc0161cec8f..ef819e19650b 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,36 +25,6 @@
#include "common.h"
-static nokprobe_inline
-void __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb, unsigned long orig_ip)
-{
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- __this_cpu_write(current_kprobe, NULL);
- if (orig_ip)
- regs->ip = orig_ip;
-}
-
-int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- if (kprobe_ftrace(p)) {
- __skip_singlestep(p, regs, kcb, 0);
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-NOKPROBE_SYMBOL(skip_singlestep);
-
/* Ftrace callback handler for kprobes -- called under preepmt disabed */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
@@ -75,18 +45,25 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
- /* To emulate trap based kprobes, preempt_disable here */
- preempt_disable();
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs)) {
- __skip_singlestep(p, regs, kcb, orig_ip);
- preempt_enable_no_resched();
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ regs->ip = orig_ip;
}
/*
- * If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe, and keep preempt count +1.
+ * If pre_handler returns !0, it changes regs->ip. We have to
+ * skip emulating post_handler.
*/
+ __this_cpu_write(current_kprobe, NULL);
}
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 203d398802a3..eaf02f2e7300 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -491,7 +491,6 @@ int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
if (!reenter)
reset_current_kprobe();
- preempt_enable_no_resched();
return 1;
}
return 0;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5b2300b818af..a37bda38d205 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -154,7 +154,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
for (;;) {
if (!n.halted)
- prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link))
break;
@@ -188,7 +188,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
if (n->halted)
smp_send_reschedule(n->cpu);
else if (swq_has_sleeper(&n->wq))
- swake_up(&n->wq);
+ swake_up_one(&n->wq);
}
static void apf_task_wake_all(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index bf8d1eb7fca3..3b8e7c13c614 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -138,6 +138,7 @@ static unsigned long kvm_get_tsc_khz(void)
src = &hv_clock[cpu].pvti;
tsc_khz = pvclock_tsc_khz(src);
put_cpu();
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
return tsc_khz;
}
@@ -319,6 +320,8 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
+ pvclock_set_pvti_cpu0_va(hv_clock);
+
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
@@ -366,14 +369,11 @@ int __init kvm_setup_vsyscall_timeinfo(void)
vcpu_time = &hv_clock[cpu].pvti;
flags = pvclock_read_flags(vcpu_time);
- if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
- put_cpu();
- return 1;
- }
-
- pvclock_set_pvti_cpu0_va(hv_clock);
put_cpu();
+ if (!(flags & PVCLOCK_TSC_STABLE_BIT))
+ return 1;
+
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
return 0;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 9edadabf04f6..9cb98f7b07c9 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -20,7 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
+DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 4dfd90a75e63..2e9006c1e240 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -60,7 +60,7 @@ void __init check_iommu_entries(struct iommu_table_entry *start,
printk(KERN_ERR "CYCLIC DEPENDENCY FOUND! %pS depends on %pS and vice-versa. BREAKING IT.\n",
p->detect, q->detect);
/* Heavy handed way..*/
- x->depend = 0;
+ x->depend = NULL;
}
}
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index da5190a1ea16..4a710ffffd9a 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c
@@ -9,6 +9,6 @@ static __init int add_pcspkr(void)
pd = platform_device_register_simple("pcspkr", -1, NULL, 0);
- return IS_ERR(pd) ? PTR_ERR(pd) : 0;
+ return PTR_ERR_OR_ZERO(pd);
}
device_initcall(add_pcspkr);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2f7d1d2a5c3..db9656e13ea0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -221,6 +221,11 @@ static void notrace start_secondary(void *unused)
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
+ /*
+ * Initialize the CR4 shadow before doing anything that could
+ * try to read it.
+ */
+ cr4_init_shadow();
__flush_tlb_all();
#endif
load_current_idt();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 093f2ea5dd56..7627455047c2 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -81,16 +81,6 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
-#define STACKTRACE_DUMP_ONCE(task) ({ \
- static bool __section(.data.unlikely) __dumped; \
- \
- if (!__dumped) { \
- __dumped = true; \
- WARN_ON(1); \
- show_stack(task, NULL); \
- } \
-})
-
static int __always_inline
__save_stack_trace_reliable(struct stack_trace *trace,
struct task_struct *task)
@@ -99,30 +89,25 @@ __save_stack_trace_reliable(struct stack_trace *trace,
struct pt_regs *regs;
unsigned long addr;
- for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
+ for (unwind_start(&state, task, NULL, NULL);
+ !unwind_done(&state) && !unwind_error(&state);
unwind_next_frame(&state)) {
regs = unwind_get_entry_regs(&state, NULL);
if (regs) {
+ /* Success path for user tasks */
+ if (user_mode(regs))
+ goto success;
+
/*
* Kernel mode registers on the stack indicate an
* in-kernel interrupt or exception (e.g., preemption
* or a page fault), which can make frame pointers
* unreliable.
*/
- if (!user_mode(regs))
- return -EINVAL;
- /*
- * The last frame contains the user mode syscall
- * pt_regs. Skip it and finish the unwind.
- */
- unwind_next_frame(&state);
- if (!unwind_done(&state)) {
- STACKTRACE_DUMP_ONCE(task);
+ if (IS_ENABLED(CONFIG_FRAME_POINTER))
return -EINVAL;
- }
- break;
}
addr = unwind_get_return_address(&state);
@@ -132,21 +117,22 @@ __save_stack_trace_reliable(struct stack_trace *trace,
* generated code which __kernel_text_address() doesn't know
* about.
*/
- if (!addr) {
- STACKTRACE_DUMP_ONCE(task);
+ if (!addr)
return -EINVAL;
- }
if (save_stack_address(trace, addr, false))
return -EINVAL;
}
/* Check for stack corruption */
- if (unwind_error(&state)) {
- STACKTRACE_DUMP_ONCE(task);
+ if (unwind_error(&state))
+ return -EINVAL;
+
+ /* Success path for non-user tasks, i.e. kthreads and idle tasks */
+ if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
return -EINVAL;
- }
+success:
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index feb28fee6cea..26038eacf74a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -198,7 +198,7 @@ static int orc_sort_cmp(const void *_a, const void *_b)
* whitelisted .o files which didn't get objtool generation.
*/
orc_a = cur_orc_table + (a - cur_orc_ip_table);
- return orc_a->sp_reg == ORC_REG_UNDEFINED ? -1 : 1;
+ return orc_a->sp_reg == ORC_REG_UNDEFINED && !orc_a->end ? -1 : 1;
}
#ifdef CONFIG_MODULES
@@ -352,7 +352,7 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
bool unwind_next_frame(struct unwind_state *state)
{
- unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
+ unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
bool indirect = false;
@@ -363,9 +363,9 @@ bool unwind_next_frame(struct unwind_state *state)
/* Don't let modules unload while we're reading their ORC data. */
preempt_disable();
- /* Have we reached the end? */
+ /* End-of-stack check for user tasks: */
if (state->regs && user_mode(state->regs))
- goto done;
+ goto the_end;
/*
* Find the orc_entry associated with the text address.
@@ -374,9 +374,16 @@ bool unwind_next_frame(struct unwind_state *state)
* calls and calls to noreturn functions.
*/
orc = orc_find(state->signal ? state->ip : state->ip - 1);
- if (!orc || orc->sp_reg == ORC_REG_UNDEFINED)
- goto done;
- orig_ip = state->ip;
+ if (!orc)
+ goto err;
+
+ /* End-of-stack check for kernel threads: */
+ if (orc->sp_reg == ORC_REG_UNDEFINED) {
+ if (!orc->end)
+ goto err;
+
+ goto the_end;
+ }
/* Find the previous frame's stack: */
switch (orc->sp_reg) {
@@ -402,7 +409,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg R10 at ip %pB\n",
(void *)state->ip);
- goto done;
+ goto err;
}
sp = state->regs->r10;
break;
@@ -411,7 +418,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg R13 at ip %pB\n",
(void *)state->ip);
- goto done;
+ goto err;
}
sp = state->regs->r13;
break;
@@ -420,7 +427,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg DI at ip %pB\n",
(void *)state->ip);
- goto done;
+ goto err;
}
sp = state->regs->di;
break;
@@ -429,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!state->regs || !state->full_regs) {
orc_warn("missing regs for base reg DX at ip %pB\n",
(void *)state->ip);
- goto done;
+ goto err;
}
sp = state->regs->dx;
break;
@@ -437,12 +444,12 @@ bool unwind_next_frame(struct unwind_state *state)
default:
orc_warn("unknown SP base reg %d for ip %pB\n",
orc->sp_reg, (void *)state->ip);
- goto done;
+ goto err;
}
if (indirect) {
if (!deref_stack_reg(state, sp, &sp))
- goto done;
+ goto err;
}
/* Find IP, SP and possibly regs: */
@@ -451,7 +458,7 @@ bool unwind_next_frame(struct unwind_state *state)
ip_p = sp - sizeof(long);
if (!deref_stack_reg(state, ip_p, &state->ip))
- goto done;
+ goto err;
state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
state->ip, (void *)ip_p);
@@ -465,7 +472,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
- goto done;
+ goto err;
}
state->regs = (struct pt_regs *)sp;
@@ -477,7 +484,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
- goto done;
+ goto err;
}
state->regs = (void *)sp - IRET_FRAME_OFFSET;
@@ -500,18 +507,18 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_PREV_SP:
if (!deref_stack_reg(state, sp + orc->bp_offset, &state->bp))
- goto done;
+ goto err;
break;
case ORC_REG_BP:
if (!deref_stack_reg(state, state->bp + orc->bp_offset, &state->bp))
- goto done;
+ goto err;
break;
default:
orc_warn("unknown BP base reg %d for ip %pB\n",
orc->bp_reg, (void *)orig_ip);
- goto done;
+ goto err;
}
/* Prevent a recursive loop due to bad ORC data: */
@@ -520,13 +527,16 @@ bool unwind_next_frame(struct unwind_state *state)
state->sp <= prev_sp) {
orc_warn("stack going in the wrong direction? ip=%pB\n",
(void *)orig_ip);
- goto done;
+ goto err;
}
preempt_enable();
return true;
-done:
+err:
+ state->error = true;
+
+the_end:
preempt_enable();
state->stack_info.type = STACK_TYPE_UNKNOWN;
return false;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 92fd433c50b9..1bbec387d289 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -85,7 +85,7 @@ config KVM_AMD_SEV
def_bool y
bool "AMD Secure Encrypted Virtualization (SEV) support"
depends on KVM_AMD && X86_64
- depends on CRYPTO_DEV_CCP && CRYPTO_DEV_CCP_DD && CRYPTO_DEV_SP_PSP
+ depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
---help---
Provides support for launching Encrypted VMs on AMD processors.
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..d536d457517b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1379,7 +1379,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
* using swait_active() is safe.
*/
if (swait_active(q))
- swake_up(q);
+ swake_up_one(q);
if (apic_lvtt_tscdeadline(apic))
ktimer->expired_tscdeadline = ktimer->tscdeadline;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d594690d8b95..6b8f11521c41 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -890,7 +890,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
if (cache->nobjs >= min)
return 0;
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
- page = (void *)__get_free_page(GFP_KERNEL);
+ page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!page)
return -ENOMEM;
cache->objects[cache->nobjs++] = page;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1689f433f3a0..5d8e317c2b04 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2571,6 +2571,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
#ifdef CONFIG_X86_64
int cpu = raw_smp_processor_id();
+ unsigned long fs_base, kernel_gs_base;
#endif
int i;
@@ -2586,12 +2587,20 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
#ifdef CONFIG_X86_64
- save_fsgs_for_kvm();
- vmx->host_state.fs_sel = current->thread.fsindex;
- vmx->host_state.gs_sel = current->thread.gsindex;
-#else
- savesegment(fs, vmx->host_state.fs_sel);
- savesegment(gs, vmx->host_state.gs_sel);
+ if (likely(is_64bit_mm(current->mm))) {
+ save_fsgs_for_kvm();
+ vmx->host_state.fs_sel = current->thread.fsindex;
+ vmx->host_state.gs_sel = current->thread.gsindex;
+ fs_base = current->thread.fsbase;
+ kernel_gs_base = current->thread.gsbase;
+ } else {
+#endif
+ savesegment(fs, vmx->host_state.fs_sel);
+ savesegment(gs, vmx->host_state.gs_sel);
+#ifdef CONFIG_X86_64
+ fs_base = read_msr(MSR_FS_BASE);
+ kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
+ }
#endif
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
@@ -2611,10 +2620,10 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
savesegment(ds, vmx->host_state.ds_sel);
savesegment(es, vmx->host_state.es_sel);
- vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+ vmcs_writel(HOST_FS_BASE, fs_base);
vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
- vmx->msr_host_kernel_gs_base = current->thread.gsbase;
+ vmx->msr_host_kernel_gs_base = kernel_gs_base;
if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#else
@@ -4322,11 +4331,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
- /* KVM supports Enlightened VMCS v1 only */
- if (static_branch_unlikely(&enable_evmcs))
- vmcs_conf->revision_id = KVM_EVMCS_VERSION;
- else
- vmcs_conf->revision_id = vmx_msr_low;
+ vmcs_conf->revision_id = vmx_msr_low;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
@@ -4396,7 +4401,13 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
return NULL;
vmcs = page_address(pages);
memset(vmcs, 0, vmcs_config.size);
- vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */
+
+ /* KVM supports Enlightened VMCS v1 only */
+ if (static_branch_unlikely(&enable_evmcs))
+ vmcs->revision_id = KVM_EVMCS_VERSION;
+ else
+ vmcs->revision_id = vmcs_config.revision_id;
+
return vmcs;
}
@@ -4564,6 +4575,19 @@ static __init int alloc_kvm_area(void)
return -ENOMEM;
}
+ /*
+ * When eVMCS is enabled, alloc_vmcs_cpu() sets
+ * vmcs->revision_id to KVM_EVMCS_VERSION instead of
+ * revision_id reported by MSR_IA32_VMX_BASIC.
+ *
+ * However, even though not explictly documented by
+ * TLFS, VMXArea passed as VMXON argument should
+ * still be marked with revision_id reported by
+ * physical CPU.
+ */
+ if (static_branch_unlikely(&enable_evmcs))
+ vmcs->revision_id = vmcs_config.revision_id;
+
per_cpu(vmxarea, cpu) = vmcs;
}
return 0;
@@ -7869,6 +7893,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
HRTIMER_MODE_REL_PINNED);
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
+ vmx->nested.vpid02 = allocate_vpid();
+
vmx->nested.vmxon = true;
return 0;
@@ -8456,21 +8482,20 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
/* Emulate the VMPTRST instruction */
static int handle_vmptrst(struct kvm_vcpu *vcpu)
{
- unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- gva_t vmcs_gva;
+ unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION);
+ u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+ gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
struct x86_exception e;
+ gva_t gva;
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (get_vmx_mem_address(vcpu, exit_qualification,
- vmx_instruction_info, true, &vmcs_gva))
+ if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva))
return 1;
/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
- if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
- (void *)&to_vmx(vcpu)->nested.current_vmptr,
- sizeof(u64), &e)) {
+ if (kvm_write_guest_virt_system(vcpu, gva, (void *)&current_vmptr,
+ sizeof(gpa_t), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
@@ -10346,11 +10371,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_vmcs;
}
- if (nested) {
+ if (nested)
nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
kvm_vcpu_apicv_active(&vmx->vcpu));
- vmx->nested.vpid02 = allocate_vpid();
- }
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
@@ -10367,7 +10390,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
return &vmx->vcpu;
free_vmcs:
- free_vpid(vmx->nested.vpid02);
free_loaded_vmcs(vmx->loaded_vmcs);
free_msrs:
kfree(vmx->guest_msrs);
@@ -11753,7 +11775,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- u32 msr_entry_idx;
u32 exit_qual;
int r;
@@ -11775,10 +11796,10 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
nested_get_vmcs12_pages(vcpu, vmcs12);
r = EXIT_REASON_MSR_LOAD_FAIL;
- msr_entry_idx = nested_vmx_load_msr(vcpu,
- vmcs12->vm_entry_msr_load_addr,
- vmcs12->vm_entry_msr_load_count);
- if (msr_entry_idx)
+ exit_qual = nested_vmx_load_msr(vcpu,
+ vmcs12->vm_entry_msr_load_addr,
+ vmcs12->vm_entry_msr_load_count);
+ if (exit_qual)
goto fail;
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0046aa70205a..2b812b3c5088 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1097,6 +1097,7 @@ static u32 msr_based_features[] = {
MSR_F10H_DECFG,
MSR_IA32_UCODE_REV,
+ MSR_IA32_ARCH_CAPABILITIES,
};
static unsigned int num_msr_based_features;
@@ -1105,7 +1106,8 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
switch (msr->index) {
case MSR_IA32_UCODE_REV:
- rdmsrl(msr->index, msr->data);
+ case MSR_IA32_ARCH_CAPABILITIES:
+ rdmsrl_safe(msr->index, &msr->data);
break;
default:
if (kvm_x86_ops->get_msr_feature(msr))
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 298ef1479240..3b24dc05251c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -256,7 +256,7 @@ ENTRY(__memcpy_mcsafe)
/* Copy successful. Return zero */
.L_done_memcpy_trap:
- xorq %rax, %rax
+ xorl %eax, %eax
ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 55799873ebe5..8f6cc71e0848 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1441,8 +1441,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
/* sub esp,STACK_SIZE */
EMIT2_off32(0x81, 0xEC, STACK_SIZE);
- /* sub ebp,SCRATCH_SIZE+4+12*/
- EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
+ /* sub ebp,SCRATCH_SIZE+12*/
+ EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
/* xor ebx,ebx */
EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
@@ -1475,8 +1475,8 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
/* mov edx,dword ptr [ebp+off]*/
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
- /* add ebp,SCRATCH_SIZE+4+12*/
- EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
+ /* add ebp,SCRATCH_SIZE+12*/
+ EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);
/* mov ebx,dword ptr [ebp-12]*/
EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 77873ce700ae..ee5d08f25ce4 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -417,7 +417,7 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va)
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
- if (sev_active())
+ if (sev_active() && md->type != EFI_MEMORY_MAPPED_IO)
flags |= _PAGE_ENC;
pfn = md->phys_addr >> PAGE_SHIFT;
@@ -636,6 +636,8 @@ void efi_switch_mm(struct mm_struct *mm)
#ifdef CONFIG_EFI_MIXED
extern efi_status_t efi64_thunk(u32, ...);
+static DEFINE_SPINLOCK(efi_runtime_lock);
+
#define runtime_service32(func) \
({ \
u32 table = (u32)(unsigned long)efi.systab; \
@@ -657,17 +659,14 @@ extern efi_status_t efi64_thunk(u32, ...);
#define efi_thunk(f, ...) \
({ \
efi_status_t __s; \
- unsigned long __flags; \
u32 __func; \
\
- local_irq_save(__flags); \
arch_efi_call_virt_setup(); \
\
__func = runtime_service32(f); \
__s = efi64_thunk(__func, __VA_ARGS__); \
\
arch_efi_call_virt_teardown(); \
- local_irq_restore(__flags); \
\
__s; \
})
@@ -702,14 +701,17 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
efi_status_t status;
u32 phys_tm, phys_tc;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_tm = virt_to_phys_or_null(tm);
phys_tc = virt_to_phys_or_null(tc);
status = efi_thunk(get_time, phys_tm, phys_tc);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -719,13 +721,16 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm)
{
efi_status_t status;
u32 phys_tm;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_tm = virt_to_phys_or_null(tm);
status = efi_thunk(set_time, phys_tm);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -737,8 +742,10 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
{
efi_status_t status;
u32 phys_enabled, phys_pending, phys_tm;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_enabled = virt_to_phys_or_null(enabled);
phys_pending = virt_to_phys_or_null(pending);
@@ -747,6 +754,7 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
status = efi_thunk(get_wakeup_time, phys_enabled,
phys_pending, phys_tm);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -757,13 +765,16 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
{
efi_status_t status;
u32 phys_tm;
+ unsigned long flags;
spin_lock(&rtc_lock);
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_tm = virt_to_phys_or_null(tm);
status = efi_thunk(set_wakeup_time, enabled, phys_tm);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
spin_unlock(&rtc_lock);
return status;
@@ -781,6 +792,9 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
efi_status_t status;
u32 phys_name, phys_vendor, phys_attr;
u32 phys_data_size, phys_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_data_size = virt_to_phys_or_null(data_size);
phys_vendor = virt_to_phys_or_null(vendor);
@@ -791,6 +805,8 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
status = efi_thunk(get_variable, phys_name, phys_vendor,
phys_attr, phys_data_size, phys_data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -800,6 +816,34 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
{
u32 phys_name, phys_vendor, phys_data;
efi_status_t status;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+ phys_vendor = virt_to_phys_or_null(vendor);
+ phys_data = virt_to_phys_or_null_size(data, data_size);
+
+ /* If data_size is > sizeof(u32) we've got problems */
+ status = efi_thunk(set_variable, phys_name, phys_vendor,
+ attr, data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
+ u32 attr, unsigned long data_size,
+ void *data)
+{
+ u32 phys_name, phys_vendor, phys_data;
+ efi_status_t status;
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+ return EFI_NOT_READY;
phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
phys_vendor = virt_to_phys_or_null(vendor);
@@ -809,6 +853,8 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
status = efi_thunk(set_variable, phys_name, phys_vendor,
attr, data_size, phys_data);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -819,6 +865,9 @@ efi_thunk_get_next_variable(unsigned long *name_size,
{
efi_status_t status;
u32 phys_name_size, phys_name, phys_vendor;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_name_size = virt_to_phys_or_null(name_size);
phys_vendor = virt_to_phys_or_null(vendor);
@@ -827,6 +876,8 @@ efi_thunk_get_next_variable(unsigned long *name_size,
status = efi_thunk(get_next_variable, phys_name_size,
phys_name, phys_vendor);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -835,10 +886,15 @@ efi_thunk_get_next_high_mono_count(u32 *count)
{
efi_status_t status;
u32 phys_count;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_count = virt_to_phys_or_null(count);
status = efi_thunk(get_next_high_mono_count, phys_count);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -847,10 +903,15 @@ efi_thunk_reset_system(int reset_type, efi_status_t status,
unsigned long data_size, efi_char16_t *data)
{
u32 phys_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
phys_data = virt_to_phys_or_null_size(data, data_size);
efi_thunk(reset_system, reset_type, status, data_size, phys_data);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
}
static efi_status_t
@@ -872,10 +933,40 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
{
efi_status_t status;
u32 phys_storage, phys_remaining, phys_max;
+ unsigned long flags;
+
+ if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+
+ spin_lock_irqsave(&efi_runtime_lock, flags);
+
+ phys_storage = virt_to_phys_or_null(storage_space);
+ phys_remaining = virt_to_phys_or_null(remaining_space);
+ phys_max = virt_to_phys_or_null(max_variable_size);
+
+ status = efi_thunk(query_variable_info, attr, phys_storage,
+ phys_remaining, phys_max);
+
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
+ return status;
+}
+
+static efi_status_t
+efi_thunk_query_variable_info_nonblocking(u32 attr, u64 *storage_space,
+ u64 *remaining_space,
+ u64 *max_variable_size)
+{
+ efi_status_t status;
+ u32 phys_storage, phys_remaining, phys_max;
+ unsigned long flags;
if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
return EFI_UNSUPPORTED;
+ if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+ return EFI_NOT_READY;
+
phys_storage = virt_to_phys_or_null(storage_space);
phys_remaining = virt_to_phys_or_null(remaining_space);
phys_max = virt_to_phys_or_null(max_variable_size);
@@ -883,6 +974,8 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
status = efi_thunk(query_variable_info, attr, phys_storage,
phys_remaining, phys_max);
+ spin_unlock_irqrestore(&efi_runtime_lock, flags);
+
return status;
}
@@ -908,9 +1001,11 @@ void efi_thunk_runtime_setup(void)
efi.get_variable = efi_thunk_get_variable;
efi.get_next_variable = efi_thunk_get_next_variable;
efi.set_variable = efi_thunk_set_variable;
+ efi.set_variable_nonblocking = efi_thunk_set_variable_nonblocking;
efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count;
efi.reset_system = efi_thunk_reset_system;
efi.query_variable_info = efi_thunk_query_variable_info;
+ efi.query_variable_info_nonblocking = efi_thunk_query_variable_info_nonblocking;
efi.update_capsule = efi_thunk_update_capsule;
efi.query_capsule_caps = efi_thunk_query_capsule_caps;
}
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 36c1f8b9f7e0..844d31cb8a0c 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -105,12 +105,11 @@ early_param("efi_no_storage_paranoia", setup_storage_paranoia);
*/
void efi_delete_dummy_variable(void)
{
- efi.set_variable((efi_char16_t *)efi_dummy_name,
- &EFI_DUMMY_GUID,
- EFI_VARIABLE_NON_VOLATILE |
- EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS,
- 0, NULL);
+ efi.set_variable_nonblocking((efi_char16_t *)efi_dummy_name,
+ &EFI_DUMMY_GUID,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL);
}
/*
@@ -249,7 +248,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
int num_entries;
void *new;
- if (efi_mem_desc_lookup(addr, &md)) {
+ if (efi_mem_desc_lookup(addr, &md) ||
+ md.type != EFI_BOOT_SERVICES_DATA) {
pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr);
return;
}
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index ce8da3a0412c..fd369a6e9ff8 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -137,7 +137,7 @@ ENTRY(restore_registers)
/* Saved in save_processor_state. */
lgdt saved_context_gdt_desc(%rax)
- xorq %rax, %rax
+ xorl %eax, %eax
/* tell the hibernation core that we've just restored the memory */
movq %rax, in_suspend(%rip)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2e9ee023e6bc..81a8e33115ad 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
targets += $(purgatory-y)
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
-$(obj)/sha256.o: $(srctree)/lib/sha256.c
+$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index 744afdc18cf3..56c44d865f7b 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -16,7 +16,7 @@ static int __init gate_vma_init(void)
if (!FIXADDR_USER_START)
return 0;
- gate_vma.vm_mm = NULL;
+ vma_init(&gate_vma, NULL);
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
diff --git a/arch/x86/um/vdso/.gitignore b/arch/x86/um/vdso/.gitignore
index 9cac6d072199..f8b69d84238e 100644
--- a/arch/x86/um/vdso/.gitignore
+++ b/arch/x86/um/vdso/.gitignore
@@ -1,2 +1 @@
-vdso-syms.lds
vdso.lds
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index b2d6967262b2..822ccdba93ad 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -53,22 +53,6 @@ $(vobjs): KBUILD_CFLAGS += $(CFL)
CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage
CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage
-targets += vdso-syms.lds
-extra-$(VDSO64-y) += vdso-syms.lds
-
-#
-# Match symbols in the DSO that look like VDSO*; produce a file of constants.
-#
-sed-vdsosym := -e 's/^00*/0/' \
- -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p'
-quiet_cmd_vdsosym = VDSOSYM $@
-define cmd_vdsosym
- $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@
-endef
-
-$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
- $(call if_changed,vdsosym)
-
#
# The DSO images are built using a special linker script.
#
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 8d4e2e1ae60b..439a94bf89ad 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1207,12 +1207,20 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_setup_features();
- xen_setup_machphys_mapping();
-
/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops.patch = paravirt_patch_default;
pv_cpu_ops = xen_cpu_ops;
+ xen_init_irq_ops();
+
+ /*
+ * Setup xen_vcpu early because it is needed for
+ * local_irq_disable(), irqs_disabled(), e.g. in printk().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
x86_platform.get_nmi_reason = xen_get_nmi_reason;
@@ -1225,10 +1233,12 @@ asmlinkage __visible void __init xen_start_kernel(void)
* Set up some pagetable state before starting to set any ptes.
*/
+ xen_setup_machphys_mapping();
xen_init_mmu_ops();
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
+ __default_kernel_pte_mask &= ~_PAGE_GLOBAL;
/*
* Prevent page tables from being allocated in highmem, even
@@ -1249,20 +1259,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
get_cpu_cap(&boot_cpu_data);
x86_configure_nx();
- xen_init_irq_ops();
-
/* Let's presume PV guests always boot on vCPU with id 0. */
per_cpu(xen_vcpu_id, 0) = 0;
- /*
- * Setup xen_vcpu early because idt_setup_early_handler needs it for
- * local_irq_disable(), irqs_disabled().
- *
- * Don't do the full vcpu_info placement stuff until we have
- * the cpu_possible_mask and a non-dummy shared_info.
- */
- xen_vcpu_info_reset(0);
-
idt_setup_early_handler();
xen_init_capabilities();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 74179852e46c..7515a19fd324 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -128,8 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
void __init xen_init_irq_ops(void)
{
- /* For PVH we use default pv_irq_ops settings. */
- if (!xen_feature(XENFEAT_hvm_callback_vector))
- pv_irq_ops = xen_irq_ops;
+ pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}