aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-05-18 09:42:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-05-18 09:42:20 -0700
commit3d9944978e0bb6c98b901949cb7a22256e48b23d (patch)
treeea068a5eaca4d5ae5c02afcf903665a1f4cfb65b
parent42ea7d7f2a7356962022cdd124d9043c488ca5e2 (diff)
parentdad1743e5993f19b3d7e7bd0fb35dc45b5326626 (diff)
downloadlinux-stericsson-3d9944978e0bb6c98b901949cb7a22256e48b23d.tar.gz
Merge tag 'linus-mce-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull a machine check recovery fix from Tony Luck. I really don't like how the MCE code does some of the things it does, but this does seem to be an improvement. * tag 'linus-mce-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: x86/mce: Only restart instruction after machine check recovery if it is safe
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c14
1 files changed, 11 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d086a09c087d..11c9166c3337 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -945,9 +945,10 @@ struct mce_info {
atomic_t inuse;
struct task_struct *t;
__u64 paddr;
+ int restartable;
} mce_info[MCE_INFO_MAX];
-static void mce_save_info(__u64 addr)
+static void mce_save_info(__u64 addr, int c)
{
struct mce_info *mi;
@@ -955,6 +956,7 @@ static void mce_save_info(__u64 addr)
if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
mi->t = current;
mi->paddr = addr;
+ mi->restartable = c;
return;
}
}
@@ -1130,7 +1132,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
/* schedule action before return to userland */
- mce_save_info(m.addr);
+ mce_save_info(m.addr, m.mcgstatus & MCG_STATUS_RIPV);
set_thread_flag(TIF_MCE_NOTIFY);
} else if (kill_it) {
force_sig(SIGBUS, current);
@@ -1179,7 +1181,13 @@ void mce_notify_process(void)
pr_err("Uncorrected hardware memory error in user-access at %llx",
mi->paddr);
- if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+ /*
+ * We must call memory_failure() here even if the current process is
+ * doomed. We still need to mark the page as poisoned and alert any
+ * other users of the page.
+ */
+ if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 ||
+ mi->restartable == 0) {
pr_err("Memory error not recovered");
force_sig(SIGBUS, current);
}