x86, mce: implement new status bits
The x86 architecture recently added some new machine check status bits:
S(ignalled) and AR (Action-Required). Signalled allows to check
if a specific event caused an exception or was just logged through CMCI.
AR allows the kernel to decide if an event needs immediate action
or can be delayed or ignored.
Implement support for these new status bits. mce_severity() uses
the new bits to grade the machine check correctly and decide what
to do. The exception handler uses AR to decide to kill or not.
The S bit is used to separate events between the poll/CMCI handler
and the exception handler.
Classical UC always leads to panic. That was true before anyways
because the existing CPUs always passed a PCC with it.
Also corrects the rules whether to kill in user or kernel context
and how to handle missing RIPV.
The machine check handler largely uses the mce-severity grading
engine now instead of making its own decisions. This means the logic
is centralized in one place. This is useful because it has to be
evaluated multiple times.
v2: Some rule fixes; Add AO events
Fix RIPV, RIPV|EIPV order (Ying Huang)
Fix UCNA with AR=1 message (Ying Huang)
Add comment about panicing in m_c_p.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff9c732..f051a78 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@
static int mce_bootlog = -1;
static int monarch_timeout = -1;
static int mce_panic_timeout;
+int mce_ser;
static char trigger[128];
static char *trigger_argv[2] = { trigger, NULL };
@@ -391,6 +392,15 @@
* Those are just logged through /dev/mcelog.
*
* This is executed in standard interrupt context.
+ *
+ * Note: spec recommends to panic for fatal unsignalled
+ * errors here. However this would be quite problematic --
+ * we would need to reimplement the Monarch handling and
+ * it would mess up the exclusion between exception handler
+ * and poll hander -- * so we skip this for now.
+ * These cases should not happen anyways, or only when the CPU
+ * is already totally * confused. In this case it's likely it will
+ * not fully execute the machine check handler either.
*/
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
@@ -417,13 +427,13 @@
continue;
/*
- * Uncorrected events are handled by the exception handler
- * when it is enabled. But when the exception is disabled log
- * everything.
+ * Uncorrected or signalled events are handled by the exception
+ * handler when it is enabled, so don't process those here.
*
* TBD do the same check for MCI_STATUS_EN here?
*/
- if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
+ if (!(flags & MCP_UC) &&
+ (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
if (m.status & MCI_STATUS_MISCV)
@@ -790,6 +800,12 @@
barrier();
/*
+ * When no restart IP must always kill or panic.
+ */
+ if (!(m.mcgstatus & MCG_STATUS_RIPV))
+ kill_it = 1;
+
+ /*
* Go through all the banks in exclusion of the other CPUs.
* This way we don't report duplicated events on shared banks
* because the first one to see it will clear it.
@@ -809,10 +825,11 @@
continue;
/*
- * Non uncorrected errors are handled by machine_check_poll
- * Leave them alone, unless this panics.
+ * Non uncorrected or non signaled errors are handled by
+ * machine_check_poll. Leave them alone, unless this panics.
*/
- if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out)
+ if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+ !no_way_out)
continue;
/*
@@ -820,17 +837,16 @@
*/
add_taint(TAINT_MACHINE_CHECK);
- __set_bit(i, toclear);
+ severity = mce_severity(&m, tolerant, NULL);
- if (m.status & MCI_STATUS_EN) {
- /*
- * If this error was uncorrectable and there was
- * an overflow, we're in trouble. If no overflow,
- * we might get away with just killing a task.
- */
- if (m.status & MCI_STATUS_UC)
- kill_it = 1;
- } else {
+ /*
+ * When machine check was for corrected handler don't touch,
+ * unless we're panicing.
+ */
+ if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+ continue;
+ __set_bit(i, toclear);
+ if (severity == MCE_NO_SEVERITY) {
/*
* Machine check event was not enabled. Clear, but
* ignore.
@@ -838,6 +854,12 @@
continue;
}
+ /*
+ * Kill on action required.
+ */
+ if (severity == MCE_AR_SEVERITY)
+ kill_it = 1;
+
if (m.status & MCI_STATUS_MISCV)
m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
if (m.status & MCI_STATUS_ADDRV)
@@ -846,7 +868,6 @@
mce_get_rip(&m, regs);
mce_log(&m);
- severity = mce_severity(&m, tolerant, NULL);
if (severity > worst) {
*final = m;
worst = severity;
@@ -879,29 +900,9 @@
* one task, do that. If the user has set the tolerance very
* high, don't try to do anything at all.
*/
- if (kill_it && tolerant < 3) {
- int user_space = 0;
- /*
- * If the EIPV bit is set, it means the saved IP is the
- * instruction which caused the MCE.
- */
- if (m.mcgstatus & MCG_STATUS_EIPV)
- user_space = final->ip && (final->cs & 3);
-
- /*
- * If we know that the error was in user space, send a
- * SIGBUS. Otherwise, panic if tolerance is low.
- *
- * force_sig() takes an awful lot of locks and has a slight
- * risk of deadlocking.
- */
- if (user_space) {
- force_sig(SIGBUS, current);
- } else if (panic_on_oops || tolerant < 2) {
- mce_panic("Uncorrected machine check", final, msg);
- }
- }
+ if (kill_it && tolerant < 3)
+ force_sig(SIGBUS, current);
/* notify userspace ASAP */
set_thread_flag(TIF_MCE_NOTIFY);
@@ -1049,6 +1050,9 @@
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
rip_msr = MSR_IA32_MCG_EIP;
+ if (cap & MCG_SER_P)
+ mce_ser = 1;
+
return 0;
}