From c0d121720220584bba2876b032e58a076b843fa1 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 19 Jul 2007 01:49:46 -0700 Subject: drivers/edac: add new nmi rescan Provides a way for NMI-reported errors on x86 to notify the EDAC subsystem of pending ECC errors by writing to a software state variable. Here's the reworked patch. I added an EDAC stub to the kernel so we can have variables that are in the kernel even if EDAC is a module. I also implemented the idea of using the chip driver to select error detection mode via module parameter and eliminating the kernel compile option. Please review/test. Thx! Also, I only made changes to some of the chipset drivers since I am unfamiliar with the other ones. We can add similar changes as we go. Signed-off-by: Dave Jiang Signed-off-by: Douglas Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 12 ++++++++++++ arch/x86_64/kernel/traps.c | 11 +++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 109ebbcde58..3e7753c78b9 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -41,6 +41,10 @@ #include #endif +#if defined(CONFIG_EDAC) +#include <linux/edac.h> +#endif + #include #include #include @@ -638,6 +642,14 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) printk(KERN_EMERG "Uhhuh. 
NMI received for unknown reason %02x on " "CPU %d.\n", reason, smp_processor_id()); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); + +#if defined(CONFIG_EDAC) + if(edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 74cbeb2e99a..8713ad4a4db 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -34,6 +34,10 @@ #include #include +#if defined(CONFIG_EDAC) +#include <linux/edac.h> +#endif + #include #include #include @@ -719,6 +723,13 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) reason); printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); +#if defined(CONFIG_EDAC) + if(edac_handler_set()) { + edac_atomic_assert_error(); + return; + } +#endif + if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); -- cgit v1.2.3