Signed-off-by: Don Zickus <dzi...@redhat.com>
---
arch/x86/kernel/apic/Makefile | 7 ++++++-
arch/x86/kernel/traps.c | 2 ++
include/linux/nmi.h | 4 ++++
kernel/Makefile | 1 +
lib/Kconfig.debug | 13 +++++++++++++
5 files changed, 26 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 565c1bf..1a4512e 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,12 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
+ifneq ($(CONFIG_NMI_WATCHDOG),y)
+obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
+endif
+obj-$(CONFIG_NMI_WATCHDOG) += hw_nmi.o
+
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3be4687..5b89638 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -406,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
== NOTIFY_STOP)
return;
+#ifndef CONFIG_NMI_WATCHDOG
/*
* Ok, so this is none of the documented NMI sources,
* so it must be the NMI watchdog.
@@ -413,6 +414,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
if (nmi_watchdog_tick(regs, reason))
return;
if (!do_nmi_callback(regs, cpu))
+#endif /* !CONFIG_NMI_WATCHDOG */
unknown_nmi_error(reason, regs);
#else
unknown_nmi_error(reason, regs);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b752e80..a42ff0b 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -47,4 +47,8 @@ static inline bool trigger_all_cpu_backtrace(void)
}
#endif
+#ifdef CONFIG_NMI_WATCHDOG
+int hw_nmi_is_cpu_stuck(struct pt_regs *);
+#endif
+
#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75..8a5abe5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_NMI_WATCHDOG) += nmi_watchdog.o
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed5..04a43a2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -170,6 +170,19 @@ config DETECT_SOFTLOCKUP
can be detected via the NMI-watchdog, on platforms that
support it.)
+config NMI_WATCHDOG
+ bool "Detect Hard Lockups with an NMI Watchdog"
+ depends on DEBUG_KERNEL && PERF_EVENTS
+ default y
+ help
+ Say Y here to enable the kernel to use the NMI as a watchdog
+ to detect hard lockups. This is useful when a cpu hangs for no
+ reason but can still respond to NMIs. A backtrace is displayed
+ for reviewing and reporting.
+
+ The overhead should be minimal, just an extra NMI every few
+ seconds.
+
config BOOTPARAM_SOFTLOCKUP_PANIC
bool "Panic (Reboot) On Soft Lockups"
depends on DETECT_SOFTLOCKUP
--
1.6.6.83.gc9a2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
> +config NMI_WATCHDOG
> + bool "Detect Hard Lockups with an NMI Watchdog"
> + depends on DEBUG_KERNEL && PERF_EVENTS
> + default y
> + help
> + Say Y here to enable the kernel to use the NMI as a watchdog
> + to detect hard lockups. This is useful when a cpu hangs for no
> + reason but can still respond to NMIs. A backtrace is displayed
> + for reviewing and reporting.
> +
> + The overhead should be minimal, just an extra NMI every few
> + seconds.
Thought for later patches: I think an architecture should be able to express
via a Kconfig switch that it actually _has_ NMI events. There are architectures
which don't have a PMU driver and only have software events. There are also
architectures that have a PMU driver but no NMIs.
Something like ARCH_HAS_NMI_PERF_EVENTS?
Also, I haven't checked, but what is the practical effect of the new generic
watchdog on x86 CPUs that do not have a native PMU driver yet - such as
P4s?
Anyway, i'll create a tip:perf/nmi topic branch for these patches, it
certainly looks like a useful generalization and a new architecture that has
perf could easily enable it, without having to write its own NMI watchdog
implementation. It's also useful for any new watchdog features that people
might want to add. Plus it makes the x86 PMU code cleaner in the long run as
well.
Thanks,
Ingo
nmi_watchdog: Config option to enable new nmi_watchdog
These are the bits that enable the new nmi_watchdog and safely
isolate the old nmi_watchdog. Only one or the other can run,
not both at the same time.
Signed-off-by: Don Zickus <dzi...@redhat.com>
Cc: Linus Torvalds <torv...@linux-foundation.org>
Cc: Andrew Morton <ak...@linux-foundation.org>
Cc: gorc...@gmail.com
Cc: ar...@redhat.com
Cc: pet...@infradead.org
LKML-Reference: <1265424425-31562-4-g...@redhat.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
arch/x86/kernel/apic/Makefile | 7 ++++++-
arch/x86/kernel/traps.c | 2 ++
include/linux/nmi.h | 4 ++++
kernel/Makefile | 1 +
lib/Kconfig.debug | 13 +++++++++++++
5 files changed, 26 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 565c1bf..1a4512e 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,12 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
+ifneq ($(CONFIG_NMI_WATCHDOG),y)
+obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
+endif
+obj-$(CONFIG_NMI_WATCHDOG) += hw_nmi.o
+
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 51ef893..973cbc4 100644
index 25c3ed5..f80b67e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -170,6 +170,19 @@ config DETECT_SOFTLOCKUP
can be detected via the NMI-watchdog, on platforms that
support it.)
+config NMI_WATCHDOG
+ bool "Detect Hard Lockups with an NMI Watchdog"
+ depends on DEBUG_KERNEL && PERF_EVENTS
+ default y
+ help
+ Say Y here to enable the kernel to use the NMI as a watchdog
+ to detect hard lockups. This is useful when a cpu hangs for no
+ reason but can still respond to NMIs. A backtrace is displayed
+ for reviewing and reporting.
+
+ The overhead should be minimal, just an extra NMI every few
+ seconds.
+
config BOOTPARAM_SOFTLOCKUP_PANIC
bool "Panic (Reboot) On Soft Lockups"
depends on DETECT_SOFTLOCKUP
--
The P4 PMU is not yet implemented. I'll try to post to LKML the things
I've done for it this evening, though it's pretty ugly, I would say.
I guess I assumed the perf event subsystem would take care of that, which
is why I made the config option dependent on PERF_EVENTS. I am open to
suggestions on how to enhance it.
>
> Also, i havent checked, but what is the practical effect of the new generic
> watchdog on x86 CPUs that does not have a native PMU driver yet - such as
> P4s?
I believe the call to perf_event_create_kernel_counter would fail, which
then prevents the cpu from coming online. Probably not the smartest thing
to do. I was looking at adding code to fall back to trying PERF_TYPE_SOFTWARE.
Let me dig up a P4 box and see what happens.
>
> Anyway, i'll create a tip:perf/nmi topic branch for these patches, it
> certainly looks like a useful generalization and a new architecture that has
> perf could easily enable it, without having to write its own NMI watchdog
> implementation. It's also useful for any new watchdog features that people
> might want to add. Plus it makes the x86 PMU code cleaner in the long run as
> well.
Agreed.
Cheers,
Don
nmi_watchdog: Only enable on x86 for now
It won't even build on other platforms just yet - so restrict it
to x86 for now.
Cc: Don Zickus <dzi...@redhat.com>
Cc: gorc...@gmail.com
Cc: ar...@redhat.com
Cc: pet...@infradead.org
LKML-Reference: <1265424425-31562-4-g...@redhat.com>
Signed-off-by: Ingo Molnar <mi...@elte.hu>
---
lib/Kconfig.debug | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f80b67e..acef882 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -173,6 +173,7 @@ config DETECT_SOFTLOCKUP
config NMI_WATCHDOG
bool "Detect Hard Lockups with an NMI Watchdog"
depends on DEBUG_KERNEL && PERF_EVENTS
+ depends on X86
default y
help
Say Y here to enable the kernel to use the NMI as a watchdog