panic, kexec: make __crash_kexec() NMI safe
Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by:6ce47fd961
("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes:6ce47fd961
("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider <vschneid@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Baoquan He <bhe@redhat.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Juri Lelli <jlelli@redhat.com> Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
7bb5da0d49
commit
05c6257433
|
@ -93,13 +93,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
|
|||
|
||||
/*
|
||||
* Because we write directly to the reserved memory region when loading
|
||||
* crash kernels we need a mutex here to prevent multiple crash kernels
|
||||
* from attempting to load simultaneously, and to prevent a crash kernel
|
||||
* from loading over the top of a in use crash kernel.
|
||||
*
|
||||
* KISS: always take the mutex.
|
||||
* crash kernels we need a serialization here to prevent multiple crash
|
||||
* kernels from attempting to load simultaneously.
|
||||
*/
|
||||
if (!mutex_trylock(&kexec_mutex))
|
||||
if (!kexec_trylock())
|
||||
return -EBUSY;
|
||||
|
||||
if (flags & KEXEC_ON_CRASH) {
|
||||
|
@ -165,7 +162,7 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
|
|||
|
||||
kimage_free(image);
|
||||
out_unlock:
|
||||
mutex_unlock(&kexec_mutex);
|
||||
kexec_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
#include <crypto/hash.h>
|
||||
#include "kexec_internal.h"
|
||||
|
||||
DEFINE_MUTEX(kexec_mutex);
|
||||
atomic_t __kexec_lock = ATOMIC_INIT(0);
|
||||
|
||||
/* Per cpu memory for storing cpu states in case of system crash. */
|
||||
note_buf_t __percpu *crash_notes;
|
||||
|
@ -959,7 +959,7 @@ late_initcall(kexec_core_sysctl_init);
|
|||
*/
|
||||
void __noclone __crash_kexec(struct pt_regs *regs)
|
||||
{
|
||||
/* Take the kexec_mutex here to prevent sys_kexec_load
|
||||
/* Take the kexec_lock here to prevent sys_kexec_load
|
||||
* running on one cpu from replacing the crash kernel
|
||||
* we are using after a panic on a different cpu.
|
||||
*
|
||||
|
@ -967,7 +967,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
|
|||
* of memory the xchg(&kexec_crash_image) would be
|
||||
* sufficient. But since I reuse the memory...
|
||||
*/
|
||||
if (mutex_trylock(&kexec_mutex)) {
|
||||
if (kexec_trylock()) {
|
||||
if (kexec_crash_image) {
|
||||
struct pt_regs fixed_regs;
|
||||
|
||||
|
@ -976,7 +976,7 @@ void __noclone __crash_kexec(struct pt_regs *regs)
|
|||
machine_crash_shutdown(&fixed_regs);
|
||||
machine_kexec(kexec_crash_image);
|
||||
}
|
||||
mutex_unlock(&kexec_mutex);
|
||||
kexec_unlock();
|
||||
}
|
||||
}
|
||||
STACK_FRAME_NON_STANDARD(__crash_kexec);
|
||||
|
@ -1008,13 +1008,13 @@ ssize_t crash_get_memory_size(void)
|
|||
{
|
||||
ssize_t size = 0;
|
||||
|
||||
if (!mutex_trylock(&kexec_mutex))
|
||||
if (!kexec_trylock())
|
||||
return -EBUSY;
|
||||
|
||||
if (crashk_res.end != crashk_res.start)
|
||||
size = resource_size(&crashk_res);
|
||||
|
||||
mutex_unlock(&kexec_mutex);
|
||||
kexec_unlock();
|
||||
return size;
|
||||
}
|
||||
|
||||
|
@ -1025,7 +1025,7 @@ int crash_shrink_memory(unsigned long new_size)
|
|||
unsigned long old_size;
|
||||
struct resource *ram_res;
|
||||
|
||||
if (!mutex_trylock(&kexec_mutex))
|
||||
if (!kexec_trylock())
|
||||
return -EBUSY;
|
||||
|
||||
if (kexec_crash_image) {
|
||||
|
@ -1064,7 +1064,7 @@ int crash_shrink_memory(unsigned long new_size)
|
|||
insert_resource(&iomem_resource, ram_res);
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&kexec_mutex);
|
||||
kexec_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1136,7 +1136,7 @@ int kernel_kexec(void)
|
|||
{
|
||||
int error = 0;
|
||||
|
||||
if (!mutex_trylock(&kexec_mutex))
|
||||
if (!kexec_trylock())
|
||||
return -EBUSY;
|
||||
if (!kexec_image) {
|
||||
error = -EINVAL;
|
||||
|
@ -1212,6 +1212,6 @@ int kernel_kexec(void)
|
|||
#endif
|
||||
|
||||
Unlock:
|
||||
mutex_unlock(&kexec_mutex);
|
||||
kexec_unlock();
|
||||
return error;
|
||||
}
|
||||
|
|
|
@ -339,7 +339,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
|
|||
|
||||
image = NULL;
|
||||
|
||||
if (!mutex_trylock(&kexec_mutex))
|
||||
if (!kexec_trylock())
|
||||
return -EBUSY;
|
||||
|
||||
dest_image = &kexec_image;
|
||||
|
@ -411,7 +411,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
|
|||
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
|
||||
arch_kexec_protect_crashkres();
|
||||
|
||||
mutex_unlock(&kexec_mutex);
|
||||
kexec_unlock();
|
||||
kimage_free(image);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -13,7 +13,20 @@ void kimage_terminate(struct kimage *image);
|
|||
int kimage_is_destination_range(struct kimage *image,
|
||||
unsigned long start, unsigned long end);
|
||||
|
||||
extern struct mutex kexec_mutex;
|
||||
/*
|
||||
* Whatever is used to serialize accesses to the kexec_crash_image needs to be
|
||||
* NMI safe, as __crash_kexec() can happen during nmi_panic(), so here we use a
|
||||
* "simple" atomic variable that is acquired with a cmpxchg().
|
||||
*/
|
||||
extern atomic_t __kexec_lock;
|
||||
static inline bool kexec_trylock(void)
|
||||
{
|
||||
return atomic_cmpxchg_acquire(&__kexec_lock, 0, 1) == 0;
|
||||
}
|
||||
static inline void kexec_unlock(void)
|
||||
{
|
||||
atomic_set_release(&__kexec_lock, 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KEXEC_FILE
|
||||
#include <linux/purgatory.h>
|
||||
|
|
Loading…
Reference in New Issue