linux-brain/arch/powerpc/kernel/stacktrace.c

287 lines
6.8 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Stack trace utility functions etc.
*
* Copyright 2008 Christoph Hellwig, IBM Corp.
2018-05-04 21:38:34 +09:00
* Copyright 2018 SUSE Linux GmbH
* Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
*/
#include <linux/export.h>
2018-05-04 21:38:34 +09:00
#include <linux/kallsyms.h>
#include <linux/module.h>
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
2018-05-04 21:38:34 +09:00
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
2018-05-04 21:38:34 +09:00
#include <linux/ftrace.h>
powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi() commit 7c6986ade69e3c81bac831645bc72109cd798a80 upstream. In raise_backtrace_ipi() we iterate through the cpumask of CPUs, sending each an IPI asking them to do a backtrace, but we don't wait for the backtrace to happen. We then iterate through the CPU mask again, and if any CPU hasn't done the backtrace and cleared itself from the mask, we print a trace on its behalf, noting that the trace may be "stale". This works well enough when a CPU is not responding, because in that case it doesn't receive the IPI and the sending CPU is left to print the trace. But when all CPUs are responding we are left with a race between the sending and receiving CPUs, if the sending CPU wins the race then it will erroneously print a trace. This leads to spurious "stale" traces from the sending CPU, which can then be interleaved messily with the receiving CPU, note the CPU numbers, eg: [ 1658.929157][ C7] rcu: Stack dump where RCU GP kthread last ran: [ 1658.929223][ C7] Sending NMI from CPU 7 to CPUs 1: [ 1658.929303][ C1] NMI backtrace for cpu 1 [ 1658.929303][ C7] CPU 1 didn't respond to backtrace IPI, inspecting paca. [ 1658.929362][ C1] CPU: 1 PID: 325 Comm: kworker/1:1H Tainted: G W E 5.13.0-rc2+ #46 [ 1658.929405][ C7] irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 325 (kworker/1:1H) [ 1658.929465][ C1] Workqueue: events_highpri test_work_fn [test_lockup] [ 1658.929549][ C7] Back trace of paca->saved_r1 (0xc0000000057fb400) (possibly stale): [ 1658.929592][ C1] NIP: c00000000002cf50 LR: c008000000820178 CTR: c00000000002cfa0 To fix it, change the logic so that the sending CPU waits 5s for the receiving CPU to print its trace. If the receiving CPU prints its trace successfully then the sending CPU just continues, avoiding any spurious "stale" trace. This has the added benefit of allowing all CPUs to print their traces in order and avoids any interleaving of their output. Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Cc: stable@vger.kernel.org # v4.18+ Reported-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210625140408.3351173-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-06-25 15:28:41 +09:00
#include <linux/delay.h>
2018-05-04 21:38:34 +09:00
#include <asm/kprobes.h>
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
#include <asm/paca.h>
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
static void save_context_stack(struct stack_trace *trace, unsigned long sp,
struct task_struct *tsk, int savesched)
{
for (;;) {
unsigned long *stack = (unsigned long *) sp;
unsigned long newsp, ip;
if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
return;
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
if (savesched || !in_sched_functions(ip)) {
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
}
if (trace->nr_entries >= trace->max_entries)
return;
sp = newsp;
}
}
void save_stack_trace(struct stack_trace *trace)
{
unsigned long sp;
sp = current_stack_pointer();
save_context_stack(trace, sp, current, 1);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
unsigned long sp;
if (!try_get_task_stack(tsk))
return;
if (tsk == current)
sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
save_context_stack(trace, sp, tsk, 0);
put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void
save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
save_context_stack(trace, regs->gpr[1], current, 0);
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
2018-05-04 21:38:34 +09:00
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
/*
* This function returns an error if it detects any unreliable features of the
* stack. Otherwise it guarantees that the stack trace is reliable.
*
* If the task is not 'current', the caller *must* ensure the task is inactive.
*/
static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
struct stack_trace *trace)
2018-05-04 21:38:34 +09:00
{
unsigned long sp;
unsigned long newsp;
2018-05-04 21:38:34 +09:00
unsigned long stack_page = (unsigned long)task_stack_page(tsk);
unsigned long stack_end;
int graph_idx = 0;
bool firstframe;
2018-05-04 21:38:34 +09:00
stack_end = stack_page + THREAD_SIZE;
if (!is_idle_task(tsk)) {
/*
* For user tasks, this is the SP value loaded on
* kernel entry, see "PACAKSAVE(r13)" in _switch() and
* system_call_common()/EXCEPTION_PROLOG_COMMON().
*
* Likewise for non-swapper kernel threads,
* this also happens to be the top of the stack
* as setup by copy_thread().
*
* Note that stack backlinks are not properly setup by
* copy_thread() and thus, a forked task() will have
* an unreliable stack trace until it's been
* _switch()'ed to for the first time.
*/
stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
} else {
/*
* idle tasks have a custom stack layout,
* c.f. cpu_idle_thread_init().
*/
stack_end -= STACK_FRAME_OVERHEAD;
}
if (tsk == current)
sp = current_stack_pointer();
else
sp = tsk->thread.ksp;
2018-05-04 21:38:34 +09:00
if (sp < stack_page + sizeof(struct thread_struct) ||
sp > stack_end - STACK_FRAME_MIN_SIZE) {
return -EINVAL;
2018-05-04 21:38:34 +09:00
}
for (firstframe = true; sp != stack_end;
firstframe = false, sp = newsp) {
2018-05-04 21:38:34 +09:00
unsigned long *stack = (unsigned long *) sp;
unsigned long ip;
2018-05-04 21:38:34 +09:00
/* sanity check: ABI requires SP to be aligned 16 bytes. */
if (sp & 0xF)
return -EINVAL;
2018-05-04 21:38:34 +09:00
newsp = stack[0];
/* Stack grows downwards; unwinder may only go up. */
if (newsp <= sp)
return -EINVAL;
2018-05-04 21:38:34 +09:00
if (newsp != stack_end &&
newsp > stack_end - STACK_FRAME_MIN_SIZE) {
return -EINVAL; /* invalid backlink, too far up. */
2018-05-04 21:38:34 +09:00
}
/*
* We can only trust the bottom frame's backlink, the
* rest of the frame may be uninitialized, continue to
* the next.
*/
if (firstframe)
continue;
/* Mark stacktraces with exception frames as unreliable. */
if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
return -EINVAL;
}
2018-05-04 21:38:34 +09:00
/* Examine the saved LR: it must point into kernel code. */
ip = stack[STACK_FRAME_LR_SAVE];
if (!__kernel_text_address(ip))
return -EINVAL;
2018-05-04 21:38:34 +09:00
/*
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
2018-05-04 21:38:34 +09:00
/*
* Mark stacktraces with kretprobed functions on them
* as unreliable.
*/
if (ip == (unsigned long)kretprobe_trampoline)
return -EINVAL;
#endif
2018-05-04 21:38:34 +09:00
if (trace->nr_entries >= trace->max_entries)
return -E2BIG;
2018-05-04 21:38:34 +09:00
if (!trace->skip)
trace->entries[trace->nr_entries++] = ip;
else
trace->skip--;
}
return 0;
}
int save_stack_trace_tsk_reliable(struct task_struct *tsk,
struct stack_trace *trace)
{
int ret;
/*
* If the task doesn't have a stack (e.g., a zombie), the stack is
* "reliably" empty.
*/
if (!try_get_task_stack(tsk))
return 0;
ret = __save_stack_trace_tsk_reliable(tsk, trace);
put_task_stack(tsk);
return ret;
}
2018-05-04 21:38:34 +09:00
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
static void handle_backtrace_ipi(struct pt_regs *regs)
{
nmi_cpu_backtrace(regs);
}
static void raise_backtrace_ipi(cpumask_t *mask)
{
powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi() commit 7c6986ade69e3c81bac831645bc72109cd798a80 upstream. In raise_backtrace_ipi() we iterate through the cpumask of CPUs, sending each an IPI asking them to do a backtrace, but we don't wait for the backtrace to happen. We then iterate through the CPU mask again, and if any CPU hasn't done the backtrace and cleared itself from the mask, we print a trace on its behalf, noting that the trace may be "stale". This works well enough when a CPU is not responding, because in that case it doesn't receive the IPI and the sending CPU is left to print the trace. But when all CPUs are responding we are left with a race between the sending and receiving CPUs, if the sending CPU wins the race then it will erroneously print a trace. This leads to spurious "stale" traces from the sending CPU, which can then be interleaved messily with the receiving CPU, note the CPU numbers, eg: [ 1658.929157][ C7] rcu: Stack dump where RCU GP kthread last ran: [ 1658.929223][ C7] Sending NMI from CPU 7 to CPUs 1: [ 1658.929303][ C1] NMI backtrace for cpu 1 [ 1658.929303][ C7] CPU 1 didn't respond to backtrace IPI, inspecting paca. [ 1658.929362][ C1] CPU: 1 PID: 325 Comm: kworker/1:1H Tainted: G W E 5.13.0-rc2+ #46 [ 1658.929405][ C7] irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 325 (kworker/1:1H) [ 1658.929465][ C1] Workqueue: events_highpri test_work_fn [test_lockup] [ 1658.929549][ C7] Back trace of paca->saved_r1 (0xc0000000057fb400) (possibly stale): [ 1658.929592][ C1] NIP: c00000000002cf50 LR: c008000000820178 CTR: c00000000002cfa0 To fix it, change the logic so that the sending CPU waits 5s for the receiving CPU to print its trace. If the receiving CPU prints its trace successfully then the sending CPU just continues, avoiding any spurious "stale" trace. This has the added benefit of allowing all CPUs to print their traces in order and avoids any interleaving of their output. Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Cc: stable@vger.kernel.org # v4.18+ Reported-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210625140408.3351173-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-06-25 15:28:41 +09:00
struct paca_struct *p;
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
unsigned int cpu;
powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi() commit 7c6986ade69e3c81bac831645bc72109cd798a80 upstream. In raise_backtrace_ipi() we iterate through the cpumask of CPUs, sending each an IPI asking them to do a backtrace, but we don't wait for the backtrace to happen. We then iterate through the CPU mask again, and if any CPU hasn't done the backtrace and cleared itself from the mask, we print a trace on its behalf, noting that the trace may be "stale". This works well enough when a CPU is not responding, because in that case it doesn't receive the IPI and the sending CPU is left to print the trace. But when all CPUs are responding we are left with a race between the sending and receiving CPUs, if the sending CPU wins the race then it will erroneously print a trace. This leads to spurious "stale" traces from the sending CPU, which can then be interleaved messily with the receiving CPU, note the CPU numbers, eg: [ 1658.929157][ C7] rcu: Stack dump where RCU GP kthread last ran: [ 1658.929223][ C7] Sending NMI from CPU 7 to CPUs 1: [ 1658.929303][ C1] NMI backtrace for cpu 1 [ 1658.929303][ C7] CPU 1 didn't respond to backtrace IPI, inspecting paca. [ 1658.929362][ C1] CPU: 1 PID: 325 Comm: kworker/1:1H Tainted: G W E 5.13.0-rc2+ #46 [ 1658.929405][ C7] irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 325 (kworker/1:1H) [ 1658.929465][ C1] Workqueue: events_highpri test_work_fn [test_lockup] [ 1658.929549][ C7] Back trace of paca->saved_r1 (0xc0000000057fb400) (possibly stale): [ 1658.929592][ C1] NIP: c00000000002cf50 LR: c008000000820178 CTR: c00000000002cfa0 To fix it, change the logic so that the sending CPU waits 5s for the receiving CPU to print its trace. If the receiving CPU prints its trace successfully then the sending CPU just continues, avoiding any spurious "stale" trace. This has the added benefit of allowing all CPUs to print their traces in order and avoids any interleaving of their output. Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Cc: stable@vger.kernel.org # v4.18+ Reported-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210625140408.3351173-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-06-25 15:28:41 +09:00
u64 delay_us;
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
for_each_cpu(cpu, mask) {
powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi() commit 7c6986ade69e3c81bac831645bc72109cd798a80 upstream. In raise_backtrace_ipi() we iterate through the cpumask of CPUs, sending each an IPI asking them to do a backtrace, but we don't wait for the backtrace to happen. We then iterate through the CPU mask again, and if any CPU hasn't done the backtrace and cleared itself from the mask, we print a trace on its behalf, noting that the trace may be "stale". This works well enough when a CPU is not responding, because in that case it doesn't receive the IPI and the sending CPU is left to print the trace. But when all CPUs are responding we are left with a race between the sending and receiving CPUs, if the sending CPU wins the race then it will erroneously print a trace. This leads to spurious "stale" traces from the sending CPU, which can then be interleaved messily with the receiving CPU, note the CPU numbers, eg: [ 1658.929157][ C7] rcu: Stack dump where RCU GP kthread last ran: [ 1658.929223][ C7] Sending NMI from CPU 7 to CPUs 1: [ 1658.929303][ C1] NMI backtrace for cpu 1 [ 1658.929303][ C7] CPU 1 didn't respond to backtrace IPI, inspecting paca. [ 1658.929362][ C1] CPU: 1 PID: 325 Comm: kworker/1:1H Tainted: G W E 5.13.0-rc2+ #46 [ 1658.929405][ C7] irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 325 (kworker/1:1H) [ 1658.929465][ C1] Workqueue: events_highpri test_work_fn [test_lockup] [ 1658.929549][ C7] Back trace of paca->saved_r1 (0xc0000000057fb400) (possibly stale): [ 1658.929592][ C1] NIP: c00000000002cf50 LR: c008000000820178 CTR: c00000000002cfa0 To fix it, change the logic so that the sending CPU waits 5s for the receiving CPU to print its trace. If the receiving CPU prints its trace successfully then the sending CPU just continues, avoiding any spurious "stale" trace. This has the added benefit of allowing all CPUs to print their traces in order and avoids any interleaving of their output. Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Cc: stable@vger.kernel.org # v4.18+ Reported-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210625140408.3351173-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-06-25 15:28:41 +09:00
if (cpu == smp_processor_id()) {
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
handle_backtrace_ipi(NULL);
powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi() commit 7c6986ade69e3c81bac831645bc72109cd798a80 upstream. In raise_backtrace_ipi() we iterate through the cpumask of CPUs, sending each an IPI asking them to do a backtrace, but we don't wait for the backtrace to happen. We then iterate through the CPU mask again, and if any CPU hasn't done the backtrace and cleared itself from the mask, we print a trace on its behalf, noting that the trace may be "stale". This works well enough when a CPU is not responding, because in that case it doesn't receive the IPI and the sending CPU is left to print the trace. But when all CPUs are responding we are left with a race between the sending and receiving CPUs, if the sending CPU wins the race then it will erroneously print a trace. This leads to spurious "stale" traces from the sending CPU, which can then be interleaved messily with the receiving CPU, note the CPU numbers, eg: [ 1658.929157][ C7] rcu: Stack dump where RCU GP kthread last ran: [ 1658.929223][ C7] Sending NMI from CPU 7 to CPUs 1: [ 1658.929303][ C1] NMI backtrace for cpu 1 [ 1658.929303][ C7] CPU 1 didn't respond to backtrace IPI, inspecting paca. [ 1658.929362][ C1] CPU: 1 PID: 325 Comm: kworker/1:1H Tainted: G W E 5.13.0-rc2+ #46 [ 1658.929405][ C7] irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 325 (kworker/1:1H) [ 1658.929465][ C1] Workqueue: events_highpri test_work_fn [test_lockup] [ 1658.929549][ C7] Back trace of paca->saved_r1 (0xc0000000057fb400) (possibly stale): [ 1658.929592][ C1] NIP: c00000000002cf50 LR: c008000000820178 CTR: c00000000002cfa0 To fix it, change the logic so that the sending CPU waits 5s for the receiving CPU to print its trace. If the receiving CPU prints its trace successfully then the sending CPU just continues, avoiding any spurious "stale" trace. This has the added benefit of allowing all CPUs to print their traces in order and avoids any interleaving of their output. Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Cc: stable@vger.kernel.org # v4.18+ Reported-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210625140408.3351173-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-06-25 15:28:41 +09:00
continue;
}
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
powerpc/stacktrace: Fix spurious "stale" traces in raise_backtrace_ipi() commit 7c6986ade69e3c81bac831645bc72109cd798a80 upstream. In raise_backtrace_ipi() we iterate through the cpumask of CPUs, sending each an IPI asking them to do a backtrace, but we don't wait for the backtrace to happen. We then iterate through the CPU mask again, and if any CPU hasn't done the backtrace and cleared itself from the mask, we print a trace on its behalf, noting that the trace may be "stale". This works well enough when a CPU is not responding, because in that case it doesn't receive the IPI and the sending CPU is left to print the trace. But when all CPUs are responding we are left with a race between the sending and receiving CPUs, if the sending CPU wins the race then it will erroneously print a trace. This leads to spurious "stale" traces from the sending CPU, which can then be interleaved messily with the receiving CPU, note the CPU numbers, eg: [ 1658.929157][ C7] rcu: Stack dump where RCU GP kthread last ran: [ 1658.929223][ C7] Sending NMI from CPU 7 to CPUs 1: [ 1658.929303][ C1] NMI backtrace for cpu 1 [ 1658.929303][ C7] CPU 1 didn't respond to backtrace IPI, inspecting paca. [ 1658.929362][ C1] CPU: 1 PID: 325 Comm: kworker/1:1H Tainted: G W E 5.13.0-rc2+ #46 [ 1658.929405][ C7] irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 325 (kworker/1:1H) [ 1658.929465][ C1] Workqueue: events_highpri test_work_fn [test_lockup] [ 1658.929549][ C7] Back trace of paca->saved_r1 (0xc0000000057fb400) (possibly stale): [ 1658.929592][ C1] NIP: c00000000002cf50 LR: c008000000820178 CTR: c00000000002cfa0 To fix it, change the logic so that the sending CPU waits 5s for the receiving CPU to print its trace. If the receiving CPU prints its trace successfully then the sending CPU just continues, avoiding any spurious "stale" trace. This has the added benefit of allowing all CPUs to print their traces in order and avoids any interleaving of their output. Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Cc: stable@vger.kernel.org # v4.18+ Reported-by: Nathan Lynch <nathanl@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210625140408.3351173-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-06-25 15:28:41 +09:00
delay_us = 5 * USEC_PER_SEC;
if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
// Now wait up to 5s for the other CPU to do its backtrace
while (cpumask_test_cpu(cpu, mask) && delay_us) {
udelay(1);
delay_us--;
}
// Other CPU cleared itself from the mask
if (delay_us)
continue;
}
p = paca_ptrs[cpu];
powerpc/64s: Wire up arch_trigger_cpumask_backtrace() This allows eg. the RCU stall detector, or the soft/hardlockup detectors to trigger a backtrace on all CPUs. We implement this by sending a "safe" NMI, which will actually only send an IPI. Unfortunately the generic code prints "NMI", so that's a little confusing but we can probably live with it. If one of the CPUs doesn't respond to the IPI, we then print some info from it's paca and do a backtrace based on its saved_r1. Example output: INFO: rcu_sched detected stalls on CPUs/tasks: 2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735 (detected by 4, t=58847 jiffies, g=58, c=57, q=1258) Sending NMI from CPU 4 to CPUs 2: CPU 2 didn't respond to backtrace IPI, inspecting paca. irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash) Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale): Call Trace: [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable) [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80 [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0 [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0 [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0 [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240 [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110 [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
2018-05-02 22:07:28 +09:00
cpumask_clear_cpu(cpu, mask);
pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
if (!virt_addr_valid(p)) {
pr_warn("paca pointer appears corrupt? (%px)\n", p);
continue;
}
pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
p->irq_soft_mask, p->in_mce, p->in_nmi);
if (virt_addr_valid(p->__current))
pr_cont(" current: %d (%s)\n", p->__current->pid,
p->__current->comm);
else
pr_cont(" current pointer corrupt? (%px)\n", p->__current);
pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
show_stack(p->__current, (unsigned long *)p->saved_r1);
}
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
}
#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */