powerpc/64s: Wire up arch_trigger_cpumask_backtrace()

This allows eg. the RCU stall detector, or the soft/hardlockup
detectors to trigger a backtrace on all CPUs.

We implement this by sending a "safe" NMI, which will actually only
send an IPI. Unfortunately the generic code prints "NMI", so that's a
little confusing but we can probably live with it.

If one of the CPUs doesn't respond to the IPI, we then print some info
from it's paca and do a backtrace based on its saved_r1.

Example output:

  INFO: rcu_sched detected stalls on CPUs/tasks:
  	2-...0: (0 ticks this GP) idle=1be/1/4611686018427387904 softirq=1055/1055 fqs=25735
  	(detected by 4, t=58847 jiffies, g=58, c=57, q=1258)
  Sending NMI from CPU 4 to CPUs 2:
  CPU 2 didn't respond to backtrace IPI, inspecting paca.
  irq_soft_mask: 0x01 in_mce: 0 in_nmi: 0 current: 3623 (bash)
  Back trace of paca->saved_r1 (0xc0000000e1c83ba0) (possibly stale):
  Call Trace:
  [c0000000e1c83ba0] [0000000000000014] 0x14 (unreliable)
  [c0000000e1c83bc0] [c000000000765798] lkdtm_do_action+0x48/0x80
  [c0000000e1c83bf0] [c000000000765a40] direct_entry+0x110/0x1b0
  [c0000000e1c83c90] [c00000000058e650] full_proxy_write+0x90/0xe0
  [c0000000e1c83ce0] [c0000000003aae3c] __vfs_write+0x6c/0x1f0
  [c0000000e1c83d80] [c0000000003ab214] vfs_write+0xd4/0x240
  [c0000000e1c83dd0] [c0000000003ab5cc] ksys_write+0x6c/0x110
  [c0000000e1c83e30] [c00000000000b860] system_call+0x58/0x6c

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
This commit is contained in:
Michael Ellerman 2018-05-02 23:07:28 +10:00
parent 6ba55716a2
commit 5cc05910f2
2 changed files with 57 additions and 0 deletions

View File

@ -8,4 +8,10 @@ extern void arch_touch_nmi_watchdog(void);
static inline void arch_touch_nmi_watchdog(void) {}
#endif
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE)
extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
bool exclude_self);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif
#endif /* _ASM_NMI_H */

View File

@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
@ -22,6 +23,8 @@
#include <linux/ftrace.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
@ -194,3 +197,51 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
#ifdef CONFIG_PPC_BOOK3S_64
static void handle_backtrace_ipi(struct pt_regs *regs)
{
nmi_cpu_backtrace(regs);
}
static void raise_backtrace_ipi(cpumask_t *mask)
{
unsigned int cpu;
for_each_cpu(cpu, mask) {
if (cpu == smp_processor_id())
handle_backtrace_ipi(NULL);
else
smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
}
for_each_cpu(cpu, mask) {
struct paca_struct *p = paca_ptrs[cpu];
cpumask_clear_cpu(cpu, mask);
pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
if (!virt_addr_valid(p)) {
pr_warn("paca pointer appears corrupt? (%px)\n", p);
continue;
}
pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
p->irq_soft_mask, p->in_mce, p->in_nmi);
if (virt_addr_valid(p->__current))
pr_cont(" current: %d (%s)\n", p->__current->pid,
p->__current->comm);
else
pr_cont(" current pointer corrupt? (%px)\n", p->__current);
pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
show_stack(p->__current, (unsigned long *)p->saved_r1);
}
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
}
#endif /* CONFIG_PPC64 */