linux-brain/arch/mips/kernel/process.c

782 lines
19 KiB
C
Raw Normal View History

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1994 - 1999, 2000 by Ralf Baechle and others.
* Copyright (C) 2005, 2006 by Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2004 Thiemo Seufer
* Copyright (C) 2013 Imagination Technologies Ltd.
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/personality.h>
#include <linux/sys.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/kallsyms.h>
#include <linux/random.h>
2015-01-08 21:17:37 +09:00
#include <linux/prctl.h>
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
#include <linux/nmi.h>
#include <asm/asm.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 19:06:19 +09:00
#include <asm/dsemul.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
#include <asm/irq.h>
#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
#include <asm/processor.h>
#include <asm/reg.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/elf.h>
#include <asm/isadep.h>
#include <asm/inst.h>
#include <asm/stacktrace.h>
#include <asm/irq_regs.h>
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
play_dead();
}
#endif
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
{
unsigned long status;
/* New thread loses kernel privileges. */
status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
status |= KU_USER;
regs->cp0_status = status;
lose_fpu(0);
clear_thread_flag(TIF_MSA_CTX_LIVE);
clear_used_math();
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 19:06:19 +09:00
atomic_set(&current->thread.bd_emu_frame, BD_EMUFRAME_NONE);
init_dsp();
regs->cp0_epc = pc;
regs->regs[29] = sp;
}
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 19:06:19 +09:00
void exit_thread(struct task_struct *tsk)
{
/*
* User threads may have allocated a delay slot emulation frame.
* If so, clean up that allocation.
*/
if (!(current->flags & PF_KTHREAD))
dsemul_thread_cleanup(tsk);
}
MIPS: fork: Fix MSA/FPU/DSP context duplication race There is a race in the MIPS fork code which allows the child to get a stale copy of parent MSA/FPU/DSP state that is active in hardware registers when the fork() is called. This is because copy_thread() saves the live register state into the child context only if the hardware is currently in use, apparently on the assumption that the hardware state cannot have been saved and disabled since the initial duplication of the task_struct. However preemption is certainly possible during this window. An example sequence of events is as follows: 1) The parent userland process puts important data into saved floating point registers ($f20-$f31), which are then dirty compared to the process' stored context. 2) The parent process calls fork() which does a clone system call. 3) In the kernel, do_fork() -> copy_process() -> dup_task_struct() -> arch_dup_task_struct() (which uses the weakly defined default implementation). This duplicates the parent process' task context, which includes a stale version of its FP context from when it was last saved, probably some time before (1). 4) At some point before copy_process() calls copy_thread(), such as when duplicating the memory map, the process is desceduled. Perhaps it is preempted asynchronously, or perhaps it sleeps while blocked on a mutex. The dirty FP state in the FP registers is saved to the parent process' context and the FPU is disabled. 5) When the process is rescheduled again it continues copying state until it gets to copy_thread(), which checks whether the FPU is in use, so that it can copy that dirty state to the child process' task context. Because of the deschedule however the FPU is not in use, so the child process' context is left with stale FP context from the last time the parent saved it (some time before (1)). 6) When the new child process is scheduled it reads the important data from the saved floating point register, and ends up doing a NULL pointer dereference as a result of the stale data. This use of saved floating point registers across function calls can be triggered fairly easily by explicitly using inline asm with a current (MIPS R2) compiler, but is far more likely to happen unintentionally with a MIPS R6 compiler where the FP registers are more likely to get used as scratch registers for storing non-fp data. It is easily fixed, in the same way that other architectures do it, by overriding the implementation of arch_dup_task_struct() to sync the dirty hardware state to the parent process' task context *prior* to duplicating it, rather than copying straight to the child process' task context in copy_thread(). Note, the FPU hardware is not disabled so the parent process may continue executing with the live register context, but now the child process is guaranteed to have an identical copy of it at that point. Signed-off-by: James Hogan <james.hogan@imgtec.com> Reported-by: Matthew Fortune <matthew.fortune@imgtec.com> Tested-by: Markos Chandras <markos.chandras@imgtec.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9075/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2015-01-19 19:30:54 +09:00
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
/*
* Save any process state which is live in hardware registers to the
* parent context prior to duplication. This prevents the new child
* state becoming stale if the parent is preempted before copy_thread()
* gets a chance to save the parent's live hardware registers to the
* child context.
*/
preempt_disable();
if (is_msa_enabled())
save_msa(current);
else if (is_fpu_owner())
_save_fp(current);
save_dsp(current);
preempt_enable();
*dst = *src;
return 0;
}
/*
* Copy architecture-specific thread state
*/
int copy_thread(unsigned long clone_flags, unsigned long usp,
unsigned long kthread_arg, struct task_struct *p)
{
struct thread_info *ti = task_thread_info(p);
struct pt_regs *childregs, *regs = current_pt_regs();
unsigned long childksp;
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
/* set up new TSS. */
childregs = (struct pt_regs *) childksp - 1;
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
memset(childregs, 0, sizeof(struct pt_regs));
ti->addr_limit = KERNEL_DS;
p->thread.reg16 = usp; /* fn */
p->thread.reg17 = kthread_arg;
p->thread.reg29 = childksp;
p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
((status & (ST0_KUC | ST0_IEC)) << 2);
#else
status |= ST0_EXL;
#endif
childregs->cp0_status = status;
return 0;
}
/* user thread */
*childregs = *regs;
childregs->regs[7] = 0; /* Clear error flag */
childregs->regs[2] = 0; /* Child gets zero as return value */
if (usp)
childregs->regs[29] = usp;
ti->addr_limit = USER_DS;
p->thread.reg29 = (unsigned long) childregs;
p->thread.reg31 = (unsigned long) ret_from_fork;
/*
* New tasks lose permission to use the fpu. This accelerates context
* switching for most programs since they don't use the fpu.
*/
childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
clear_tsk_thread_flag(p, TIF_USEDFPU);
clear_tsk_thread_flag(p, TIF_USEDMSA);
clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE);
#ifdef CONFIG_MIPS_MT_FPAFF
clear_tsk_thread_flag(p, TIF_FPUBOUND);
#endif /* CONFIG_MIPS_MT_FPAFF */
MIPS: Use per-mm page to execute branch delay slot instructions In some cases the kernel needs to execute an instruction from the delay slot of an emulated branch instruction. These cases include: - Emulated floating point branch instructions (bc1[ft]l?) for systems which don't include an FPU, or upon which the kernel is run with the "nofpu" parameter. - MIPSr6 systems running binaries targeting older revisions of the architecture, which may include branch instructions whose encodings are no longer valid in MIPSr6. Executing instructions from such delay slots is done by writing the instruction to memory followed by a trap, as part of an "emuframe", and executing it. This avoids the requirement of an emulator for the entire MIPS instruction set. Prior to this patch such emuframes are written to the user stack and executed from there. This patch moves FP branch delay emuframes off of the user stack and into a per-mm page. Allocating a page per-mm leaves userland with access to only what it had access to previously, and compared to other solutions is relatively simple. When a thread requires a delay slot emulation, it is allocated a frame. A thread may only have one frame allocated at any one time, since it may only ever be executing one instruction at any one time. In order to ensure that we can free up allocated frame later, its index is recorded in struct thread_struct. In the typical case, after executing the delay slot instruction we'll execute a break instruction with the BRK_MEMU code. This traps back to the kernel & leads to a call to do_dsemulret which frees the allocated frame & moves the user PC back to the instruction that would have executed following the emulated branch. In some cases the delay slot instruction may be invalid, such as a branch, or may trigger an exception. In these cases the BRK_MEMU break instruction will not be hit. In order to ensure that frames are freed this patch introduces dsemul_thread_cleanup() and calls it to free any allocated frame upon thread exit. If the instruction generated an exception & leads to a signal being delivered to the thread, or indeed if a signal simply happens to be delivered to the thread whilst it is executing from the struct emuframe, then we need to take care to exit the frame appropriately. This is done by either rolling back the user PC to the branch or advancing it to the continuation PC prior to signal delivery, using dsemul_thread_rollback(). If this were not done then a sigreturn would return to the struct emuframe, and if that frame had meanwhile been used in response to an emulated branch instruction within the signal handler then we would execute the wrong user code. Whilst a user could theoretically place something like a compact branch to self in a delay slot and cause their thread to become stuck in an infinite loop with the frame never being deallocated, this would: - Only affect the users single process. - Be architecturally invalid since there would be a branch in the delay slot, which is forbidden. - Be extremely unlikely to happen by mistake, and provide a program with no more ability to harm the system than a simple infinite loop would. If a thread requires a delay slot emulation & no frame is available to it (ie. the process has enough other threads that all frames are currently in use) then the thread joins a waitqueue. It will sleep until a frame is freed by another thread in the process. Since we now know whether a thread has an allocated frame due to our tracking of its index, the cookie field of struct emuframe is removed as we can be more certain whether we have a valid frame. Since a thread may only ever have a single frame at any given time, the epc field of struct emuframe is also removed & the PC to continue from is instead stored in struct thread_struct. Together these changes simplify & shrink struct emuframe somewhat, allowing twice as many frames to fit into the page allocated for them. The primary benefit of this patch is that we are now free to mark the user stack non-executable where that is possible. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: Maciej Rozycki <maciej.rozycki@imgtec.com> Cc: Faraz Shahbazker <faraz.shahbazker@imgtec.com> Cc: Raghu Gandham <raghu.gandham@imgtec.com> Cc: Matthew Fortune <matthew.fortune@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13764/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-07-08 19:06:19 +09:00
atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE);
if (clone_flags & CLONE_SETTLS)
ti->tp_value = regs->regs[7];
return 0;
}
#ifdef CONFIG_CC_STACKPROTECTOR
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
struct mips_frame_info {
void *func;
unsigned long func_size;
int frame_size;
int pc_offset;
};
#define J_TARGET(pc,target) \
(((unsigned long)(pc) & 0xf0000000) | ((target) << 2))
MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2016-11-08 00:07:06 +09:00
static inline int is_ra_save_ins(union mips_instruction *ip, int *poff)
{
#ifdef CONFIG_CPU_MICROMIPS
/*
* swsp ra,offset
* swm16 reglist,offset(sp)
* swm32 reglist,offset(sp)
* sw32 ra,offset(sp)
* jradiussp - NOT SUPPORTED
*
* microMIPS is way more fun...
*/
if (mm_insn_16bit(ip->halfword[1])) {
MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2016-11-08 00:07:06 +09:00
switch (ip->mm16_r5_format.opcode) {
case mm_swsp16_op:
if (ip->mm16_r5_format.rt != 31)
return 0;
*poff = ip->mm16_r5_format.simmediate;
*poff = (*poff << 2) / sizeof(ulong);
return 1;
case mm_pool16c_op:
switch (ip->mm16_m_format.func) {
case mm_swm16_op:
*poff = ip->mm16_m_format.imm;
*poff += 1 + ip->mm16_m_format.rlist;
*poff = (*poff << 2) / sizeof(ulong);
return 1;
default:
return 0;
}
default:
return 0;
}
}
MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2016-11-08 00:07:06 +09:00
switch (ip->i_format.opcode) {
case mm_sw32_op:
if (ip->i_format.rs != 29)
return 0;
if (ip->i_format.rt != 31)
return 0;
*poff = ip->i_format.simmediate / sizeof(ulong);
return 1;
case mm_pool32b_op:
switch (ip->mm_m_format.func) {
case mm_swm32_func:
if (ip->mm_m_format.rd < 0x10)
return 0;
if (ip->mm_m_format.base != 29)
return 0;
*poff = ip->mm_m_format.simmediate;
*poff += (ip->mm_m_format.rd & 0xf) * sizeof(u32);
*poff /= sizeof(ulong);
return 1;
default:
return 0;
}
default:
return 0;
}
#else
/* sw / sd $ra, offset($sp) */
MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2016-11-08 00:07:06 +09:00
if ((ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op) &&
ip->i_format.rs == 29 && ip->i_format.rt == 31) {
*poff = ip->i_format.simmediate / sizeof(ulong);
return 1;
}
return 0;
#endif
}
static inline int is_jump_ins(union mips_instruction *ip)
{
#ifdef CONFIG_CPU_MICROMIPS
/*
* jr16,jrc,jalr16,jalr16
* jal
* jalr/jr,jalr.hb/jr.hb,jalrs,jalrs.hb
* jraddiusp - NOT SUPPORTED
*
* microMIPS is kind of more fun...
*/
if (mm_insn_16bit(ip->halfword[1])) {
if ((ip->mm16_r5_format.opcode == mm_pool16c_op &&
(ip->mm16_r5_format.rt & mm_jr16_op) == mm_jr16_op))
return 1;
return 0;
}
if (ip->j_format.opcode == mm_j32_op)
return 1;
if (ip->j_format.opcode == mm_jal32_op)
return 1;
if (ip->r_format.opcode != mm_pool32a_op ||
ip->r_format.func != mm_pool32axf_op)
return 0;
return ((ip->u_format.uimmediate >> 6) & mm_jalr_op) == mm_jalr_op;
#else
if (ip->j_format.opcode == j_op)
return 1;
if (ip->j_format.opcode == jal_op)
return 1;
if (ip->r_format.opcode != spec_op)
return 0;
return ip->r_format.func == jalr_op || ip->r_format.func == jr_op;
#endif
}
static inline int is_sp_move_ins(union mips_instruction *ip)
{
#ifdef CONFIG_CPU_MICROMIPS
/*
* addiusp -imm
* addius5 sp,-imm
* addiu32 sp,sp,-imm
* jradiussp - NOT SUPPORTED
*
* microMIPS is not more fun...
*/
if (mm_insn_16bit(ip->halfword[1])) {
return (ip->mm16_r3_format.opcode == mm_pool16d_op &&
ip->mm16_r3_format.simmediate && mm_addiusp_func) ||
(ip->mm16_r5_format.opcode == mm_pool16d_op &&
ip->mm16_r5_format.rt == 29);
}
return ip->mm_i_format.opcode == mm_addiu32_op &&
ip->mm_i_format.rt == 29 && ip->mm_i_format.rs == 29;
#else
/* addiu/daddiu sp,sp,-imm */
if (ip->i_format.rs != 29 || ip->i_format.rt != 29)
return 0;
if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op)
return 1;
#endif
return 0;
}
static int get_frame_info(struct mips_frame_info *info)
{
bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS);
union mips_instruction insn, *ip, *ip_end;
const unsigned int max_insns = 128;
unsigned int i;
info->pc_offset = -1;
info->frame_size = 0;
ip = (void *)msk_isa16_mode((ulong)info->func);
if (!ip)
goto err;
ip_end = (void *)ip + info->func_size;
for (i = 0; i < max_insns && ip < ip_end; i++, ip++) {
if (is_mmips && mm_insn_16bit(ip->halfword[0])) {
insn.halfword[0] = 0;
insn.halfword[1] = ip->halfword[0];
} else if (is_mmips) {
insn.halfword[0] = ip->halfword[1];
insn.halfword[1] = ip->halfword[0];
} else {
insn.word = ip->word;
}
if (is_jump_ins(&insn))
break;
if (!info->frame_size) {
if (is_sp_move_ins(&insn))
{
#ifdef CONFIG_CPU_MICROMIPS
if (mm_insn_16bit(ip->halfword[0]))
{
unsigned short tmp;
if (ip->halfword[0] & mm_addiusp_func)
{
tmp = (((ip->halfword[0] >> 1) & 0x1ff) << 2);
info->frame_size = -(signed short)(tmp | ((tmp & 0x100) ? 0xfe00 : 0));
} else {
tmp = (ip->halfword[0] >> 1);
info->frame_size = -(signed short)(tmp & 0xf);
}
ip = (void *) &ip->halfword[1];
ip--;
} else
#endif
info->frame_size = - ip->i_format.simmediate;
}
continue;
}
MIPS: Calculate microMIPS ra properly when unwinding the stack commit bb9bc4689b9c635714fbcd5d335bad9934a7ebfc upstream. get_frame_info() calculates the offset of the return address within a stack frame simply by dividing a the bottom 16 bits of the instruction, treated as a signed integer, by the size of a long. Whilst this works for MIPS32 & MIPS64 ISAs where the sw or sd instructions are used, it's incorrect for microMIPS where encodings differ. The result is that we typically completely fail to unwind the stack on microMIPS. Fix this by adjusting is_ra_save_ins() to calculate the return address offset, and take into account the various different encodings there in the same place as we consider whether an instruction is storing the ra/$31 register. With this we are now able to unwind the stack for kernels targetting the microMIPS ISA, for example we can produce: Call Trace: [<80109e1f>] show_stack+0x63/0x7c [<8011ea17>] __warn+0x9b/0xac [<8011ea45>] warn_slowpath_fmt+0x1d/0x20 [<8013fe53>] register_console+0x43/0x314 [<8067c58d>] of_setup_earlycon+0x1dd/0x1ec [<8067f63f>] early_init_dt_scan_chosen_stdout+0xe7/0xf8 [<8066c115>] do_early_param+0x75/0xac [<801302f9>] parse_args+0x1dd/0x308 [<8066c459>] parse_early_options+0x25/0x28 [<8066c48b>] parse_early_param+0x2f/0x38 [<8066e8cf>] setup_arch+0x113/0x488 [<8066c4f3>] start_kernel+0x57/0x328 ---[ end trace 0000000000000000 ]--- Whereas previously we only produced: Call Trace: [<80109e1f>] show_stack+0x63/0x7c ---[ end trace 0000000000000000 ]--- Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 34c2f668d0f6 ("MIPS: microMIPS: Add unaligned access support.") Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com> Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14532/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2016-11-08 00:07:06 +09:00
if (info->pc_offset == -1 &&
is_ra_save_ins(&insn, &info->pc_offset))
break;
}
if (info->frame_size && info->pc_offset >= 0) /* nested */
return 0;
if (info->pc_offset < 0) /* leaf */
return 1;
/* prologue seems bogus... */
err:
return -1;
}
static struct mips_frame_info schedule_mfi __read_mostly;
#ifdef CONFIG_KALLSYMS
static unsigned long get___schedule_addr(void)
{
return kallsyms_lookup_name("__schedule");
}
#else
static unsigned long get___schedule_addr(void)
{
union mips_instruction *ip = (void *)schedule;
int max_insns = 8;
int i;
for (i = 0; i < max_insns; i++, ip++) {
if (ip->j_format.opcode == j_op)
return J_TARGET(ip, ip->j_format.target);
}
return 0;
}
#endif
static int __init frame_info_init(void)
{
unsigned long size = 0;
#ifdef CONFIG_KALLSYMS
unsigned long ofs;
#endif
unsigned long addr;
addr = get___schedule_addr();
if (!addr)
addr = (unsigned long)schedule;
#ifdef CONFIG_KALLSYMS
kallsyms_lookup_size_offset(addr, &size, &ofs);
#endif
schedule_mfi.func = (void *)addr;
schedule_mfi.func_size = size;
get_frame_info(&schedule_mfi);
/*
* Without schedule() frame info, result given by
* thread_saved_pc() and get_wchan() are not reliable.
*/
if (schedule_mfi.pc_offset < 0)
printk("Can't analyze schedule() prologue at %p\n", schedule);
return 0;
}
arch_initcall(frame_info_init);
/*
* Return saved PC of a blocked thread.
*/
unsigned long thread_saved_pc(struct task_struct *tsk)
{
struct thread_struct *t = &tsk->thread;
/* New born processes are a special case */
if (t->reg31 == (unsigned long) ret_from_fork)
return t->reg31;
if (schedule_mfi.pc_offset < 0)
return 0;
return ((unsigned long *)t->reg29)[schedule_mfi.pc_offset];
}
#ifdef CONFIG_KALLSYMS
/* generic stack unwinding function */
unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
unsigned long *sp,
unsigned long pc,
unsigned long *ra)
{
MIPS: IRQ Stack: Unwind IRQ stack onto task stack [ Upstream commit db8466c581cca1a08b505f1319c3ecd246f16fa8 ] When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Sasha Levin <alexander.levin@verizon.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-03-21 23:52:25 +09:00
unsigned long low, high, irq_stack_high;
struct mips_frame_info info;
unsigned long size, ofs;
MIPS: IRQ Stack: Unwind IRQ stack onto task stack [ Upstream commit db8466c581cca1a08b505f1319c3ecd246f16fa8 ] When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Sasha Levin <alexander.levin@verizon.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-03-21 23:52:25 +09:00
struct pt_regs *regs;
int leaf;
if (!stack_page)
return 0;
/*
MIPS: IRQ Stack: Unwind IRQ stack onto task stack [ Upstream commit db8466c581cca1a08b505f1319c3ecd246f16fa8 ] When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Sasha Levin <alexander.levin@verizon.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-03-21 23:52:25 +09:00
* IRQ stacks start at IRQ_STACK_START
* task stacks at THREAD_SIZE - 32
*/
MIPS: IRQ Stack: Unwind IRQ stack onto task stack [ Upstream commit db8466c581cca1a08b505f1319c3ecd246f16fa8 ] When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Sasha Levin <alexander.levin@verizon.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-03-21 23:52:25 +09:00
low = stack_page;
if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) {
high = stack_page + IRQ_STACK_START;
irq_stack_high = high;
} else {
high = stack_page + THREAD_SIZE - 32;
irq_stack_high = 0;
}
/*
* If we reached the top of the interrupt stack, start unwinding
* the interrupted task stack.
*/
if (unlikely(*sp == irq_stack_high)) {
unsigned long task_sp = *(unsigned long *)*sp;
/*
* Check that the pointer saved in the IRQ stack head points to
* something within the stack of the current task
*/
if (!object_is_on_stack((void *)task_sp))
return 0;
/*
* Follow pointer to tasks kernel stack frame where interrupted
* state was saved.
*/
regs = (struct pt_regs *)task_sp;
pc = regs->cp0_epc;
if (!user_mode(regs) && __kernel_text_address(pc)) {
*sp = regs->regs[29];
*ra = regs->regs[31];
return pc;
}
return 0;
}
if (!kallsyms_lookup_size_offset(pc, &size, &ofs))
return 0;
/*
* Return ra if an exception occurred at the first instruction
*/
if (unlikely(ofs == 0)) {
pc = *ra;
*ra = 0;
return pc;
}
info.func = (void *)(pc - ofs);
info.func_size = ofs; /* analyze from start to ofs */
leaf = get_frame_info(&info);
if (leaf < 0)
return 0;
MIPS: IRQ Stack: Unwind IRQ stack onto task stack [ Upstream commit db8466c581cca1a08b505f1319c3ecd246f16fa8 ] When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Cc: Masanari Iida <standby24x7@gmail.com> Cc: Chris Metcalf <cmetcalf@mellanox.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: Paul Burton <paul.burton@imgtec.com> Cc: Ingo Molnar <mingo@kernel.org> Cc: Jason A. Donenfeld <jason@zx2c4.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Sasha Levin <alexander.levin@verizon.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-03-21 23:52:25 +09:00
if (*sp < low || *sp + info.frame_size > high)
return 0;
if (leaf)
/*
* For some extreme cases, get_frame_info() can
* consider wrongly a nested function as a leaf
* one. In that cases avoid to return always the
* same value.
*/
pc = pc != *ra ? *ra : 0;
else
pc = ((unsigned long *)(*sp))[info.pc_offset];
*sp += info.frame_size;
*ra = 0;
return __kernel_text_address(pc) ? pc : 0;
}
EXPORT_SYMBOL(unwind_stack_by_address);
/* used by show_backtrace() */
unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
unsigned long pc, unsigned long *ra)
{
unsigned long stack_page = 0;
int cpu;
for_each_possible_cpu(cpu) {
if (on_irq_stack(cpu, *sp)) {
stack_page = (unsigned long)irq_stack[cpu];
break;
}
}
if (!stack_page)
stack_page = (unsigned long)task_stack_page(task);
return unwind_stack_by_address(stack_page, sp, pc, ra);
}
#endif
/*
* get_wchan - a maintenance nightmare^W^Wpain in the ass ...
*/
unsigned long get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
#ifdef CONFIG_KALLSYMS
unsigned long sp;
unsigned long ra = 0;
#endif
if (!task || task == current || task->state == TASK_RUNNING)
goto out;
if (!task_stack_page(task))
goto out;
pc = thread_saved_pc(task);
#ifdef CONFIG_KALLSYMS
sp = task->thread.reg29 + schedule_mfi.frame_size;
while (in_sched_functions(pc))
pc = unwind_stack(task, &sp, pc, &ra);
#endif
out:
return pc;
}
/*
* Don't forget that the stack pointer must be aligned on a 8 bytes
* boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
*/
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() & ~PAGE_MASK;
return sp & ALMASK;
}
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
static DEFINE_PER_CPU(struct call_single_data, backtrace_csd);
static struct cpumask backtrace_csd_busy;
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
static void handle_backtrace(void *info)
{
nmi_cpu_backtrace(get_irq_regs());
cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
}
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
static void raise_backtrace(cpumask_t *mask)
{
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
struct call_single_data *csd;
int cpu;
nmi_backtrace: add more trigger_*_cpu_backtrace() methods Patch series "improvements to the nmi_backtrace code" v9. This patch series modifies the trigger_xxx_backtrace() NMI-based remote backtracing code to make it more flexible, and makes a few small improvements along the way. The motivation comes from the task isolation code, where there are scenarios where we want to be able to diagnose a case where some cpu is about to interrupt a task-isolated cpu. It can be helpful to see both where the interrupting cpu is, and also an approximation of where the cpu that is being interrupted is. The nmi_backtrace framework allows us to discover the stack of the interrupted cpu. I've tested that the change works as desired on tile, and build-tested x86, arm, mips, and sparc64. For x86 I confirmed that the generic cpuidle stuff as well as the architecture-specific routines are in the new cpuidle section. For arm, mips, and sparc I just build-tested it and made sure the generic cpuidle routines were in the new cpuidle section, but I didn't attempt to figure out which the platform-specific idle routines might be. That might be more usefully done by someone with platform experience in follow-up patches. This patch (of 4): Currently you can only request a backtrace of either all cpus, or all cpus but yourself. It can also be helpful to request a remote backtrace of a single cpu, and since we want that, the logical extension is to support a cpumask as the underlying primitive. This change modifies the existing lib/nmi_backtrace.c code to take a cpumask as its basic primitive, and modifies the linux/nmi.h code to use the new "cpumask" method instead. The existing clients of nmi_backtrace (arm and x86) are converted to using the new cpumask approach in this change. The other users of the backtracing API (sparc64 and mips) are converted to use the cpumask approach rather than the all/allbutself approach. The mips code ignored the "include_self" boolean but with this change it will now also dump a local backtrace if requested. Link: http://lkml.kernel.org/r/1472487169-14923-2-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com> Tested-by: Daniel Thompson <daniel.thompson@linaro.org> [arm] Reviewed-by: Aaron Tomlin <atomlin@redhat.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Russell King <linux@arm.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 09:02:45 +09:00
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
for_each_cpu(cpu, mask) {
/*
* If we previously sent an IPI to the target CPU & it hasn't
* cleared its bit in the busy cpumask then it didn't handle
* our previous IPI & it's not safe for us to reuse the
* call_single_data_t.
*/
if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) {
pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n",
cpu);
continue;
}
nmi_backtrace: add more trigger_*_cpu_backtrace() methods Patch series "improvements to the nmi_backtrace code" v9. This patch series modifies the trigger_xxx_backtrace() NMI-based remote backtracing code to make it more flexible, and makes a few small improvements along the way. The motivation comes from the task isolation code, where there are scenarios where we want to be able to diagnose a case where some cpu is about to interrupt a task-isolated cpu. It can be helpful to see both where the interrupting cpu is, and also an approximation of where the cpu that is being interrupted is. The nmi_backtrace framework allows us to discover the stack of the interrupted cpu. I've tested that the change works as desired on tile, and build-tested x86, arm, mips, and sparc64. For x86 I confirmed that the generic cpuidle stuff as well as the architecture-specific routines are in the new cpuidle section. For arm, mips, and sparc I just build-tested it and made sure the generic cpuidle routines were in the new cpuidle section, but I didn't attempt to figure out which the platform-specific idle routines might be. That might be more usefully done by someone with platform experience in follow-up patches. This patch (of 4): Currently you can only request a backtrace of either all cpus, or all cpus but yourself. It can also be helpful to request a remote backtrace of a single cpu, and since we want that, the logical extension is to support a cpumask as the underlying primitive. This change modifies the existing lib/nmi_backtrace.c code to take a cpumask as its basic primitive, and modifies the linux/nmi.h code to use the new "cpumask" method instead. The existing clients of nmi_backtrace (arm and x86) are converted to using the new cpumask approach in this change. The other users of the backtracing API (sparc64 and mips) are converted to use the cpumask approach rather than the all/allbutself approach. The mips code ignored the "include_self" boolean but with this change it will now also dump a local backtrace if requested. Link: http://lkml.kernel.org/r/1472487169-14923-2-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com> Tested-by: Daniel Thompson <daniel.thompson@linaro.org> [arm] Reviewed-by: Aaron Tomlin <atomlin@redhat.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Russell King <linux@arm.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 09:02:45 +09:00
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
csd = &per_cpu(backtrace_csd, cpu);
csd->func = handle_backtrace;
smp_call_function_single_async(cpu, csd);
}
}
nmi_backtrace: add more trigger_*_cpu_backtrace() methods Patch series "improvements to the nmi_backtrace code" v9. This patch series modifies the trigger_xxx_backtrace() NMI-based remote backtracing code to make it more flexible, and makes a few small improvements along the way. The motivation comes from the task isolation code, where there are scenarios where we want to be able to diagnose a case where some cpu is about to interrupt a task-isolated cpu. It can be helpful to see both where the interrupting cpu is, and also an approximation of where the cpu that is being interrupted is. The nmi_backtrace framework allows us to discover the stack of the interrupted cpu. I've tested that the change works as desired on tile, and build-tested x86, arm, mips, and sparc64. For x86 I confirmed that the generic cpuidle stuff as well as the architecture-specific routines are in the new cpuidle section. For arm, mips, and sparc I just build-tested it and made sure the generic cpuidle routines were in the new cpuidle section, but I didn't attempt to figure out which the platform-specific idle routines might be. That might be more usefully done by someone with platform experience in follow-up patches. This patch (of 4): Currently you can only request a backtrace of either all cpus, or all cpus but yourself. It can also be helpful to request a remote backtrace of a single cpu, and since we want that, the logical extension is to support a cpumask as the underlying primitive. This change modifies the existing lib/nmi_backtrace.c code to take a cpumask as its basic primitive, and modifies the linux/nmi.h code to use the new "cpumask" method instead. The existing clients of nmi_backtrace (arm and x86) are converted to using the new cpumask approach in this change. The other users of the backtracing API (sparc64 and mips) are converted to use the cpumask approach rather than the all/allbutself approach. The mips code ignored the "include_self" boolean but with this change it will now also dump a local backtrace if requested. Link: http://lkml.kernel.org/r/1472487169-14923-2-git-send-email-cmetcalf@mellanox.com Signed-off-by: Chris Metcalf <cmetcalf@mellanox.com> Tested-by: Daniel Thompson <daniel.thompson@linaro.org> [arm] Reviewed-by: Aaron Tomlin <atomlin@redhat.com> Reviewed-by: Petr Mladek <pmladek@suse.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Russell King <linux@arm.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-10-08 09:02:45 +09:00
MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() commit b63e132b6433a41cf311e8bc382d33fd2b73b505 upstream. The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [<c7521934>] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton <paul.burton@mips.com> Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan <jhogan@kernel.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Huacai Chen <chenhc@lemote.com> Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") [ Huacai: backported to 4.9: Replace "call_single_data_t" with "struct call_single_data" ] Signed-off-by: Huacai Chen <chenhc@lemote.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-06-23 02:55:46 +09:00
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
}
2015-01-08 21:17:37 +09:00
int mips_get_process_fp_mode(struct task_struct *task)
{
int value = 0;
if (!test_tsk_thread_flag(task, TIF_32BIT_FPREGS))
value |= PR_FP_MODE_FR;
if (test_tsk_thread_flag(task, TIF_HYBRID_FPREGS))
value |= PR_FP_MODE_FRE;
return value;
}
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 20:43:58 +09:00
static void prepare_for_fp_mode_switch(void *info)
{
struct mm_struct *mm = info;
if (current->mm == mm)
lose_fpu(1);
}
2015-01-08 21:17:37 +09:00
int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
{
const unsigned int known_bits = PR_FP_MODE_FR | PR_FP_MODE_FRE;
struct task_struct *t;
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 20:43:58 +09:00
int max_users;
2015-01-08 21:17:37 +09:00
MIPS: Validate PR_SET_FP_MODE prctl(2) requests against the ABI of the task commit b67336eee3fcb8ecedc6c13e2bf88aacfa3151e2 upstream. Fix an API loophole introduced with commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS"), where the caller of prctl(2) is incorrectly allowed to make a change to CP0.Status.FR or CP0.Config5.FRE register bits even if CONFIG_MIPS_O32_FP64_SUPPORT has not been enabled, despite that an executable requesting the mode requested via ELF file annotation would not be allowed to run in the first place, or for n64 and n64 ABI tasks which do not have non-default modes defined at all. Add suitable checks to `mips_set_process_fp_mode' and bail out if an invalid mode change has been requested for the ABI in effect, even if the FPU hardware or emulation would otherwise allow it. Always succeed however without taking any further action if the mode requested is the same as one already in effect, regardless of whether any mode change, should it be requested, would actually be allowed for the task concerned. Signed-off-by: Maciej W. Rozycki <macro@mips.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Paul Burton <paul.burton@mips.com> Cc: James Hogan <james.hogan@mips.com> Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17800/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-11-27 18:33:03 +09:00
/* If nothing to change, return right away, successfully. */
if (value == mips_get_process_fp_mode(task))
return 0;
/* Only accept a mode change if 64-bit FP enabled for o32. */
if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
return -EOPNOTSUPP;
/* And only for o32 tasks. */
if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
return -EOPNOTSUPP;
2015-01-08 21:17:37 +09:00
/* Check the value is valid */
if (value & ~known_bits)
return -EOPNOTSUPP;
MIPS: prctl: Disallow FRE without FR with PR_SET_FP_MODE requests commit 28e4213dd331e944e7fca1954a946829162ed9d4 upstream. Having PR_FP_MODE_FRE (i.e. Config5.FRE) set without PR_FP_MODE_FR (i.e. Status.FR) is not supported as the lone purpose of Config5.FRE is to emulate Status.FR=0 handling on FPU hardware that has Status.FR=1 hardwired[1][2]. Also we do not handle this case elsewhere, and assume throughout our code that TIF_HYBRID_FPREGS and TIF_32BIT_FPREGS cannot be set both at once for a task, leading to inconsistent behaviour if this does happen. Return unsuccessfully then from prctl(2) PR_SET_FP_MODE calls requesting PR_FP_MODE_FRE to be set with PR_FP_MODE_FR clear. This corresponds to modes allowed by `mips_set_personality_fp'. References: [1] "MIPS Architecture For Programmers, Vol. III: MIPS32 / microMIPS32 Privileged Resource Architecture", Imagination Technologies, Document Number: MD00090, Revision 6.02, July 10, 2015, Table 9.69 "Config5 Register Field Descriptions", p. 262 [2] "MIPS Architecture For Programmers, Volume III: MIPS64 / microMIPS64 Privileged Resource Architecture", Imagination Technologies, Document Number: MD00091, Revision 6.03, December 22, 2015, Table 9.72 "Config5 Register Field Descriptions", p. 288 Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Signed-off-by: Maciej W. Rozycki <macro@mips.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: linux-mips@linux-mips.org Cc: <stable@vger.kernel.org> # 4.0+ Patchwork: https://patchwork.linux-mips.org/patch/19327/ Signed-off-by: James Hogan <jhogan@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-05-16 07:04:44 +09:00
/* Setting FRE without FR is not supported. */
if ((value & (PR_FP_MODE_FR | PR_FP_MODE_FRE)) == PR_FP_MODE_FRE)
return -EOPNOTSUPP;
2015-01-08 21:17:37 +09:00
/* Avoid inadvertently triggering emulation */
if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
!(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))
2015-01-08 21:17:37 +09:00
return -EOPNOTSUPP;
if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre)
2015-01-08 21:17:37 +09:00
return -EOPNOTSUPP;
/* FR = 0 not supported in MIPS R6 */
if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6)
return -EOPNOTSUPP;
/* Proceed with the mode switch */
preempt_disable();
2015-01-08 21:17:37 +09:00
/* Save FP & vector context, then disable FPU & MSA */
if (task->signal == current->signal)
lose_fpu(1);
/* Prevent any threads from obtaining live FP context */
atomic_set(&task->mm->context.fp_mode_switching, 1);
smp_mb__after_atomic();
/*
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 20:43:58 +09:00
* If there are multiple online CPUs then force any which are running
* threads in this process to lose their FPU context, which they can't
* regain until fp_mode_switching is cleared later.
2015-01-08 21:17:37 +09:00
*/
if (num_online_cpus() > 1) {
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 20:43:58 +09:00
/* No need to send an IPI for the local CPU */
max_users = (task->mm == current->mm) ? 1 : 0;
2015-01-08 21:17:37 +09:00
MIPS: Force CPUs to lose FP context during mode switches Commit 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") added support for the PR_SET_FP_MODE prctl, which allows a userland program to modify its FP mode at runtime. This is most notably required if dynamic linking leads to the FP mode requirement changing at runtime from that indicated in the initial executable's ELF header. In order to avoid overhead in the general FP context restore code, it aimed to have threads in the process become unable to enable the FPU during a mode switch & have the thread calling the prctl syscall wait for all other threads in the process to be context switched at least once. Once that happens we can know that no thread in the process whose mode will be switched has live FP context, and it's safe to perform the mode switch. However in the (rare) case of modeswitches occurring in multithreaded programs this can lead to indeterminate delays for the thread invoking the prctl syscall, and the code monitoring for those context switches was woefully inadequate for all but the simplest cases. Fix this by broadcasting an IPI if other CPUs may have live FP context for an affected thread, with a handler causing those CPUs to relinquish their FPU ownership. Threads will then be allowed to continue running but will stall on the wait_on_atomic_t in enable_restore_fp_context if they attempt to use FP again whilst the mode switch is still in progress. The end result is less fragile poking at scheduler context switch counts & a more expedient completion of the mode switch. Signed-off-by: Paul Burton <paul.burton@imgtec.com> Fixes: 9791554b45a2 ("MIPS,prctl: add PR_[GS]ET_FP_MODE prctl options for MIPS") Reviewed-by: Maciej W. Rozycki <macro@imgtec.com> Cc: Adam Buchbinder <adam.buchbinder@gmail.com> Cc: James Hogan <james.hogan@imgtec.com> Cc: stable <stable@vger.kernel.org> # v4.0+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13145/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2016-04-21 20:43:58 +09:00
if (atomic_read(&current->mm->mm_users) > max_users)
smp_call_function(prepare_for_fp_mode_switch,
(void *)current->mm, 1);
2015-01-08 21:17:37 +09:00
}
/*
* There are now no threads of the process with live FP context, so it
* is safe to proceed with the FP mode switch.
*/
for_each_thread(task, t) {
/* Update desired FP register width */
if (value & PR_FP_MODE_FR) {
clear_tsk_thread_flag(t, TIF_32BIT_FPREGS);
} else {
set_tsk_thread_flag(t, TIF_32BIT_FPREGS);
clear_tsk_thread_flag(t, TIF_MSA_CTX_LIVE);
}
/* Update desired FP single layout */
if (value & PR_FP_MODE_FRE)
set_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
else
clear_tsk_thread_flag(t, TIF_HYBRID_FPREGS);
}
/* Allow threads to use FP again */
atomic_set(&task->mm->context.fp_mode_switching, 0);
preempt_enable();
2015-01-08 21:17:37 +09:00
return 0;
}