linux-brain/arch/arm/kernel/entry-common.S

419 lines
10 KiB
ArmAsm
Raw Normal View History

/*
* linux/arch/arm/kernel/entry-common.S
*
* Copyright (C) 2000 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-07-01 00:29:12 +09:00
#include <asm/assembler.h>
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/unwind.h>
#ifdef CONFIG_NEED_RET_TO_USER
#include <mach/entry-macro.S>
#else
.macro arch_ret_to_user, tmp1, tmp2
.endm
#endif
#include "entry-header.S"
.align 5
#if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING))
/*
* This is the fast syscall return path. We do as little as possible here,
* such as avoiding writing r0 to the stack. We only use this path if we
* have tracing and context tracking disabled - the overheads from those
* features make this path too inefficient.
*/
ret_fast_syscall:
ARM: 8781/1: Fix Thumb-2 syscall return for binutils 2.29+ [ Upstream commit afc9f65e01cd114cb2cedf544d22239116ce0cc6 ] When building the kernel as Thumb-2 with binutils 2.29 or newer, if the assembler has seen the .type directive (via ENDPROC()) for a symbol, it automatically handles the setting of the lowest bit when the symbol is used with ADR. The badr macro on the other hand handles this lowest bit manually. This leads to a jump to a wrong address in the wrong state in the syscall return path: Internal error: Oops - undefined instruction: 0 [#2] SMP THUMB2 Modules linked in: CPU: 0 PID: 652 Comm: modprobe Tainted: G D 4.18.0-rc3+ #8 PC is at ret_fast_syscall+0x4/0x62 LR is at sys_brk+0x109/0x128 pc : [<80101004>] lr : [<801c8a35>] psr: 60000013 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 50c5387d Table: 9e82006a DAC: 00000051 Process modprobe (pid: 652, stack limit = 0x(ptrval)) 80101000 <ret_fast_syscall>: 80101000: b672 cpsid i 80101002: f8d9 2008 ldr.w r2, [r9, #8] 80101006: f1b2 4ffe cmp.w r2, #2130706432 ; 0x7f000000 80101184 <local_restart>: 80101184: f8d9 a000 ldr.w sl, [r9] 80101188: e92d 0030 stmdb sp!, {r4, r5} 8010118c: f01a 0ff0 tst.w sl, #240 ; 0xf0 80101190: d117 bne.n 801011c2 <__sys_trace> 80101192: 46ba mov sl, r7 80101194: f5ba 7fc8 cmp.w sl, #400 ; 0x190 80101198: bf28 it cs 8010119a: f04f 0a00 movcs.w sl, #0 8010119e: f3af 8014 nop.w {20} 801011a2: f2af 1ea2 subw lr, pc, #418 ; 0x1a2 To fix this, add a new symbol name which doesn't have ENDPROC used on it and use that with badr. We can't remove the badr usage since that would would cause breakage with older binutils. Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> Signed-off-by: Sasha Levin <sashal@kernel.org>
2018-07-13 19:12:22 +09:00
__ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
bne fast_work_pending
/* perform architecture specific actions before user return */
arch_ret_to_user r1, lr
restore_user_regs fast = 1, offset = S_OFF
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
/* Ok, we need to do extra processing, enter the slow path. */
fast_work_pending:
str r0, [sp, #S_R0+S_OFF]! @ returned r0
/* fall through to work_pending */
#else
/*
* The "replacement" ret_fast_syscall for when tracing or context tracking
* is enabled. As we will need to call out to some C functions, we save
* r0 first to avoid needing to save registers around each C function call.
*/
ret_fast_syscall:
ARM: 8781/1: Fix Thumb-2 syscall return for binutils 2.29+ [ Upstream commit afc9f65e01cd114cb2cedf544d22239116ce0cc6 ] When building the kernel as Thumb-2 with binutils 2.29 or newer, if the assembler has seen the .type directive (via ENDPROC()) for a symbol, it automatically handles the setting of the lowest bit when the symbol is used with ADR. The badr macro on the other hand handles this lowest bit manually. This leads to a jump to a wrong address in the wrong state in the syscall return path: Internal error: Oops - undefined instruction: 0 [#2] SMP THUMB2 Modules linked in: CPU: 0 PID: 652 Comm: modprobe Tainted: G D 4.18.0-rc3+ #8 PC is at ret_fast_syscall+0x4/0x62 LR is at sys_brk+0x109/0x128 pc : [<80101004>] lr : [<801c8a35>] psr: 60000013 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 50c5387d Table: 9e82006a DAC: 00000051 Process modprobe (pid: 652, stack limit = 0x(ptrval)) 80101000 <ret_fast_syscall>: 80101000: b672 cpsid i 80101002: f8d9 2008 ldr.w r2, [r9, #8] 80101006: f1b2 4ffe cmp.w r2, #2130706432 ; 0x7f000000 80101184 <local_restart>: 80101184: f8d9 a000 ldr.w sl, [r9] 80101188: e92d 0030 stmdb sp!, {r4, r5} 8010118c: f01a 0ff0 tst.w sl, #240 ; 0xf0 80101190: d117 bne.n 801011c2 <__sys_trace> 80101192: 46ba mov sl, r7 80101194: f5ba 7fc8 cmp.w sl, #400 ; 0x190 80101198: bf28 it cs 8010119a: f04f 0a00 movcs.w sl, #0 8010119e: f3af 8014 nop.w {20} 801011a2: f2af 1ea2 subw lr, pc, #418 ; 0x1a2 To fix this, add a new symbol name which doesn't have ENDPROC used on it and use that with badr. We can't remove the badr usage since that would would cause breakage with older binutils. Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> Signed-off-by: Sasha Levin <sashal@kernel.org>
2018-07-13 19:12:22 +09:00
__ret_fast_syscall:
UNWIND(.fnstart )
UNWIND(.cantunwind )
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
disable_irq_notrace @ disable interrupts
ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing
tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
beq no_work_pending
UNWIND(.fnend )
ENDPROC(ret_fast_syscall)
/* Slower path - fall through to work_pending */
#endif
tst r1, #_TIF_SYSCALL_WORK
bne __sys_trace_return_nosave
slow_work_pending:
mov r0, sp @ 'regs'
mov r2, why @ 'syscall'
bl do_work_pending
cmp r0, #0
beq no_work_pending
movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
ldmia sp, {r0 - r6} @ have to reload r0 - r6
b local_restart @ ... and off we go
ENDPROC(ret_fast_syscall)
/*
* "slow" syscall return path. "why" tells us if this was a real syscall.
* IRQs may be enabled here, so always disable them. Note that we use the
* "notrace" version to avoid calling into the tracing code unnecessarily.
* do_work_pending() will update this state if necessary.
*/
ENTRY(ret_to_user)
ret_slow_syscall:
disable_irq_notrace @ disable interrupts
ARM: 6952/1: fix lockdep warning of "unannotated irqs-off" This patch fixes the lockdep warning of "unannotated irqs-off"[1]. After entering __irq_usr, arm core will disable interrupt automatically, but __irq_usr does not annotate the irq disable, so lockdep may complain the warning if it has chance to check this in irq handler. This patch adds trace_hardirqs_off in __irq_usr before entering irq_handler to handle the irq, also calls ret_to_user_from_irq to avoid calling disable_irq again. This is also a fix for irq off tracer. [1], lockdep warning log of "unannotated irqs-off" [ 13.804687] ------------[ cut here ]------------ [ 13.809570] WARNING: at kernel/lockdep.c:3335 check_flags+0x78/0x1d0() [ 13.816467] Modules linked in: [ 13.819732] Backtrace: [ 13.822357] [<c01cb42c>] (dump_backtrace+0x0/0x100) from [<c06abb14>] (dump_stack+0x20/0x24) [ 13.831268] r6:c07d8c2c r5:00000d07 r4:00000000 r3:00000000 [ 13.837280] [<c06abaf4>] (dump_stack+0x0/0x24) from [<c01ffc04>] (warn_slowpath_common+0x5c/0x74) [ 13.846649] [<c01ffba8>] (warn_slowpath_common+0x0/0x74) from [<c01ffc48>] (warn_slowpath_null+0x2c/0x34) [ 13.856781] r8:00000000 r7:00000000 r6:c18b8194 r5:60000093 r4:ef182000 [ 13.863708] r3:00000009 [ 13.866485] [<c01ffc1c>] (warn_slowpath_null+0x0/0x34) from [<c0237d84>] (check_flags+0x78/0x1d0) [ 13.875823] [<c0237d0c>] (check_flags+0x0/0x1d0) from [<c023afc8>] (lock_acquire+0x4c/0x150) [ 13.884704] [<c023af7c>] (lock_acquire+0x0/0x150) from [<c06af638>] (_raw_spin_lock+0x4c/0x84) [ 13.893798] [<c06af5ec>] (_raw_spin_lock+0x0/0x84) from [<c01f9a44>] (sched_ttwu_pending+0x58/0x8c) [ 13.903320] r6:ef92d040 r5:00000003 r4:c18b8180 [ 13.908233] [<c01f99ec>] (sched_ttwu_pending+0x0/0x8c) from [<c01f9a90>] (scheduler_ipi+0x18/0x1c) [ 13.917663] r6:ef183fb0 r5:00000003 r4:00000000 r3:00000001 [ 13.923645] [<c01f9a78>] (scheduler_ipi+0x0/0x1c) from [<c01bc458>] (do_IPI+0x9c/0xfc) [ 13.932006] [<c01bc3bc>] (do_IPI+0x0/0xfc) from [<c06b0888>] (__irq_usr+0x48/0xe0) [ 13.939971] Exception stack(0xef183fb0 to 0xef183ff8) [ 13.945281] 3fa0: ffffffc3 0001500c 00000001 0001500c [ 13.953948] 3fc0: 00000050 400b45f0 400d9000 00000000 00000001 400d9600 6474e552 bea05b3c [ 13.962585] 3fe0: 400d96c0 bea059c0 400b6574 400b65d8 20000010 ffffffff [ 13.969573] r6:00000403 r5:fa240100 r4:ffffffff r3:20000010 [ 13.975585] ---[ end trace efc4896ab0fb62cb ]--- [ 13.980468] possible reason: unannotated irqs-off. [ 13.985534] irq event stamp: 1610 [ 13.989044] hardirqs last enabled at (1610): [<c01c703c>] no_work_pending+0x8/0x2c [ 13.997131] hardirqs last disabled at (1609): [<c01c7024>] ret_slow_syscall+0xc/0x1c [ 14.005371] softirqs last enabled at (0): [<c01fe5e4>] copy_process+0x2cc/0xa24 [ 14.013183] softirqs last disabled at (0): [< (null)>] (null) Signed-off-by: Ming Lei <ming.lei@canonical.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-06-05 10:24:58 +09:00
ENTRY(ret_to_user_from_irq)
ldr r1, [tsk, #TI_FLAGS]
tst r1, #_TIF_WORK_MASK
bne slow_work_pending
no_work_pending:
asm_trace_hardirqs_on save = 0
/* perform architecture specific actions before user return */
arch_ret_to_user r1, lr
ct_user_enter save = 0
restore_user_regs fast = 0, offset = 0
ARM: 6952/1: fix lockdep warning of "unannotated irqs-off" This patch fixes the lockdep warning of "unannotated irqs-off"[1]. After entering __irq_usr, arm core will disable interrupt automatically, but __irq_usr does not annotate the irq disable, so lockdep may complain the warning if it has chance to check this in irq handler. This patch adds trace_hardirqs_off in __irq_usr before entering irq_handler to handle the irq, also calls ret_to_user_from_irq to avoid calling disable_irq again. This is also a fix for irq off tracer. [1], lockdep warning log of "unannotated irqs-off" [ 13.804687] ------------[ cut here ]------------ [ 13.809570] WARNING: at kernel/lockdep.c:3335 check_flags+0x78/0x1d0() [ 13.816467] Modules linked in: [ 13.819732] Backtrace: [ 13.822357] [<c01cb42c>] (dump_backtrace+0x0/0x100) from [<c06abb14>] (dump_stack+0x20/0x24) [ 13.831268] r6:c07d8c2c r5:00000d07 r4:00000000 r3:00000000 [ 13.837280] [<c06abaf4>] (dump_stack+0x0/0x24) from [<c01ffc04>] (warn_slowpath_common+0x5c/0x74) [ 13.846649] [<c01ffba8>] (warn_slowpath_common+0x0/0x74) from [<c01ffc48>] (warn_slowpath_null+0x2c/0x34) [ 13.856781] r8:00000000 r7:00000000 r6:c18b8194 r5:60000093 r4:ef182000 [ 13.863708] r3:00000009 [ 13.866485] [<c01ffc1c>] (warn_slowpath_null+0x0/0x34) from [<c0237d84>] (check_flags+0x78/0x1d0) [ 13.875823] [<c0237d0c>] (check_flags+0x0/0x1d0) from [<c023afc8>] (lock_acquire+0x4c/0x150) [ 13.884704] [<c023af7c>] (lock_acquire+0x0/0x150) from [<c06af638>] (_raw_spin_lock+0x4c/0x84) [ 13.893798] [<c06af5ec>] (_raw_spin_lock+0x0/0x84) from [<c01f9a44>] (sched_ttwu_pending+0x58/0x8c) [ 13.903320] r6:ef92d040 r5:00000003 r4:c18b8180 [ 13.908233] [<c01f99ec>] (sched_ttwu_pending+0x0/0x8c) from [<c01f9a90>] (scheduler_ipi+0x18/0x1c) [ 13.917663] r6:ef183fb0 r5:00000003 r4:00000000 r3:00000001 [ 13.923645] [<c01f9a78>] (scheduler_ipi+0x0/0x1c) from [<c01bc458>] (do_IPI+0x9c/0xfc) [ 13.932006] [<c01bc3bc>] (do_IPI+0x0/0xfc) from [<c06b0888>] (__irq_usr+0x48/0xe0) [ 13.939971] Exception stack(0xef183fb0 to 0xef183ff8) [ 13.945281] 3fa0: ffffffc3 0001500c 00000001 0001500c [ 13.953948] 3fc0: 00000050 400b45f0 400d9000 00000000 00000001 400d9600 6474e552 bea05b3c [ 13.962585] 3fe0: 400d96c0 bea059c0 400b6574 400b65d8 20000010 ffffffff [ 13.969573] r6:00000403 r5:fa240100 r4:ffffffff r3:20000010 [ 13.975585] ---[ end trace efc4896ab0fb62cb ]--- [ 13.980468] possible reason: unannotated irqs-off. [ 13.985534] irq event stamp: 1610 [ 13.989044] hardirqs last enabled at (1610): [<c01c703c>] no_work_pending+0x8/0x2c [ 13.997131] hardirqs last disabled at (1609): [<c01c7024>] ret_slow_syscall+0xc/0x1c [ 14.005371] softirqs last enabled at (0): [<c01fe5e4>] copy_process+0x2cc/0xa24 [ 14.013183] softirqs last disabled at (0): [< (null)>] (null) Signed-off-by: Ming Lei <ming.lei@canonical.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2011-06-05 10:24:58 +09:00
ENDPROC(ret_to_user_from_irq)
ENDPROC(ret_to_user)
/*
* This is how we return from a fork.
*/
ENTRY(ret_from_fork)
bl schedule_tail
cmp r5, #0
movne r0, r4
badrne lr, 1f
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-07-01 00:29:12 +09:00
retne r5
ARM: fix oops on initial entry to userspace with Thumb2 kernels Daniel Mack reports an oops at boot with the latest kernels: Internal error: Oops - undefined instruction: 0 [#1] SMP THUMB2 Modules linked in: CPU: 0 Not tainted (3.6.0-11057-g584df1d #145) PC is at cpsw_probe+0x45a/0x9ac LR is at trace_hardirqs_on_caller+0x8f/0xfc pc : [<c03493de>] lr : [<c005e81f>] psr: 60000113 sp : cf055fb0 ip : 00000000 fp : 00000000 r10: 00000000 r9 : 00000000 r8 : 00000000 r7 : 00000000 r6 : 00000000 r5 : c0344555 r4 : 00000000 r3 : cf057a40 r2 : 00000000 r1 : 00000001 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 50c5387d Table: 8f3f4019 DAC: 00000015 Process init (pid: 1, stack limit = 0xcf054240) Stack: (0xcf055fb0 to 0xcf056000) 5fa0: 00000001 00000000 00000000 00000000 5fc0: cf055fb0 c000d1a8 00000000 00000000 00000000 00000000 00000000 00000000 5fe0: 00000000 be9b3f10 00000000 b6f6add0 00000010 00000000 aaaabfaf a8babbaa The analysis of this is as follows. In init/main.c, we issue: kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); This creates a new thread, which falls through to the ret_from_fork assembly, with r4 set NULL and r5 set to kernel_init. You can see this in your oops dump register set - r5 is 0xc0344555, which is the address of kernel_init plus 1 which marks the function as Thumb code. Now, let's look at this code a little closer - this is what the disassembly looks like: c000d180 <ret_from_fork>: c000d180: f03a fe08 bl c0047d94 <schedule_tail> c000d184: 2d00 cmp r5, #0 c000d186: bf1e ittt ne c000d188: 4620 movne r0, r4 c000d18a: 46fe movne lr, pc <-- XXXXXXX c000d18c: 46af movne pc, r5 c000d18e: 46e9 mov r9, sp c000d190: ea4f 3959 mov.w r9, r9, lsr #13 c000d194: ea4f 3949 mov.w r9, r9, lsl #13 c000d198: e7c8 b.n c000d12c <ret_to_user> c000d19a: bf00 nop c000d19c: f3af 8000 nop.w This code was introduced in 9fff2fa0db911 (arm: switch to saner kernel_execve() semantics). I have marked one instruction, and it's the significant one - I'll come back to that later. Eventually, having had a successful call to kernel_execve(), kernel_init() returns zero. In returning, it uses the value in 'lr' which was set by the instruction I marked above. Unfortunately, this causes lr to contain 0xc000d18e - an even address. This switches the ISA to ARM on return but with a non word aligned PC value. So, what do we end up executing? Well, not the instructions above - yes the opcodes, but they don't mean the same thing in ARM mode. In ARM mode, it looks like this instead: c000d18c: 46e946af strbtmi r4, [r9], pc, lsr #13 c000d190: 3959ea4f ldmdbcc r9, {r0, r1, r2, r3, r6, r9, fp, sp, lr, pc}^ c000d194: 3949ea4f stmdbcc r9, {r0, r1, r2, r3, r6, r9, fp, sp, lr, pc}^ c000d198: bf00e7c8 svclt 0x0000e7c8 c000d19c: 8000f3af andhi pc, r0, pc, lsr #7 c000d1a0: e88db092 stm sp, {r1, r4, r7, ip, sp, pc} c000d1a4: 46e81fff ; <UNDEFINED> instruction: 0x46e81fff c000d1a8: 8a00f3ef bhi 0xc004a16c c000d1ac: 0a0cf08a beq 0xc03493dc I have included more above, because it's relevant. The PSR flags which we can see in the oops dump are nZCv, so Z and C are set. All the above ARM instructions are not executed, except for two. c000d1a0, which has no writeback, and writes below the current stack pointer (and that data is lost when we take the next exception.) The other instruction which is executed is c000d1ac, which takes us to... 0xc03493dc. However, remember that bit 1 of the PC got set. So that makes the PC value 0xc03493de. And that value is the value we find in the oops dump for PC. What is the instruction here when interpreted in ARM mode? 0: f71e150c ; <UNDEFINED> instruction: 0xf71e150c and there we have our undefined instruction (remember that the 'never' condition code, 0xf, has been deprecated and is now always executed as it is now being used for additional instructions.) This path also nicely explains the state of the stack we see in the oops dump too. The above is a consistent and sane story for how we got to the oops dump, which all stems from the instruction at 0xc000d18a being wrong. Reported-by: Daniel Mack <zonque@gmail.com> Tested-by: Daniel Mack <zonque@gmail.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-10-15 08:16:49 +09:00
1: get_thread_info tsk
b ret_slow_syscall
ENDPROC(ret_from_fork)
.equ NR_syscalls,0
#define CALL(x) .equ NR_syscalls,NR_syscalls+1
#include "calls.S"
/*
* Ensure that the system call table is equal to __NR_syscalls,
* which is the value the rest of the system sees
*/
.ifne NR_syscalls - __NR_syscalls
.error "__NR_syscalls is not equal to the size of the syscall table"
.endif
#undef CALL
#define CALL(x) .long x
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------
*/
.align 5
ENTRY(vector_swi)
#ifdef CONFIG_CPU_V7M
v7m_exception_entry
#else
sub sp, sp, #PT_REGS_SIZE
stmia sp, {r0 - r12} @ Calling r0 - r12
ARM( add r8, sp, #S_PC )
ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr
THUMB( mov r8, sp )
THUMB( store_user_sp_lr r8, r10, S_SP ) @ calling sp, lr
mrs r8, spsr @ called from non-FIQ mode, so ok.
str lr, [sp, #S_PC] @ Save calling PC
str r8, [sp, #S_PSR] @ Save CPSR
str r0, [sp, #S_OLD_R0] @ Save OLD_R0
#endif
zero_fp
alignment_trap r10, ip, __cr_alignment
enable_irq
ct_user_exit
get_thread_info tsk
/*
* Get the system call number.
*/
#if defined(CONFIG_OABI_COMPAT)
/*
* If we have CONFIG_OABI_COMPAT then we need to look at the swi
* value to determine if it is an EABI or an old ABI call.
*/
#ifdef CONFIG_ARM_THUMB
tst r8, #PSR_T_BIT
movne r10, #0 @ no thumb OABI emulation
USER( ldreq r10, [lr, #-4] ) @ get SWI instruction
#else
USER( ldr r10, [lr, #-4] ) @ get SWI instruction
#endif
ARM_BE8(rev r10, r10) @ little endian instruction
#elif defined(CONFIG_AEABI)
/*
* Pure EABI user space always put syscall number into scno (r7).
*/
#elif defined(CONFIG_ARM_THUMB)
/* Legacy ABI only, possibly thumb mode. */
tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs
addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in
USER( ldreq scno, [lr, #-4] )
#else
/* Legacy ABI only. */
USER( ldr scno, [lr, #-4] ) @ get SWI instruction
#endif
uaccess_disable tbl
adr tbl, sys_call_table @ load syscall table pointer
#if defined(CONFIG_OABI_COMPAT)
/*
* If the swi argument is zero, this is an EABI call and we do nothing.
*
* If this is an old ABI call, get the syscall number into scno and
* get the old ABI syscall table address.
*/
bics r10, r10, #0xff000000
eorne scno, r10, #__NR_OABI_SYSCALL_BASE
ldrne tbl, =sys_oabi_call_table
#elif !defined(CONFIG_AEABI)
bic scno, scno, #0xff000000 @ mask off SWI op-code
eor scno, scno, #__NR_SYSCALL_BASE @ check OS number
#endif
local_restart:
ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing
stmdb sp!, {r4, r5} @ push fifth and sixth args
tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls?
bne __sys_trace
ARM: 8781/1: Fix Thumb-2 syscall return for binutils 2.29+ [ Upstream commit afc9f65e01cd114cb2cedf544d22239116ce0cc6 ] When building the kernel as Thumb-2 with binutils 2.29 or newer, if the assembler has seen the .type directive (via ENDPROC()) for a symbol, it automatically handles the setting of the lowest bit when the symbol is used with ADR. The badr macro on the other hand handles this lowest bit manually. This leads to a jump to a wrong address in the wrong state in the syscall return path: Internal error: Oops - undefined instruction: 0 [#2] SMP THUMB2 Modules linked in: CPU: 0 PID: 652 Comm: modprobe Tainted: G D 4.18.0-rc3+ #8 PC is at ret_fast_syscall+0x4/0x62 LR is at sys_brk+0x109/0x128 pc : [<80101004>] lr : [<801c8a35>] psr: 60000013 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 50c5387d Table: 9e82006a DAC: 00000051 Process modprobe (pid: 652, stack limit = 0x(ptrval)) 80101000 <ret_fast_syscall>: 80101000: b672 cpsid i 80101002: f8d9 2008 ldr.w r2, [r9, #8] 80101006: f1b2 4ffe cmp.w r2, #2130706432 ; 0x7f000000 80101184 <local_restart>: 80101184: f8d9 a000 ldr.w sl, [r9] 80101188: e92d 0030 stmdb sp!, {r4, r5} 8010118c: f01a 0ff0 tst.w sl, #240 ; 0xf0 80101190: d117 bne.n 801011c2 <__sys_trace> 80101192: 46ba mov sl, r7 80101194: f5ba 7fc8 cmp.w sl, #400 ; 0x190 80101198: bf28 it cs 8010119a: f04f 0a00 movcs.w sl, #0 8010119e: f3af 8014 nop.w {20} 801011a2: f2af 1ea2 subw lr, pc, #418 ; 0x1a2 To fix this, add a new symbol name which doesn't have ENDPROC used on it and use that with badr. We can't remove the badr usage since that would would cause breakage with older binutils. Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> Signed-off-by: Sasha Levin <sashal@kernel.org>
2018-07-13 19:12:22 +09:00
invoke_syscall tbl, scno, r10, __ret_fast_syscall
add r1, sp, #S_OFF
2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back
bcs arm_syscall
mov why, #0 @ no longer a real syscall
b sys_ni_syscall @ not private func
#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI)
/*
* We failed to handle a fault trying to access the page
* containing the swi instruction, but we're not really in a
* position to return -EFAULT. Instead, return back to the
* instruction and re-enter the user fault handling path trying
* to page it in. This will likely result in sending SEGV to the
* current task.
*/
9001:
sub lr, lr, #4
str lr, [sp, #S_PC]
b ret_fast_syscall
#endif
ENDPROC(vector_swi)
/*
* This is the really slow path. We're going to be doing
* context switches, and waiting for our parent to respond.
*/
__sys_trace:
mov r1, scno
add r0, sp, #S_OFF
bl syscall_trace_enter
mov scno, r0
invoke_syscall tbl, scno, r10, __sys_trace_return, reload=1
cmp scno, #-1 @ skip the syscall?
bne 2b
add sp, sp, #S_OFF @ restore stack
b ret_slow_syscall
__sys_trace_return:
str r0, [sp, #S_R0 + S_OFF]! @ save returned r0
mov r0, sp
bl syscall_trace_exit
b ret_slow_syscall
__sys_trace_return_nosave:
enable_irq_notrace
mov r0, sp
bl syscall_trace_exit
b ret_slow_syscall
.align 5
#ifdef CONFIG_ALIGNMENT_TRAP
.type __cr_alignment, #object
__cr_alignment:
.word cr_alignment
#endif
.ltorg
/*
* This is the syscall table declaration for native ABI syscalls.
* With EABI a couple syscalls are obsolete and defined as sys_ni_syscall.
*/
#define ABI(native, compat) native
#ifdef CONFIG_AEABI
#define OBSOLETE(syscall) sys_ni_syscall
#else
#define OBSOLETE(syscall) syscall
#endif
.type sys_call_table, #object
ENTRY(sys_call_table)
#include "calls.S"
#undef ABI
#undef OBSOLETE
/*============================================================================
* Special system call wrappers
*/
@ r0 = syscall number
@ r8 = syscall table
sys_syscall:
bic scno, r0, #__NR_OABI_SYSCALL_BASE
cmp scno, #__NR_syscall - __NR_SYSCALL_BASE
cmpne scno, #NR_syscalls @ check range
#ifdef CONFIG_CPU_SPECTRE
movhs scno, #0
csdb
#endif
stmloia sp, {r5, r6} @ shuffle args
movlo r0, r1
movlo r1, r2
movlo r2, r3
movlo r3, r4
ldrlo pc, [tbl, scno, lsl #2]
b sys_ni_syscall
ENDPROC(sys_syscall)
sys_sigreturn_wrapper:
add r0, sp, #S_OFF
arm: fix really nasty sigreturn bug If a signal hits us outside of a syscall and another gets delivered when we are in sigreturn (e.g. because it had been in sa_mask for the first one and got sent to us while we'd been in the first handler), we have a chance of returning from the second handler to location one insn prior to where we ought to return. If r0 happens to contain -513 (-ERESTARTNOINTR), sigreturn will get confused into doing restart syscall song and dance. Incredible joy to debug, since it manifests as random, infrequent and very hard to reproduce double execution of instructions in userland code... The fix is simple - mark it "don't bother with restarts" in wrapper, i.e. set r8 to 0 in sys_sigreturn and sys_rt_sigreturn wrappers, suppressing the syscall restart handling on return from these guys. They can't legitimately return a restart-worthy error anyway. Testcase: #include <unistd.h> #include <signal.h> #include <stdlib.h> #include <sys/time.h> #include <errno.h> void f(int n) { __asm__ __volatile__( "ldr r0, [%0]\n" "b 1f\n" "b 2f\n" "1:b .\n" "2:\n" : : "r"(&n)); } void handler1(int sig) { } void handler2(int sig) { raise(1); } void handler3(int sig) { exit(0); } main() { struct sigaction s = {.sa_handler = handler2}; struct itimerval t1 = { .it_value = {1} }; struct itimerval t2 = { .it_value = {2} }; signal(1, handler1); sigemptyset(&s.sa_mask); sigaddset(&s.sa_mask, 1); sigaction(SIGALRM, &s, NULL); signal(SIGVTALRM, handler3); setitimer(ITIMER_REAL, &t1, NULL); setitimer(ITIMER_VIRTUAL, &t2, NULL); f(-513); /* -ERESTARTNOINTR */ write(1, "buggered\n", 9); return 1; } Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Russell King <rmk+kernel@arm.linux.org.uk> Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-09-17 22:34:39 +09:00
mov why, #0 @ prevent syscall restart handling
b sys_sigreturn
ENDPROC(sys_sigreturn_wrapper)
sys_rt_sigreturn_wrapper:
add r0, sp, #S_OFF
arm: fix really nasty sigreturn bug If a signal hits us outside of a syscall and another gets delivered when we are in sigreturn (e.g. because it had been in sa_mask for the first one and got sent to us while we'd been in the first handler), we have a chance of returning from the second handler to location one insn prior to where we ought to return. If r0 happens to contain -513 (-ERESTARTNOINTR), sigreturn will get confused into doing restart syscall song and dance. Incredible joy to debug, since it manifests as random, infrequent and very hard to reproduce double execution of instructions in userland code... The fix is simple - mark it "don't bother with restarts" in wrapper, i.e. set r8 to 0 in sys_sigreturn and sys_rt_sigreturn wrappers, suppressing the syscall restart handling on return from these guys. They can't legitimately return a restart-worthy error anyway. Testcase: #include <unistd.h> #include <signal.h> #include <stdlib.h> #include <sys/time.h> #include <errno.h> void f(int n) { __asm__ __volatile__( "ldr r0, [%0]\n" "b 1f\n" "b 2f\n" "1:b .\n" "2:\n" : : "r"(&n)); } void handler1(int sig) { } void handler2(int sig) { raise(1); } void handler3(int sig) { exit(0); } main() { struct sigaction s = {.sa_handler = handler2}; struct itimerval t1 = { .it_value = {1} }; struct itimerval t2 = { .it_value = {2} }; signal(1, handler1); sigemptyset(&s.sa_mask); sigaddset(&s.sa_mask, 1); sigaction(SIGALRM, &s, NULL); signal(SIGVTALRM, handler3); setitimer(ITIMER_REAL, &t1, NULL); setitimer(ITIMER_VIRTUAL, &t2, NULL); f(-513); /* -ERESTARTNOINTR */ write(1, "buggered\n", 9); return 1; } Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Russell King <rmk+kernel@arm.linux.org.uk> Cc: stable@kernel.org Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-09-17 22:34:39 +09:00
mov why, #0 @ prevent syscall restart handling
b sys_rt_sigreturn
ENDPROC(sys_rt_sigreturn_wrapper)
sys_statfs64_wrapper:
teq r1, #88
moveq r1, #84
b sys_statfs64
ENDPROC(sys_statfs64_wrapper)
sys_fstatfs64_wrapper:
teq r1, #88
moveq r1, #84
b sys_fstatfs64
ENDPROC(sys_fstatfs64_wrapper)
/*
* Note: off_4k (r5) is always units of 4K. If we can't do the requested
* offset, we return EINVAL.
*/
sys_mmap2:
#if PAGE_SHIFT > 12
tst r5, #PGOFF_MASK
moveq r5, r5, lsr #PAGE_SHIFT - 12
streq r5, [sp, #4]
beq sys_mmap_pgoff
mov r0, #-EINVAL
ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+ ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2014-07-01 00:29:12 +09:00
ret lr
#else
str r5, [sp, #4]
b sys_mmap_pgoff
#endif
ENDPROC(sys_mmap2)
#ifdef CONFIG_OABI_COMPAT
/*
* These are syscalls with argument register differences
*/
sys_oabi_pread64:
stmia sp, {r3, r4}
b sys_pread64
ENDPROC(sys_oabi_pread64)
sys_oabi_pwrite64:
stmia sp, {r3, r4}
b sys_pwrite64
ENDPROC(sys_oabi_pwrite64)
sys_oabi_truncate64:
mov r3, r2
mov r2, r1
b sys_truncate64
ENDPROC(sys_oabi_truncate64)
sys_oabi_ftruncate64:
mov r3, r2
mov r2, r1
b sys_ftruncate64
ENDPROC(sys_oabi_ftruncate64)
sys_oabi_readahead:
str r3, [sp]
mov r3, r2
mov r2, r1
b sys_readahead
ENDPROC(sys_oabi_readahead)
/*
* Let's declare a second syscall table for old ABI binaries
* using the compatibility syscall entries.
*/
#define ABI(native, compat) compat
#define OBSOLETE(syscall) syscall
.type sys_oabi_call_table, #object
ENTRY(sys_oabi_call_table)
#include "calls.S"
#undef ABI
#undef OBSOLETE
#endif