/* SPDX-License-Identifier: GPL-2.0-only */

/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 */
#ifndef __ASM_SPINLOCK_H
|
|
|
|
#define __ASM_SPINLOCK_H
|
|
|
|
|
|
|
|
#include <asm/spinlock_types.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <asm/barrier.h>
|
|
|
|
|
|
|
|
/* Racy snapshot of the lock word: non-zero when currently held by someone */
#define arch_spin_is_locked(x)	((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__)
|
2016-05-26 17:35:03 +09:00
|
|
|
|
2015-07-14 21:25:05 +09:00
|
|
|
#ifdef CONFIG_ARC_HAS_LLSC
|
|
|
|
|
|
|
|
/*
 * Acquire @lock, spinning until it is free (LLOCK/SCOND based).
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	unsigned int val;

	__asm__ __volatile__(
	"1:	llock	%[val], [%[slock]]	\n"
	"	breq	%[val], %[LOCKED], 1b	\n"	/* spin while LOCKED */
	"	scond	%[LOCKED], [%[slock]]	\n"	/* acquire */
	"	bnz	1b			\n"
	"					\n"
	: [val]		"=&r"	(val)
	: [slock]	"r"	(&(lock->slock)),
	  [LOCKED]	"r"	(__ARCH_SPIN_LOCK_LOCKED__)
	: "memory", "cc");

	/*
	 * ACQUIRE barrier to ensure load/store after taking the lock
	 * don't "bleed-up" out of the critical section (leak-in is allowed)
	 * http://www.spinics.net/lists/kernel/msg2010409.html
	 *
	 * ARCv2 only has load-load, store-store and all-all barrier
	 * thus need the full all-all barrier
	 */
	smp_mb();
}
|
|
|
|
|
|
|
|
/* 1 - lock taken successfully */
|
|
|
|
static inline int arch_spin_trylock(arch_spinlock_t *lock)
|
|
|
|
{
|
|
|
|
unsigned int val, got_it = 0;
|
|
|
|
|
|
|
|
__asm__ __volatile__(
|
|
|
|
"1: llock %[val], [%[slock]] \n"
|
|
|
|
" breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
|
|
|
|
" scond %[LOCKED], [%[slock]] \n" /* acquire */
|
|
|
|
" bnz 1b \n"
|
|
|
|
" mov %[got_it], 1 \n"
|
|
|
|
"4: \n"
|
|
|
|
" \n"
|
|
|
|
: [val] "=&r" (val),
|
|
|
|
[got_it] "+&r" (got_it)
|
|
|
|
: [slock] "r" (&(lock->slock)),
|
|
|
|
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
|
|
|
|
: "memory", "cc");
|
|
|
|
|
|
|
|
smp_mb();
|
|
|
|
|
|
|
|
return got_it;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Release @lock: a plain store suffices for LLSC; the preceding smp_mb()
 * provides the RELEASE barrier so critical-section accesses don't leak out.
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	smp_mb();

	WRITE_ONCE(lock->slock, __ARCH_SPIN_LOCK_UNLOCKED__);
}
|
|
|
|
|
2015-07-16 14:01:45 +09:00
|
|
|
/*
 * Read-write spinlocks, allowing multiple readers but only one writer.
 * Unfair locking as Writers could be starved indefinitely by Reader(s)
 */
|
|
|
|
|
|
|
|
/*
 * Take @rw for reading, spinning while a writer holds it.
 */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
	unsigned int val;

	/*
	 * zero means writer holds the lock exclusively, deny Reader.
	 * Otherwise grant lock to first/subseq reader
	 *
	 *	if (rw->counter > 0) {
	 *		rw->counter--;
	 *		ret = 1;
	 *	}
	 */

	__asm__ __volatile__(
	"1:	llock	%[val], [%[rwlock]]	\n"
	"	brls	%[val], %[WR_LOCKED], 1b\n"	/* <= 0: spin while write locked */
	"	sub	%[val], %[val], 1	\n"	/* reader lock */
	"	scond	%[val], [%[rwlock]]	\n"
	"	bnz	1b			\n"
	"					\n"
	: [val]		"=&r"	(val)
	: [rwlock]	"r"	(&(rw->counter)),
	  [WR_LOCKED]	"ir"	(0)
	: "memory", "cc");

	/* ACQUIRE barrier, pairs with the RELEASE in the unlock paths */
	smp_mb();
}
|
|
|
|
|
|
|
|
/* 1 - lock taken successfully */
|
|
|
|
static inline int arch_read_trylock(arch_rwlock_t *rw)
|
|
|
|
{
|
|
|
|
unsigned int val, got_it = 0;
|
|
|
|
|
|
|
|
__asm__ __volatile__(
|
|
|
|
"1: llock %[val], [%[rwlock]] \n"
|
|
|
|
" brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
|
|
|
|
" sub %[val], %[val], 1 \n" /* counter-- */
|
|
|
|
" scond %[val], [%[rwlock]] \n"
|
|
|
|
" bnz 1b \n" /* retry if collided with someone */
|
|
|
|
" mov %[got_it], 1 \n"
|
|
|
|
" \n"
|
|
|
|
"4: ; --- done --- \n"
|
|
|
|
|
|
|
|
: [val] "=&r" (val),
|
|
|
|
[got_it] "+&r" (got_it)
|
|
|
|
: [rwlock] "r" (&(rw->counter)),
|
|
|
|
[WR_LOCKED] "ir" (0)
|
|
|
|
: "memory", "cc");
|
|
|
|
|
|
|
|
smp_mb();
|
|
|
|
|
|
|
|
return got_it;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Take @rw for writing, spinning until no reader or writer holds it.
 */
static inline void arch_write_lock(arch_rwlock_t *rw)
{
	unsigned int val;

	/*
	 * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
	 * deny writer. Otherwise if unlocked grant to writer
	 * Hence the claim that Linux rwlocks are unfair to writers.
	 * (can be starved for an indefinite time by readers).
	 *
	 *	if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
	 *		rw->counter = 0;
	 *		ret = 1;
	 *	}
	 */

	__asm__ __volatile__(
	"1:	llock	%[val], [%[rwlock]]	\n"
	"	brne	%[val], %[UNLOCKED], 1b	\n"	/* while !UNLOCKED spin */
	"	mov	%[val], %[WR_LOCKED]	\n"
	"	scond	%[val], [%[rwlock]]	\n"
	"	bnz	1b			\n"
	"					\n"
	: [val]		"=&r"	(val)
	: [rwlock]	"r"	(&(rw->counter)),
	  [UNLOCKED]	"ir"	(__ARCH_RW_LOCK_UNLOCKED__),
	  [WR_LOCKED]	"ir"	(0)
	: "memory", "cc");

	/* ACQUIRE barrier, pairs with the RELEASE in the unlock paths */
	smp_mb();
}
|
|
|
|
|
|
|
|
/* 1 - lock taken successfully */
|
|
|
|
static inline int arch_write_trylock(arch_rwlock_t *rw)
|
|
|
|
{
|
|
|
|
unsigned int val, got_it = 0;
|
|
|
|
|
|
|
|
__asm__ __volatile__(
|
|
|
|
"1: llock %[val], [%[rwlock]] \n"
|
|
|
|
" brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
|
|
|
|
" mov %[val], %[WR_LOCKED] \n"
|
|
|
|
" scond %[val], [%[rwlock]] \n"
|
|
|
|
" bnz 1b \n" /* retry if collided with someone */
|
|
|
|
" mov %[got_it], 1 \n"
|
|
|
|
" \n"
|
|
|
|
"4: ; --- done --- \n"
|
|
|
|
|
|
|
|
: [val] "=&r" (val),
|
|
|
|
[got_it] "+&r" (got_it)
|
|
|
|
: [rwlock] "r" (&(rw->counter)),
|
|
|
|
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
|
|
|
|
[WR_LOCKED] "ir" (0)
|
|
|
|
: "memory", "cc");
|
|
|
|
|
|
|
|
smp_mb();
|
|
|
|
|
|
|
|
return got_it;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Drop a reader's hold on @rw: atomically increment the counter back.
 */
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
	unsigned int val;

	/* RELEASE barrier: critical-section accesses must complete first */
	smp_mb();

	/*
	 * rw->counter++;
	 */
	__asm__ __volatile__(
	"1:	llock	%[val], [%[rwlock]]	\n"
	"	add	%[val], %[val], 1	\n"
	"	scond	%[val], [%[rwlock]]	\n"
	"	bnz	1b			\n"
	"					\n"
	: [val]		"=&r"	(val)
	: [rwlock]	"r"	(&(rw->counter))
	: "memory", "cc");
}
|
|
|
|
|
|
|
|
/*
 * Drop the writer's hold on @rw: a plain store of the UNLOCKED value
 * suffices for LLSC; smp_mb() provides the RELEASE barrier.
 */
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
	smp_mb();

	WRITE_ONCE(rw->counter, __ARCH_RW_LOCK_UNLOCKED__);
}
|
|
|
|
|
2015-07-14 21:25:05 +09:00
|
|
|
#else /* !CONFIG_ARC_HAS_LLSC */
|
|
|
|
|
2013-01-18 18:42:18 +09:00
|
|
|
/*
 * Acquire @lock using the atomic EX (exchange) instruction: repeatedly
 * swap in the LOCKED value until the old value read back is not LOCKED.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;

	/*
	 * Per lkmm, smp_mb() is only required after _lock (and before_unlock)
	 * for ACQ and REL semantics respectively. However EX based spinlocks
	 * need the extra smp_mb to workaround a hardware quirk.
	 */
	smp_mb();

	__asm__ __volatile__(
	"1:	ex  %0, [%1]		\n"
#ifdef CONFIG_EZNPS_MTM_EXT
	"	.word %3		\n"
#endif
	"	breq  %0, %2, 1b	\n"
	: "+&r" (val)
	: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
#ifdef CONFIG_EZNPS_MTM_EXT
	, "i"(CTOP_INST_SCHD_RW)
#endif
	: "memory");

	smp_mb();
}
|
|
|
|
|
2015-07-14 21:25:05 +09:00
|
|
|
/* 1 - lock taken successfully */
|
2013-01-18 18:42:18 +09:00
|
|
|
static inline int arch_spin_trylock(arch_spinlock_t *lock)
|
|
|
|
{
|
2015-07-14 21:25:05 +09:00
|
|
|
unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
|
2013-01-18 18:42:18 +09:00
|
|
|
|
2014-11-20 19:12:09 +09:00
|
|
|
smp_mb();
|
|
|
|
|
2013-01-18 18:42:18 +09:00
|
|
|
__asm__ __volatile__(
|
|
|
|
"1: ex %0, [%1] \n"
|
2015-07-14 21:25:05 +09:00
|
|
|
: "+r" (val)
|
2013-01-18 18:42:18 +09:00
|
|
|
: "r"(&(lock->slock))
|
|
|
|
: "memory");
|
|
|
|
|
2014-11-20 19:12:09 +09:00
|
|
|
smp_mb();
|
|
|
|
|
2015-07-14 21:25:05 +09:00
|
|
|
return (val == __ARCH_SPIN_LOCK_UNLOCKED__);
|
2013-01-18 18:42:18 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Release @lock by exchanging in the UNLOCKED value.
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__;

	/*
	 * RELEASE barrier: given the instructions avail on ARCv2, full barrier
	 * is the only option
	 */
	smp_mb();

	/*
	 * EX is not really required here, a simple STore of 0 suffices.
	 * However this causes tasklist livelocks in SystemC based SMP virtual
	 * platforms where the systemc core scheduler uses EX as a cue for
	 * moving to next core. Do a git log of this file for details
	 */
	__asm__ __volatile__(
	"	ex  %0, [%1]	\n"
	: "+r" (val)
	: "r"(&(lock->slock))
	: "memory");

	/*
	 * see pairing version/comment in arch_spin_lock above
	 */
	smp_mb();
}
|
|
|
|
|
|
|
|
/*
 * Read-write spinlocks, allowing multiple readers but only one writer.
 * Unfair locking as Writers could be starved indefinitely by Reader(s)
 *
 * The spinlock itself is contained in @counter and access to it is
 * serialized with @lock_mutex.
 */
|
|
|
|
|
|
|
|
/* 1 - lock taken successfully */
|
|
|
|
static inline int arch_read_trylock(arch_rwlock_t *rw)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
2015-06-09 20:05:50 +09:00
|
|
|
unsigned long flags;
|
2013-01-18 18:42:18 +09:00
|
|
|
|
2015-06-09 20:05:50 +09:00
|
|
|
local_irq_save(flags);
|
2013-01-18 18:42:18 +09:00
|
|
|
arch_spin_lock(&(rw->lock_mutex));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* zero means writer holds the lock exclusively, deny Reader.
|
|
|
|
* Otherwise grant lock to first/subseq reader
|
|
|
|
*/
|
|
|
|
if (rw->counter > 0) {
|
|
|
|
rw->counter--;
|
|
|
|
ret = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
arch_spin_unlock(&(rw->lock_mutex));
|
2015-06-09 20:05:50 +09:00
|
|
|
local_irq_restore(flags);
|
2013-01-18 18:42:18 +09:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* 1 - lock taken successfully */
|
|
|
|
static inline int arch_write_trylock(arch_rwlock_t *rw)
|
|
|
|
{
|
|
|
|
int ret = 0;
|
2015-06-09 20:05:50 +09:00
|
|
|
unsigned long flags;
|
2013-01-18 18:42:18 +09:00
|
|
|
|
2015-06-09 20:05:50 +09:00
|
|
|
local_irq_save(flags);
|
2013-01-18 18:42:18 +09:00
|
|
|
arch_spin_lock(&(rw->lock_mutex));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
|
|
|
|
* deny writer. Otherwise if unlocked grant to writer
|
|
|
|
* Hence the claim that Linux rwlocks are unfair to writers.
|
|
|
|
* (can be starved for an indefinite time by readers).
|
|
|
|
*/
|
|
|
|
if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
|
|
|
|
rw->counter = 0;
|
|
|
|
ret = 1;
|
|
|
|
}
|
|
|
|
arch_spin_unlock(&(rw->lock_mutex));
|
2015-06-09 20:05:50 +09:00
|
|
|
local_irq_restore(flags);
|
2013-01-18 18:42:18 +09:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Spin until the trylock variant succeeds */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
	while (!arch_read_trylock(rw))
		cpu_relax();
}
|
|
|
|
|
|
|
|
/* Spin until the trylock variant succeeds */
static inline void arch_write_lock(arch_rwlock_t *rw)
{
	while (!arch_write_trylock(rw))
		cpu_relax();
}
|
|
|
|
|
|
|
|
/*
 * Drop a reader's hold on @rw: increment @counter under @lock_mutex.
 */
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
	unsigned long flags;

	local_irq_save(flags);
	arch_spin_lock(&(rw->lock_mutex));
	rw->counter++;
	arch_spin_unlock(&(rw->lock_mutex));
	local_irq_restore(flags);
}
|
|
|
|
|
|
|
|
/*
 * Drop the writer's hold on @rw: restore @counter to the UNLOCKED value
 * under @lock_mutex.
 */
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
	unsigned long flags;

	local_irq_save(flags);
	arch_spin_lock(&(rw->lock_mutex));
	rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
	arch_spin_unlock(&(rw->lock_mutex));
	local_irq_restore(flags);
}
|
|
|
|
|
2015-07-16 14:01:45 +09:00
|
|
|
#endif
|
|
|
|
|
2013-01-18 18:42:18 +09:00
|
|
|
#endif /* __ASM_SPINLOCK_H */
|