Merge LKMM and RCU commits

This commit is contained in:
Paul E. McKenney 2019-06-24 09:12:39 -07:00
commit b989ff0705
49 changed files with 752 additions and 447 deletions

View File

@ -12,6 +12,7 @@ please read on.
Reference counting on elements of lists which are protected by traditional
reader/writer spinlocks or semaphores is straightforward:
CODE LISTING A:
1. 2.
add() search_and_reference()
{ {
@ -28,7 +29,8 @@ add() search_and_reference()
release_referenced() delete()
{ {
... write_lock(&list_lock);
atomic_dec(&el->rc, relfunc) ...
if(atomic_dec_and_test(&el->rc)) ...
kfree(el);
... remove_element
} write_unlock(&list_lock);
...
@ -44,6 +46,7 @@ search_and_reference() could potentially hold reference to an element which
has already been deleted from the list/array. Use atomic_inc_not_zero()
in this scenario as follows:
CODE LISTING B:
1. 2.
add() search_and_reference()
{ {
@ -79,6 +82,7 @@ search_and_reference() code path. In such cases, the
atomic_dec_and_test() may be moved from delete() to el_free()
as follows:
CODE LISTING C:
1. 2.
add() search_and_reference()
{ {
@ -114,6 +118,17 @@ element can therefore safely be freed. This in turn guarantees that if
any reader finds the element, that reader may safely acquire a reference
without checking the value of the reference counter.
A clear advantage of the RCU-based pattern in listing C over the one
in listing B is that any call to search_and_reference() that locates
a given object will succeed in obtaining a reference to that object,
even given a concurrent invocation of delete() for that same object.
Similarly, a clear advantage of both listings B and C over listing A is
that a call to delete() is not delayed even if there are an arbitrarily
large number of calls to search_and_reference() searching for the same
object that delete() was invoked on. Instead, all that is delayed is
the eventual invocation of kfree(), which is usually not a problem on
modern computer systems, even the small ones.
In cases where delete() can sleep, synchronize_rcu() can be called from
delete(), so that el_free() can be subsumed into delete() as follows:
@ -130,3 +145,7 @@ delete()
kfree(el);
...
}
As additional examples in the kernel, the pattern in listing C is used by
reference counting of struct pid, while the pattern in listing B is used by
struct posix_acl.

View File

@ -153,7 +153,7 @@ rcupdate.rcu_task_stall_timeout
This boot/sysfs parameter controls the RCU-tasks stall warning
interval. A value of zero or less suppresses RCU-tasks stall
warnings. A positive value sets the stall-warning interval
in jiffies. An RCU-tasks stall warning starts with the line:
in seconds. An RCU-tasks stall warning starts with the line:
INFO: rcu_tasks detected stalls on tasks:

View File

@ -212,7 +212,7 @@ synchronize_rcu()
rcu_assign_pointer()
typeof(p) rcu_assign_pointer(p, typeof(p) v);
void rcu_assign_pointer(p, typeof(p) v);
Yes, rcu_assign_pointer() -is- implemented as a macro, though it
would be cool to be able to declare a function in this manner.
@ -220,9 +220,9 @@ rcu_assign_pointer()
The updater uses this function to assign a new value to an
RCU-protected pointer, in order to safely communicate the change
in value from the updater to the reader. This function returns
the new value, and also executes any memory-barrier instructions
required for a given CPU architecture.
in value from the updater to the reader. This macro does not
evaluate to an rvalue, but it does execute any memory-barrier
instructions required for a given CPU architecture.
Perhaps just as important, it serves to document (1) which
pointers are protected by RCU and (2) the point at which a

View File

@ -3752,6 +3752,12 @@
the propagation of recent CPU-hotplug changes up
the rcu_node combining tree.
rcutree.use_softirq= [KNL]
If set to zero, move all RCU_SOFTIRQ processing to
per-CPU rcuc kthreads. Defaults to a non-zero
value, meaning that RCU_SOFTIRQ is used by default.
Specify rcutree.use_softirq=0 to use rcuc kthreads.
rcutree.rcu_fanout_exact= [KNL]
Disable autobalancing of the rcu_node combining
tree. This is used by rcutorture, and might

View File

@ -3,7 +3,7 @@ Circular Buffers
================
:Author: David Howells <dhowells@redhat.com>
:Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
:Author: Paul E. McKenney <paulmck@linux.ibm.com>
Linux provides a number of features that can be used to implement circular

View File

@ -3,7 +3,7 @@
============================
By: David Howells <dhowells@redhat.com>
Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Paul E. McKenney <paulmck@linux.ibm.com>
Will Deacon <will.deacon@arm.com>
Peter Zijlstra <peterz@infradead.org>

View File

@ -24,7 +24,7 @@ Documentation/memory-barriers.txt
=========================
저자: David Howells <dhowells@redhat.com>
Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Paul E. McKenney <paulmck@linux.ibm.com>
Will Deacon <will.deacon@arm.com>
Peter Zijlstra <peterz@infradead.org>

View File

@ -632,11 +632,18 @@ do { \
"IRQs not disabled as expected\n"); \
} while (0)
/*
 * Warn (once) with "Not in hardirq as expected" if the current task's
 * hardirq_context is zero.  The check is suppressed when debug_locks is
 * clear or when current->lockdep_recursion is nonzero.
 */
#define lockdep_assert_in_irq() do { \
WARN_ONCE(debug_locks && !current->lockdep_recursion && \
!current->hardirq_context, \
"Not in hardirq as expected\n"); \
} while (0)
#else
# define might_lock(lock) do { } while (0)
# define might_lock_read(lock) do { } while (0)
# define lockdep_assert_irqs_enabled() do { } while (0)
# define lockdep_assert_irqs_disabled() do { } while (0)
# define lockdep_assert_in_irq() do { } while (0)
#endif
#ifdef CONFIG_LOCKDEP

View File

@ -21,6 +21,7 @@
#include <linux/rbtree_latch.h>
#include <linux/error-injection.h>
#include <linux/tracepoint-defs.h>
#include <linux/srcu.h>
#include <linux/percpu.h>
#include <asm/module.h>
@ -450,6 +451,10 @@ struct module {
unsigned int num_tracepoints;
tracepoint_ptr_t *tracepoints_ptrs;
#endif
#ifdef CONFIG_TREE_SRCU
unsigned int num_srcu_structs;
struct srcu_struct **srcu_struct_ptrs;
#endif
#ifdef CONFIG_BPF_EVENTS
unsigned int num_bpf_raw_events;
struct bpf_raw_event_map *bpf_raw_events;

View File

@ -17,14 +17,18 @@ struct percpu_rw_semaphore {
int readers_block;
};
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
#define __DEFINE_PERCPU_RWSEM(name, is_static) \
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
static struct percpu_rw_semaphore name = { \
.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \
is_static struct percpu_rw_semaphore name = { \
.rss = __RCU_SYNC_INITIALIZER(name.rss), \
.read_count = &__percpu_rwsem_rc_##name, \
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
.writer = __RCUWAIT_INITIALIZER(name.writer), \
}
/*
 * Define and statically initialize a percpu_rw_semaphore called @name
 * without a storage-class specifier on the semaphore itself.
 */
#define DEFINE_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, /* not static */)
/* Same as DEFINE_PERCPU_RWSEM(), but the semaphore is declared static. */
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
__DEFINE_PERCPU_RWSEM(name, static)
extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
extern void __percpu_up_read(struct percpu_rw_semaphore *);

View File

@ -13,62 +13,44 @@
#include <linux/wait.h>
#include <linux/rcupdate.h>
enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC };
/* Structure to mediate between updaters and fastpath-using readers. */
struct rcu_sync {
int gp_state;
int gp_count;
wait_queue_head_t gp_wait;
int cb_state;
struct rcu_head cb_head;
enum rcu_sync_type gp_type;
};
extern void rcu_sync_lockdep_assert(struct rcu_sync *);
/**
* rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
* Returns true if readers are permitted to use their fastpaths.
* Must be invoked within an RCU read-side critical section whose
* flavor matches that of the rcu_sync struture.
* Returns true if readers are permitted to use their fastpaths. Must be
* invoked within some flavor of RCU read-side critical section.
*/
static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
{
#ifdef CONFIG_PROVE_RCU
rcu_sync_lockdep_assert(rsp);
#endif
return !rsp->gp_state; /* GP_IDLE */
RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
!rcu_read_lock_bh_held() &&
!rcu_read_lock_sched_held(),
"suspicious rcu_sync_is_idle() usage");
return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
}
extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
extern void rcu_sync_init(struct rcu_sync *);
extern void rcu_sync_enter_start(struct rcu_sync *);
extern void rcu_sync_enter(struct rcu_sync *);
extern void rcu_sync_exit(struct rcu_sync *);
extern void rcu_sync_dtor(struct rcu_sync *);
#define __RCU_SYNC_INITIALIZER(name, type) { \
#define __RCU_SYNC_INITIALIZER(name) { \
.gp_state = 0, \
.gp_count = 0, \
.gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \
.cb_state = 0, \
.gp_type = type, \
}
#define __DEFINE_RCU_SYNC(name, type) \
struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type)
#define DEFINE_RCU_SYNC(name) \
__DEFINE_RCU_SYNC(name, RCU_SYNC)
#define DEFINE_RCU_SCHED_SYNC(name) \
__DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC)
#define DEFINE_RCU_BH_SYNC(name) \
__DEFINE_RCU_SYNC(name, RCU_BH_SYNC)
#define DEFINE_RCU_SYNC(name) \
struct rcu_sync name = __RCU_SYNC_INITIALIZER(name)
#endif /* _LINUX_RCU_SYNC_H_ */

View File

@ -367,16 +367,15 @@ static inline void rcu_preempt_sleep_check(void) { }
* other macros that it invokes.
*/
#define rcu_assign_pointer(p, v) \
({ \
do { \
uintptr_t _r_a_p__v = (uintptr_t)(v); \
rcu_check_sparse(p, __rcu); \
rcu_check_sparse(p, __rcu); \
\
if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \
WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \
else \
smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \
_r_a_p__v; \
})
} while (0)
/**
* rcu_swap_protected() - swap an RCU and a regular pointer
@ -588,7 +587,7 @@ static inline void rcu_preempt_sleep_check(void) { }
* read-side critical sections may be preempted and they may also block, but
* only when acquiring spinlocks that are subject to priority inheritance.
*/
static inline void rcu_read_lock(void)
static __always_inline void rcu_read_lock(void)
{
__rcu_read_lock();
__acquire(RCU);
@ -805,7 +804,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree
* @rcu_head: the name of the struct rcu_head within the type of @ptr.
* @rhf: the name of the struct rcu_head within the type of @ptr.
*
* Many rcu callbacks functions just call kfree() on the base structure.
* These functions are trivial, but their size adds up, and furthermore
@ -828,9 +827,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* The BUILD_BUG_ON check must not involve any function calls, hence the
* checks are done in macros here.
*/
#define kfree_rcu(ptr, rcu_head) \
__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
#define kfree_rcu(ptr, rhf) \
do { \
typeof (ptr) ___p = (ptr); \
\
if (___p) \
__kfree_rcu(&((___p)->rhf), offsetof(typeof(*(ptr)), rhf)); \
} while (0)
/*
* Place this after a lock-acquisition primitive to guarantee that

View File

@ -565,7 +565,7 @@ union rcu_special {
u8 blocked;
u8 need_qs;
u8 exp_hint; /* Hint for performance. */
u8 pad; /* No garbage from compiler! */
u8 deferred_qs;
} b; /* Bits. */
u32 s; /* Set of bits. */
};

View File

@ -120,9 +120,17 @@ struct srcu_struct {
*
* See include/linux/percpu-defs.h for the rules on per-CPU variables.
*/
#define __DEFINE_SRCU(name, is_static) \
static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\
is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data)
/*
 * When built as part of a module, the srcu_struct is emitted without an
 * initializer, together with a const pointer to it that is placed in the
 * "___srcu_struct_ptrs" section.  The module loader gathers that section
 * into mod->srcu_struct_ptrs, and the SRCU module notifier calls
 * init_srcu_struct() on each entry when the module is coming.
 */
#ifdef MODULE
# define __DEFINE_SRCU(name, is_static) \
is_static struct srcu_struct name; \
struct srcu_struct * const __srcu_struct_##name \
__section("___srcu_struct_ptrs") = &name
#else
/*
 * Otherwise the srcu_struct is initialized at build time through
 * __SRCU_STRUCT_INIT(), backed by its own static per-CPU srcu_data.
 */
# define __DEFINE_SRCU(name, is_static) \
static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data); \
is_static struct srcu_struct name = \
__SRCU_STRUCT_INIT(name, name##_srcu_data)
#endif
#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)

View File

@ -66,7 +66,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void));
/* Task stuttering, which forces load/no-load transitions. */
bool stutter_wait(const char *title);
int torture_stutter_init(int s);
int torture_stutter_init(int s, int sgap);
/* Initialization and cleanup. */
bool torture_init_begin(char *ttype, int v);

View File

@ -101,7 +101,7 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
*/
static DEFINE_SPINLOCK(cgroup_file_kn_lock);
struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
#define cgroup_assert_mutex_or_rcu_locked() \
RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
@ -5616,7 +5616,6 @@ int __init cgroup_init(void)
int ssid;
BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));

View File

@ -46,7 +46,7 @@ static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
static struct percpu_rw_semaphore dup_mmap_sem;
DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem);
/* Have a copy of original instruction */
#define UPROBE_COPY_INSN 0
@ -2302,7 +2302,5 @@ void __init uprobes_init(void)
for (i = 0; i < UPROBES_HASH_SZ; i++)
mutex_init(&uprobes_mmap_mutex[i]);
BUG_ON(percpu_init_rwsem(&dup_mmap_sem));
BUG_ON(register_die_notifier(&uprobe_exception_nb));
}

View File

@ -975,7 +975,7 @@ static int __init lock_torture_init(void)
goto unwind;
}
if (stutter > 0) {
firsterr = torture_stutter_init(stutter);
firsterr = torture_stutter_init(stutter, stutter);
if (firsterr)
goto unwind;
}

View File

@ -17,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
rcu_sync_init(&sem->rss);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
rcuwait_init(&sem->writer);
sem->readers_block = 0;

View File

@ -3095,6 +3095,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->tracepoints_ptrs),
&mod->num_tracepoints);
#endif
#ifdef CONFIG_TREE_SRCU
mod->srcu_struct_ptrs = section_objs(info, "___srcu_struct_ptrs",
sizeof(*mod->srcu_struct_ptrs),
&mod->num_srcu_structs);
#endif
#ifdef CONFIG_BPF_EVENTS
mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
sizeof(*mod->bpf_raw_events),

View File

@ -446,6 +446,7 @@ void rcu_request_urgent_qs_task(struct task_struct *t);
enum rcutorture_type {
RCU_FLAVOR,
RCU_TASKS_FLAVOR,
RCU_TRIVIAL_FLAVOR,
SRCU_FLAVOR,
INVALID_RCU_FLAVOR
};
@ -479,6 +480,10 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
#endif
#endif
#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask);
#endif
#ifdef CONFIG_TINY_SRCU
static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,

View File

@ -299,6 +299,7 @@ struct rcu_torture_ops {
int irq_capable;
int can_boost;
int extendables;
int slow_gps;
const char *name;
};
@ -667,9 +668,51 @@ static struct rcu_torture_ops tasks_ops = {
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
.slow_gps = 1,
.name = "tasks"
};
/*
 * Definitions for trivial CONFIG_PREEMPT=n-only torture testing.
 * This implementation does not necessarily work well with CPU hotplug.
 *
 * The grace-period wait migrates the calling task onto each online CPU
 * in turn and complains (once) if a migration did not land on the CPU
 * that was requested.
 */
static void synchronize_rcu_trivial(void)
{
	int target;

	for_each_online_cpu(target) {
		/* Ask to run on exactly this CPU, then verify we got there. */
		rcutorture_sched_setaffinity(current->pid, cpumask_of(target));
		WARN_ON_ONCE(raw_smp_processor_id() != target);
	}
}
/*
 * Begin a "trivial"-flavor read-side critical section by disabling
 * preemption.  Always returns 0.
 */
static int rcu_torture_read_lock_trivial(void) __acquires(RCU)
{
preempt_disable();
return 0;
}
/*
 * End a "trivial"-flavor read-side critical section by re-enabling
 * preemption.  @idx is not used.
 */
static void rcu_torture_read_unlock_trivial(int idx) __releases(RCU)
{
preempt_enable();
}
/*
 * Operations vector for the "trivial" flavor.  Readers disable preemption,
 * and both .sync and .exp_sync map to synchronize_rcu_trivial().  This
 * initializer supplies no .call hook and leaves .fqs and .stats NULL.
 */
static struct rcu_torture_ops trivial_ops = {
.ttype = RCU_TRIVIAL_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = rcu_torture_read_lock_trivial,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_torture_read_unlock_trivial,
.get_gp_seq = rcu_no_completed,
.sync = synchronize_rcu_trivial,
.exp_sync = synchronize_rcu_trivial,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
.name = "trivial"
};
static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
{
if (!cur_ops->gp_diff)
@ -1010,10 +1053,17 @@ rcu_torture_writer(void *arg)
!rcu_gp_is_normal();
}
rcu_torture_writer_state = RTWS_STUTTER;
if (stutter_wait("rcu_torture_writer"))
if (stutter_wait("rcu_torture_writer") &&
!READ_ONCE(rcu_fwd_cb_nodelay) &&
!cur_ops->slow_gps &&
!torture_must_stop())
for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
if (list_empty(&rcu_tortures[i].rtort_free))
WARN_ON_ONCE(1);
if (list_empty(&rcu_tortures[i].rtort_free) &&
rcu_access_pointer(rcu_torture_current) !=
&rcu_tortures[i]) {
rcu_ftrace_dump(DUMP_ALL);
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
}
} while (!torture_must_stop());
/* Reset expediting back to unexpedited. */
if (expediting > 0)
@ -1358,8 +1408,9 @@ rcu_torture_stats_print(void)
}
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
pr_cont("rtc: %p %s: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
rcu_torture_current,
rcu_torture_current ? "ver" : "VER",
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
atomic_read(&n_rcu_torture_alloc),
@ -1661,6 +1712,17 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
}
/*
 * Yield the CPU during forward-progress testing.  Kernels built with both
 * CONFIG_PREEMPT and CONFIG_NO_HZ_FULL call schedule() directly, and only
 * when a reschedule is pending; every other configuration relies on
 * cond_resched().
 */
static void rcu_torture_fwd_prog_cond_resched(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT) || !IS_ENABLED(CONFIG_NO_HZ_FULL)) {
		cond_resched();
		return;
	}

	// Give the scheduler a chance, even on nohz_full CPUs.
	if (need_resched())
		schedule();
}
/*
* Free all callbacks on the rcu_fwd_cb_head list, either because the
* test is over or because we hit an OOM event.
@ -1674,16 +1736,18 @@ static unsigned long rcu_torture_fwd_prog_cbfree(void)
for (;;) {
spin_lock_irqsave(&rcu_fwd_lock, flags);
rfcp = rcu_fwd_cb_head;
if (!rfcp)
if (!rfcp) {
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
break;
}
rcu_fwd_cb_head = rfcp->rfc_next;
if (!rcu_fwd_cb_head)
rcu_fwd_cb_tail = &rcu_fwd_cb_head;
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
kfree(rfcp);
freed++;
rcu_torture_fwd_prog_cond_resched();
}
spin_unlock_irqrestore(&rcu_fwd_lock, flags);
return freed;
}
@ -1707,6 +1771,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
}
/* Tight loop containing cond_resched(). */
WRITE_ONCE(rcu_fwd_cb_nodelay, true);
cur_ops->sync(); /* Later readers see above write. */
if (selfpropcb) {
WRITE_ONCE(fcs.stop, 0);
cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb);
@ -1724,7 +1790,7 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
udelay(10);
cur_ops->readunlock(idx);
if (!fwd_progress_need_resched || need_resched())
cond_resched();
rcu_torture_fwd_prog_cond_resched();
}
(*tested_tries)++;
if (!time_before(jiffies, stopat) &&
@ -1745,6 +1811,8 @@ static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries)
WARN_ON(READ_ONCE(fcs.stop) != 2);
destroy_rcu_head_on_stack(&fcs.rh);
}
schedule_timeout_uninterruptible(HZ / 10); /* Let kthreads recover. */
WRITE_ONCE(rcu_fwd_cb_nodelay, false);
}
/* Carry out call_rcu() forward-progress testing. */
@ -1765,6 +1833,8 @@ static void rcu_torture_fwd_prog_cr(void)
if (READ_ONCE(rcu_fwd_emergency_stop))
return; /* Get out of the way quickly, no GP wait! */
if (!cur_ops->call)
return; /* Can't do call_rcu() fwd prog without ->call. */
/* Loop continuously posting RCU callbacks. */
WRITE_ONCE(rcu_fwd_cb_nodelay, true);
@ -1805,7 +1875,7 @@ static void rcu_torture_fwd_prog_cr(void)
rfcp->rfc_gps = 0;
}
cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr);
cond_resched();
rcu_torture_fwd_prog_cond_resched();
}
stoppedat = jiffies;
n_launders_cb_snap = READ_ONCE(n_launders_cb);
@ -1814,7 +1884,6 @@ static void rcu_torture_fwd_prog_cr(void)
cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */
(void)rcu_torture_fwd_prog_cbfree();
WRITE_ONCE(rcu_fwd_cb_nodelay, false);
if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) {
WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED);
pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n",
@ -1825,6 +1894,8 @@ static void rcu_torture_fwd_prog_cr(void)
n_max_gps, n_max_cbs, cver, gps);
rcu_torture_fwd_cb_hist();
}
schedule_timeout_uninterruptible(HZ); /* Let CBs drain. */
WRITE_ONCE(rcu_fwd_cb_nodelay, false);
}
@ -2240,7 +2311,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
&busted_srcud_ops, &tasks_ops,
&busted_srcud_ops, &tasks_ops, &trivial_ops,
};
if (!torture_init_begin(torture_type, verbose))
@ -2363,7 +2434,10 @@ rcu_torture_init(void)
if (stutter < 0)
stutter = 0;
if (stutter) {
firsterr = torture_stutter_init(stutter * HZ);
int t;
t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ;
firsterr = torture_stutter_init(stutter * HZ, t);
if (firsterr)
goto unwind;
}

View File

@ -831,8 +831,8 @@ static void srcu_leak_callback(struct rcu_head *rhp)
* srcu_read_lock(), and srcu_read_unlock() that are all passed the same
* srcu_struct structure.
*/
void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
rcu_callback_t func, bool do_norm)
static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp,
rcu_callback_t func, bool do_norm)
{
unsigned long flags;
int idx;
@ -1310,3 +1310,68 @@ void __init srcu_init(void)
queue_work(rcu_gp_wq, &ssp->work.work);
}
}
#ifdef CONFIG_MODULES
/*
 * Initialize any global-scope srcu_struct structures used by this module.
 *
 * Walks the mod->num_srcu_structs entries of mod->srcu_struct_ptrs and
 * hands each one to init_srcu_struct().  The walk stops at the first
 * failure, which is warned about once and returned to the caller;
 * structures initialized earlier in the walk are not torn down here.
 * Returns 0 if every entry was initialized.
 */
static int srcu_module_coming(struct module *mod)
{
int i;
struct srcu_struct **sspp = mod->srcu_struct_ptrs;
int ret;
/*
 * NOTE(review): init_srcu_struct() may be a macro that stringifies its
 * argument for lockdep naming in some configurations -- confirm before
 * changing the "*(sspp++)" expression below.
 */
for (i = 0; i < mod->num_srcu_structs; i++) {
ret = init_srcu_struct(*(sspp++));
if (WARN_ON_ONCE(ret))
return ret;
}
return 0;
}
/*
 * Clean up any global-scope srcu_struct structures used by this module:
 * every entry listed in mod->srcu_struct_ptrs is passed to
 * cleanup_srcu_struct().
 */
static void srcu_module_going(struct module *mod)
{
	struct srcu_struct **sspp = mod->srcu_struct_ptrs;
	int idx;

	for (idx = 0; idx < mod->num_srcu_structs; idx++)
		cleanup_srcu_struct(sspp[idx]);
}
/*
 * Module-notifier callback handling one module, either coming or going.
 * A coming module gets its srcu_struct structures initialized and the
 * result of that initialization is returned; a going module gets them
 * cleaned up.  All other module states are ignored and yield 0.
 */
static int srcu_module_notify(struct notifier_block *self,
			      unsigned long val, void *data)
{
	struct module *mod = data;

	if (val == MODULE_STATE_COMING)
		return srcu_module_coming(mod);

	if (val == MODULE_STATE_GOING)
		srcu_module_going(mod);

	return 0;
}
/*
 * Notifier block that routes module state changes to srcu_module_notify()
 * at priority 0.
 */
static struct notifier_block srcu_module_nb = {
.notifier_call = srcu_module_notify,
.priority = 0,
};
/*
 * Register the SRCU module notifier.  If registration fails, log a
 * warning and hand the error back to the initcall machinery.
 */
static __init int init_srcu_module_notifier(void)
{
	int err = register_module_notifier(&srcu_module_nb);

	if (err)
		pr_warn("Failed to register srcu module notifier\n");
	return err;
}
late_initcall(init_srcu_module_notifier);
#endif /* #ifdef CONFIG_MODULES */

View File

@ -10,65 +10,18 @@
#include <linux/rcu_sync.h>
#include <linux/sched.h>
#ifdef CONFIG_PROVE_RCU
#define __INIT_HELD(func) .held = func,
#else
#define __INIT_HELD(func)
#endif
static const struct {
void (*sync)(void);
void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
void (*wait)(void);
#ifdef CONFIG_PROVE_RCU
int (*held)(void);
#endif
} gp_ops[] = {
[RCU_SYNC] = {
.sync = synchronize_rcu,
.call = call_rcu,
.wait = rcu_barrier,
__INIT_HELD(rcu_read_lock_held)
},
[RCU_SCHED_SYNC] = {
.sync = synchronize_rcu,
.call = call_rcu,
.wait = rcu_barrier,
__INIT_HELD(rcu_read_lock_sched_held)
},
[RCU_BH_SYNC] = {
.sync = synchronize_rcu,
.call = call_rcu,
.wait = rcu_barrier,
__INIT_HELD(rcu_read_lock_bh_held)
},
};
enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
enum { GP_IDLE = 0, GP_ENTER, GP_PASSED, GP_EXIT, GP_REPLAY };
#define rss_lock gp_wait.lock
#ifdef CONFIG_PROVE_RCU
void rcu_sync_lockdep_assert(struct rcu_sync *rsp)
{
RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
"suspicious rcu_sync_is_idle() usage");
}
EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
#endif
/**
* rcu_sync_init() - Initialize an rcu_sync structure
* @rsp: Pointer to rcu_sync structure to be initialized
* @type: Flavor of RCU with which to synchronize rcu_sync structure
*/
void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type)
void rcu_sync_init(struct rcu_sync *rsp)
{
memset(rsp, 0, sizeof(*rsp));
init_waitqueue_head(&rsp->gp_wait);
rsp->gp_type = type;
}
/**
@ -86,6 +39,70 @@ void rcu_sync_enter_start(struct rcu_sync *rsp)
rsp->gp_state = GP_PASSED;
}
static void rcu_sync_func(struct rcu_head *rhp);
/*
 * Queue rcu_sync_func() via call_rcu() on the rcu_head embedded in @rsp
 * (rsp->cb_head).
 */
static void rcu_sync_call(struct rcu_sync *rsp)
{
call_rcu(&rsp->cb_head, rcu_sync_func);
}
/**
 * rcu_sync_func() - Callback function managing reader access to fastpath
 * @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
 *
 * This function is passed to call_rcu() by rcu_sync_enter() and
 * rcu_sync_exit(), so that it is invoked after a grace period following
 * that invocation of enter/exit.
 *
 * If it is called by rcu_sync_enter() it signals that all the readers were
 * switched onto slow path.
 *
 * If it is called by rcu_sync_exit() it takes action based on events that
 * have taken place in the meantime, so that closely spaced rcu_sync_enter()
 * and rcu_sync_exit() pairs need not wait for a grace period.
 *
 * If another rcu_sync_enter() is invoked before the grace period
 * ended, reset state to allow the next rcu_sync_exit() to let the
 * readers back onto their fastpaths (after a grace period).  If both
 * another rcu_sync_enter() and its matching rcu_sync_exit() are invoked
 * before the grace period ended, re-invoke call_rcu() on behalf of that
 * rcu_sync_exit().  Otherwise, set all state back to idle so that readers
 * can again use their fastpaths.
 */
static void rcu_sync_func(struct rcu_head *rhp)
{
struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
unsigned long flags;
/* Neither GP_IDLE nor GP_PASSED is expected when this function runs. */
WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
/* rss_lock is gp_wait.lock, so the wakeup below runs with it held. */
spin_lock_irqsave(&rsp->rss_lock, flags);
if (rsp->gp_count) {
/*
 * We're at least a GP after the GP_IDLE->GP_ENTER transition.
 */
WRITE_ONCE(rsp->gp_state, GP_PASSED);
wake_up_locked(&rsp->gp_wait);
} else if (rsp->gp_state == GP_REPLAY) {
/*
 * A new rcu_sync_exit() has happened; requeue the callback to
 * catch a later GP.
 */
WRITE_ONCE(rsp->gp_state, GP_EXIT);
rcu_sync_call(rsp);
} else {
/*
 * We're at least a GP after the last rcu_sync_exit(); everybody
 * will now have observed the write side critical section.
 * Let 'em rip!
 */
WRITE_ONCE(rsp->gp_state, GP_IDLE);
}
spin_unlock_irqrestore(&rsp->rss_lock, flags);
}
/**
* rcu_sync_enter() - Force readers onto slowpath
* @rsp: Pointer to rcu_sync structure to use for synchronization
@ -103,84 +120,43 @@ void rcu_sync_enter_start(struct rcu_sync *rsp)
*/
void rcu_sync_enter(struct rcu_sync *rsp)
{
bool need_wait, need_sync;
int gp_state;
spin_lock_irq(&rsp->rss_lock);
need_wait = rsp->gp_count++;
need_sync = rsp->gp_state == GP_IDLE;
if (need_sync)
rsp->gp_state = GP_PENDING;
gp_state = rsp->gp_state;
if (gp_state == GP_IDLE) {
WRITE_ONCE(rsp->gp_state, GP_ENTER);
WARN_ON_ONCE(rsp->gp_count);
/*
* Note that we could simply do rcu_sync_call(rsp) here and
* avoid the "if (gp_state == GP_IDLE)" block below.
*
* However, synchronize_rcu() can be faster if rcu_expedited
* or rcu_blocking_is_gp() is true.
*
* Another reason is that we can't wait for rcu callback if
* we are called at early boot time but this shouldn't happen.
*/
}
rsp->gp_count++;
spin_unlock_irq(&rsp->rss_lock);
WARN_ON_ONCE(need_wait && need_sync);
if (need_sync) {
gp_ops[rsp->gp_type].sync();
rsp->gp_state = GP_PASSED;
wake_up_all(&rsp->gp_wait);
} else if (need_wait) {
wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED);
} else {
if (gp_state == GP_IDLE) {
/*
* Possible when there's a pending CB from a rcu_sync_exit().
* Nobody has yet been allowed the 'fast' path and thus we can
* avoid doing any sync(). The callback will get 'dropped'.
* See the comment above, this simply does the "synchronous"
* call_rcu(rcu_sync_func) which does GP_ENTER -> GP_PASSED.
*/
WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
synchronize_rcu();
rcu_sync_func(&rsp->cb_head);
/* Not really needed, wait_event() would see GP_PASSED. */
return;
}
wait_event(rsp->gp_wait, READ_ONCE(rsp->gp_state) >= GP_PASSED);
}
/**
* rcu_sync_func() - Callback function managing reader access to fastpath
* @rhp: Pointer to rcu_head in rcu_sync structure to use for synchronization
*
* This function is passed to one of the call_rcu() functions by
* rcu_sync_exit(), so that it is invoked after a grace period following the
* that invocation of rcu_sync_exit(). It takes action based on events that
* have taken place in the meantime, so that closely spaced rcu_sync_enter()
* and rcu_sync_exit() pairs need not wait for a grace period.
*
* If another rcu_sync_enter() is invoked before the grace period
* ended, reset state to allow the next rcu_sync_exit() to let the
* readers back onto their fastpaths (after a grace period). If both
* another rcu_sync_enter() and its matching rcu_sync_exit() are invoked
* before the grace period ended, re-invoke call_rcu() on behalf of that
* rcu_sync_exit(). Otherwise, set all state back to idle so that readers
* can again use their fastpaths.
*/
static void rcu_sync_func(struct rcu_head *rhp)
{
struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head);
unsigned long flags;
WARN_ON_ONCE(rsp->gp_state != GP_PASSED);
WARN_ON_ONCE(rsp->cb_state == CB_IDLE);
spin_lock_irqsave(&rsp->rss_lock, flags);
if (rsp->gp_count) {
/*
* A new rcu_sync_begin() has happened; drop the callback.
*/
rsp->cb_state = CB_IDLE;
} else if (rsp->cb_state == CB_REPLAY) {
/*
* A new rcu_sync_exit() has happened; requeue the callback
* to catch a later GP.
*/
rsp->cb_state = CB_PENDING;
gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
} else {
/*
* We're at least a GP after rcu_sync_exit(); eveybody will now
* have observed the write side critical section. Let 'em rip!.
*/
rsp->cb_state = CB_IDLE;
rsp->gp_state = GP_IDLE;
}
spin_unlock_irqrestore(&rsp->rss_lock, flags);
}
/**
* rcu_sync_exit() - Allow readers back onto fast patch after grace period
* rcu_sync_exit() - Allow readers back onto fast path after grace period
* @rsp: Pointer to rcu_sync structure to use for synchronization
*
* This function is used by updaters who have completed, and can therefore
@ -191,13 +167,16 @@ static void rcu_sync_func(struct rcu_head *rhp)
*/
void rcu_sync_exit(struct rcu_sync *rsp)
{
WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
WARN_ON_ONCE(READ_ONCE(rsp->gp_count) == 0);
spin_lock_irq(&rsp->rss_lock);
if (!--rsp->gp_count) {
if (rsp->cb_state == CB_IDLE) {
rsp->cb_state = CB_PENDING;
gp_ops[rsp->gp_type].call(&rsp->cb_head, rcu_sync_func);
} else if (rsp->cb_state == CB_PENDING) {
rsp->cb_state = CB_REPLAY;
if (rsp->gp_state == GP_PASSED) {
WRITE_ONCE(rsp->gp_state, GP_EXIT);
rcu_sync_call(rsp);
} else if (rsp->gp_state == GP_EXIT) {
WRITE_ONCE(rsp->gp_state, GP_REPLAY);
}
}
spin_unlock_irq(&rsp->rss_lock);
@ -209,18 +188,19 @@ void rcu_sync_exit(struct rcu_sync *rsp)
*/
void rcu_sync_dtor(struct rcu_sync *rsp)
{
int cb_state;
int gp_state;
WARN_ON_ONCE(rsp->gp_count);
WARN_ON_ONCE(READ_ONCE(rsp->gp_count));
WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
spin_lock_irq(&rsp->rss_lock);
if (rsp->cb_state == CB_REPLAY)
rsp->cb_state = CB_PENDING;
cb_state = rsp->cb_state;
if (rsp->gp_state == GP_REPLAY)
WRITE_ONCE(rsp->gp_state, GP_EXIT);
gp_state = rsp->gp_state;
spin_unlock_irq(&rsp->rss_lock);
if (cb_state != CB_IDLE) {
gp_ops[rsp->gp_type].wait();
WARN_ON_ONCE(rsp->cb_state != CB_IDLE);
if (gp_state != GP_IDLE) {
rcu_barrier();
WARN_ON_ONCE(rsp->gp_state != GP_IDLE);
}
}

View File

@ -51,6 +51,12 @@
#include <linux/tick.h>
#include <linux/sysrq.h>
#include <linux/kprobes.h>
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/smpboot.h>
#include <linux/jiffies.h>
#include <linux/sched/isolation.h>
#include "../time/tick-internal.h"
#include "tree.h"
#include "rcu.h"
@ -92,6 +98,9 @@ struct rcu_state rcu_state = {
/* Dump rcu_node combining tree at boot to verify correct setup. */
static bool dump_tree;
module_param(dump_tree, bool, 0444);
/*
 * By default, use RCU_SOFTIRQ instead of rcuc kthreads.  When this is
 * cleared, invoke_rcu_core() wakes the per-CPU rcuc kthread rather than
 * raising RCU_SOFTIRQ.  Exposed as a module parameter with mode 0444.
 */
static bool use_softirq = 1;
module_param(use_softirq, bool, 0444);
/* Control rcu_node-tree auto-balancing at boot time. */
static bool rcu_fanout_exact;
module_param(rcu_fanout_exact, bool, 0444);
@ -138,7 +147,6 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
static void invoke_rcu_core(void);
static void invoke_rcu_callbacks(struct rcu_data *rdp);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
static void sync_sched_exp_online_cleanup(int cpu);
@ -368,19 +376,33 @@ static void __maybe_unused rcu_momentary_dyntick_idle(void)
}
/**
* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
* rcu_is_cpu_rrupt_from_idle - see if interrupted from idle
*
* If the current CPU is idle or running at a first-level (not nested)
* If the current CPU is idle and running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
return __this_cpu_read(rcu_data.dynticks_nesting) <= 0 &&
__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 1;
/* Called only from within the scheduling-clock interrupt */
lockdep_assert_in_irq();
/* Check for counter underflows */
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nesting) < 0,
"RCU dynticks_nesting counter underflow!");
RCU_LOCKDEP_WARN(__this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 0,
"RCU dynticks_nmi_nesting counter underflow/zero!");
/* Are we at first interrupt nesting level? */
if (__this_cpu_read(rcu_data.dynticks_nmi_nesting) != 1)
return false;
/* Does CPU appear to be idle from an RCU standpoint? */
return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}
#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */
#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
static long qhimark = DEFAULT_RCU_QHIMARK;
@ -2113,7 +2135,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
/* Reinstate batch limit if we have worked down the excess. */
count = rcu_segcblist_n_cbs(&rdp->cblist);
if (rdp->blimit == LONG_MAX && count <= qlowmark)
if (rdp->blimit >= DEFAULT_MAX_RCU_BLIMIT && count <= qlowmark)
rdp->blimit = blimit;
/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
@ -2253,7 +2275,7 @@ void rcu_force_quiescent_state(void)
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
/* Perform RCU core processing work for the current CPU. */
static __latent_entropy void rcu_core(struct softirq_action *unused)
static __latent_entropy void rcu_core(void)
{
unsigned long flags;
struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
@ -2287,38 +2309,127 @@ static __latent_entropy void rcu_core(struct softirq_action *unused)
rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
/* If there are callbacks ready, invoke them. */
if (rcu_segcblist_ready_cbs(&rdp->cblist))
invoke_rcu_callbacks(rdp);
if (rcu_segcblist_ready_cbs(&rdp->cblist) &&
likely(READ_ONCE(rcu_scheduler_fully_active)))
rcu_do_batch(rdp);
/* Do any needed deferred wakeups of rcuo kthreads. */
do_nocb_deferred_wakeup(rdp);
trace_rcu_utilization(TPS("End RCU core"));
}
/*
* Schedule RCU callback invocation. If the running implementation of RCU
* does not support RCU priority boosting, just do a direct call, otherwise
* wake up the per-CPU kernel kthread. Note that because we are running
* on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
* cannot disappear out from under us.
*/
static void invoke_rcu_callbacks(struct rcu_data *rdp)
static void rcu_core_si(struct softirq_action *h)
{
if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
return;
if (likely(!rcu_state.boost)) {
rcu_do_batch(rdp);
return;
}
invoke_rcu_callbacks_kthread();
rcu_core();
}
/*
 * Conditionally wake up the kthread @t, whose current state is @status.
 * Nothing happens if @t is NULL.  A kthread in the RCU_KTHREAD_YIELDING
 * state is awakened only when the caller is the idle task.
 */
static void rcu_wake_cond(struct task_struct *t, int status)
{
	/* No kthread, so nothing to awaken. */
	if (!t)
		return;

	/* Leave a yielding kthread alone unless invoked from idle. */
	if (status == RCU_KTHREAD_YIELDING && !is_idle_task(current))
		return;

	wake_up_process(t);
}
/*
 * Flag the current CPU as having RCU core work pending and, if
 * appropriate, wake up its rcuc kthread.  The flag update and the wakeup
 * decision are both made with local interrupts disabled.
 */
static void invoke_rcu_core_kthread(void)
{
struct task_struct *t;
unsigned long flags;
local_irq_save(flags);
/* Tell rcu_cpu_kthread_should_run()/rcu_cpu_kthread() there is work. */
__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
t = __this_cpu_read(rcu_data.rcu_cpu_kthread_task);
/* No wakeup if there is no kthread or if we are that kthread. */
if (t != NULL && t != current)
rcu_wake_cond(t, __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
local_irq_restore(flags);
}
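/*
* Request RCU core processing on this CPU, doing nothing if the CPU is
* offline.  Raise RCU_SOFTIRQ if use_softirq is set, otherwise wake up
* this CPU's rcuc kthread.
*/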
static void invoke_rcu_core(void)
{
if (cpu_online(smp_processor_id()))
if (!cpu_online(smp_processor_id()))
return;
if (use_softirq)
raise_softirq(RCU_SOFTIRQ);
else
invoke_rcu_core_kthread();
}
/*
 * smpboot ->park() callback: record that the rcuc kthread belonging to
 * @cpu is now in the RCU_KTHREAD_OFFCPU state.
 */
static void rcu_cpu_kthread_park(unsigned int cpu)
{
	unsigned int *statusp = per_cpu_ptr(&rcu_data.rcu_cpu_kthread_status, cpu);

	*statusp = RCU_KTHREAD_OFFCPU;
}
/*
 * smpboot ->thread_should_run() callback: return the current CPU's
 * ->rcu_cpu_has_work flag.  The @cpu argument is not consulted.
 */
static int rcu_cpu_kthread_should_run(unsigned int cpu)
{
	char has_work = __this_cpu_read(rcu_data.rcu_cpu_has_work);

	return has_work;
}
/*
 * Per-CPU kernel thread function that carries out RCU core processing by
 * calling rcu_core() whenever this CPU's ->rcu_cpu_has_work flag is set.
 *
 * Up to 10 passes are made.  Each pass, with bottom halves disabled, marks
 * the kthread RCU_KTHREAD_RUNNING, snapshots and clears the work flag with
 * local interrupts disabled, and calls rcu_core() if the flag had been set.
 * If the flag is still clear afterwards, the status becomes
 * RCU_KTHREAD_WAITING and the function returns.  If work keeps being
 * flagged through all 10 passes, the kthread marks itself
 * RCU_KTHREAD_YIELDING, sleeps via schedule_timeout_interruptible(2), and
 * then returns in the RCU_KTHREAD_WAITING state.
 */
static void rcu_cpu_kthread(unsigned int cpu)
{
unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
int spincnt;
for (spincnt = 0; spincnt < 10; spincnt++) {
trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
local_bh_disable();
*statusp = RCU_KTHREAD_RUNNING;
/* Snapshot and clear the work flag with local interrupts disabled. */
local_irq_disable();
work = *workp;
*workp = 0;
local_irq_enable();
if (work)
rcu_core();
local_bh_enable();
/* No new work was flagged in the meantime, so we are done. */
if (*workp == 0) {
trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
*statusp = RCU_KTHREAD_WAITING;
return;
}
}
/* Work was still pending after every pass: back off for a while. */
*statusp = RCU_KTHREAD_YIELDING;
trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
schedule_timeout_interruptible(2);
trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
*statusp = RCU_KTHREAD_WAITING;
}
/*
 * Descriptor for the per-CPU "rcuc/%u" kthreads.  It is handed to
 * smpboot_register_percpu_thread() by rcu_spawn_core_kthreads() and ties
 * together the per-CPU task pointer and the setup, park, should-run, and
 * thread-function callbacks.
 */
static struct smp_hotplug_thread rcu_cpu_thread_spec = {
.store = &rcu_data.rcu_cpu_kthread_task,
.thread_should_run = rcu_cpu_kthread_should_run,
.thread_fn = rcu_cpu_kthread,
.thread_comm = "rcuc/%u",
.setup = rcu_cpu_kthread_setup,
.park = rcu_cpu_kthread_park,
};
/*
 * Early-boot setup for the per-CPU RCU core processing (rcuc) kthreads.
 * The ->rcu_cpu_has_work flag of every possible CPU is cleared first.
 * The kthreads are then registered unless the kernel was built without
 * CONFIG_RCU_BOOST and use_softirq is set.  Always returns 0: a failed
 * registration only produces a one-time warning.
 */
static int __init rcu_spawn_core_kthreads(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;

	/* The rcuc kthreads are needed for boosting or when not using softirq. */
	if (IS_ENABLED(CONFIG_RCU_BOOST) || !use_softirq) {
		WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec),
			  "%s: Could not start rcuc kthread, OOM is now expected behavior\n", __func__);
	}
	return 0;
}
early_initcall(rcu_spawn_core_kthreads);
/*
* Handle any core-RCU processing required by a call_rcu() invocation.
*/
@ -2354,7 +2465,7 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
} else {
/* Give the grace period a kick. */
rdp->blimit = LONG_MAX;
rdp->blimit = DEFAULT_MAX_RCU_BLIMIT;
if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
rcu_force_quiescent_state();
@ -3355,7 +3466,8 @@ void __init rcu_init(void)
rcu_init_one();
if (dump_tree)
rcu_dump_rcu_node_tree();
open_softirq(RCU_SOFTIRQ, rcu_core);
if (use_softirq)
open_softirq(RCU_SOFTIRQ, rcu_core_si);
/*
* We don't need protection against CPU-hotplug here because

View File

@ -154,13 +154,15 @@ struct rcu_data {
bool core_needs_qs; /* Core waits for quiesc state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
bool deferred_qs; /* This CPU awaiting a deferred QS? */
bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
unsigned long ticks_this_gp; /* The number of scheduling-clock */
/* ticks this CPU has handled */
/* during and after the last grace */
/* period it is aware of. */
struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
bool defer_qs_iw_pending; /* Scheduler attention pending? */
/* 2) batch handling */
struct rcu_segcblist cblist; /* Segmented callback list, with */
@ -407,8 +409,8 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
static void rcu_cpu_kthread_setup(unsigned int cpu);
static void __init rcu_spawn_boost_kthreads(void);
static void rcu_prepare_kthreads(int cpu);
static void rcu_cleanup_after_idle(void);

View File

@ -250,7 +250,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
*/
static void rcu_report_exp_rdp(struct rcu_data *rdp)
{
WRITE_ONCE(rdp->deferred_qs, false);
WRITE_ONCE(rdp->exp_deferred_qs, false);
rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
}
@ -259,8 +259,7 @@ static bool sync_exp_work_done(unsigned long s)
{
if (rcu_exp_gp_seq_done(s)) {
trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
/* Ensure test happens before caller kfree(). */
smp_mb__before_atomic(); /* ^^^ */
smp_mb(); /* Ensure test happens before caller kfree(). */
return true;
}
return false;
@ -384,7 +383,12 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
mask_ofl_test |= mask;
continue;
}
if (get_cpu() == cpu) {
put_cpu();
continue;
}
ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
put_cpu();
if (!ret) {
mask_ofl_ipi &= ~mask;
continue;
@ -611,7 +615,7 @@ static void rcu_exp_handler(void *unused)
rcu_dynticks_curr_cpu_in_eqs()) {
rcu_report_exp_rdp(rdp);
} else {
rdp->deferred_qs = true;
rdp->exp_deferred_qs = true;
set_tsk_need_resched(t);
set_preempt_need_resched();
}
@ -633,7 +637,7 @@ static void rcu_exp_handler(void *unused)
if (t->rcu_read_lock_nesting > 0) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
rdp->deferred_qs = true;
rdp->exp_deferred_qs = true;
t->rcu_read_unlock_special.b.exp_hint = true;
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@ -656,7 +660,7 @@ static void rcu_exp_handler(void *unused)
*
* Otherwise, force a context switch after the CPU enables everything.
*/
rdp->deferred_qs = true;
rdp->exp_deferred_qs = true;
if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
rcu_preempt_deferred_qs(t);
@ -694,6 +698,16 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
#else /* #ifdef CONFIG_PREEMPT_RCU */
/*
 * Request an expedited quiescent state from the current CPU: set its
 * ->cpu_no_qs.b.exp flag, then set ->rcu_urgent_qs using a store-release,
 * and finally mark the current task as needing to be rescheduled.
 */
static void rcu_exp_need_qs(void)
{
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
/* Ask for the current task to be rescheduled. */
set_tsk_need_resched(current);
set_preempt_need_resched();
}
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void rcu_exp_handler(void *unused)
{
@ -709,25 +723,38 @@ static void rcu_exp_handler(void *unused)
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
return;
}
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
set_tsk_need_resched(current);
set_preempt_need_resched();
rcu_exp_need_qs();
}
/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
unsigned long flags;
int my_cpu;
struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
rdp = per_cpu_ptr(&rcu_data, cpu);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
my_cpu = get_cpu();
/* Quiescent state either not needed or already requested, leave. */
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_data.cpu_no_qs.b.exp)) {
put_cpu();
return;
}
/* Quiescent state needed on current CPU, so set it up locally. */
if (my_cpu == cpu) {
local_irq_save(flags);
rcu_exp_need_qs();
local_irq_restore(flags);
put_cpu();
return;
}
/* Quiescent state needed on some other CPU, send IPI. */
ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
put_cpu();
WARN_ON_ONCE(ret);
}
@ -765,7 +792,6 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
*/
void synchronize_rcu_expedited(void)
{
struct rcu_data *rdp;
struct rcu_exp_work rew;
struct rcu_node *rnp;
unsigned long s;
@ -802,7 +828,6 @@ void synchronize_rcu_expedited(void)
}
/* Wait for expedited grace period to complete. */
rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
rnp = rcu_get_root();
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(s));

View File

@ -11,29 +11,7 @@
* Paul E. McKenney <paulmck@linux.ibm.com>
*/
#include <linux/delay.h>
#include <linux/gfp.h>
#include <linux/oom.h>
#include <linux/sched/debug.h>
#include <linux/smpboot.h>
#include <linux/sched/isolation.h>
#include <uapi/linux/sched/types.h>
#include "../time/tick-internal.h"
#ifdef CONFIG_RCU_BOOST
#include "../locking/rtmutex_common.h"
#else /* #ifdef CONFIG_RCU_BOOST */
/*
* Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST,
* all uses are in dead code. Provide a definition to keep the compiler
* happy, but add WARN_ON_ONCE() to complain if used in the wrong place.
* This probably needs to be excluded from -rt builds.
*/
#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
#endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
@ -94,6 +72,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
if (gp_cleanup_delay)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
if (!use_softirq)
pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
pr_info("\tRCU debug extended QS entry/exit.\n");
rcupdate_announce_bootup_oddness();
@ -257,10 +237,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* no need to check for a subsequent expedited GP. (Though we are
* still in a quiescent state in any case.)
*/
if (blkd_state & RCU_EXP_BLKD && rdp->deferred_qs)
if (blkd_state & RCU_EXP_BLKD && rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
else
WARN_ON_ONCE(rdp->deferred_qs);
WARN_ON_ONCE(rdp->exp_deferred_qs);
}
/*
@ -357,7 +337,7 @@ void rcu_note_context_switch(bool preempt)
* means that we continue to block the current grace period.
*/
rcu_qs();
if (rdp->deferred_qs)
if (rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
@ -471,14 +451,15 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
special = t->rcu_read_unlock_special;
rdp = this_cpu_ptr(&rcu_data);
if (!special.s && !rdp->deferred_qs) {
if (!special.s && !rdp->exp_deferred_qs) {
local_irq_restore(flags);
return;
}
t->rcu_read_unlock_special.b.deferred_qs = false;
if (special.b.need_qs) {
rcu_qs();
t->rcu_read_unlock_special.b.need_qs = false;
if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
if (!t->rcu_read_unlock_special.s && !rdp->exp_deferred_qs) {
local_irq_restore(flags);
return;
}
@ -490,7 +471,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* tasks are handled when removing the task from the
* blocked-tasks list below.
*/
if (rdp->deferred_qs) {
if (rdp->exp_deferred_qs) {
rcu_report_exp_rdp(rdp);
if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
@ -579,7 +560,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (__this_cpu_read(rcu_data.deferred_qs) ||
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
t->rcu_read_lock_nesting <= 0;
}
@ -606,6 +587,17 @@ static void rcu_preempt_deferred_qs(struct task_struct *t)
t->rcu_read_lock_nesting += RCU_NEST_BIAS;
}
/*
 * Minimal irq_work handler whose purpose is to give the scheduler a chance
 * to re-evaluate.  The only work done here is clearing the
 * ->defer_qs_iw_pending flag of the rcu_data structure containing @iwp.
 */
static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
{
	struct rcu_data *rdp = container_of(iwp, struct rcu_data, defer_qs_iw);

	rdp->defer_qs_iw_pending = false;
}
/*
* Handle special cases during rcu_read_unlock(), such as needing to
* notify RCU core processing or task having blocked during the RCU
@ -625,16 +617,41 @@ static void rcu_read_unlock_special(struct task_struct *t)
local_irq_save(flags);
irqs_were_disabled = irqs_disabled_flags(flags);
if (preempt_bh_were_disabled || irqs_were_disabled) {
WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false);
/* Need to defer quiescent state until everything is enabled. */
if (irqs_were_disabled) {
/* Enabling irqs does not reschedule, so... */
bool exp;
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
t->rcu_read_unlock_special.b.exp_hint = false;
exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
(rdp->grpmask & rnp->expmask) ||
tick_nohz_full_cpu(rdp->cpu);
// Need to defer quiescent state until everything is enabled.
if ((exp || in_irq()) && irqs_were_disabled && use_softirq &&
(in_irq() || !t->rcu_read_unlock_special.b.deferred_qs)) {
// Using softirq, safe to awaken, and we get
// no help from enabling irqs, unlike bh/preempt.
raise_softirq_irqoff(RCU_SOFTIRQ);
} else if (exp && irqs_were_disabled && !use_softirq &&
!t->rcu_read_unlock_special.b.deferred_qs) {
// Safe to awaken and we get no help from enabling
// irqs, unlike bh/preempt.
invoke_rcu_core();
} else {
/* Enabling BH or preempt does reschedule, so... */
// Enabling BH or preempt does reschedule, so...
// Also if no expediting or NO_HZ_FULL, slow is OK.
set_tsk_need_resched(current);
set_preempt_need_resched();
if (IS_ENABLED(CONFIG_IRQ_WORK) &&
!rdp->defer_qs_iw_pending && exp) {
// Get scheduler to re-evaluate and call hooks.
// If !IRQ_WORK, FQS scan will eventually IPI.
init_irq_work(&rdp->defer_qs_iw,
rcu_preempt_deferred_qs_handler);
rdp->defer_qs_iw_pending = true;
irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
}
}
t->rcu_read_unlock_special.b.deferred_qs = true;
local_irq_restore(flags);
return;
}
@ -760,7 +777,7 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
pr_cont(" %p", lhp);
if (++i >= 10)
if (++i >= ncheck)
break;
}
pr_cont("\n");
@ -944,18 +961,21 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
static void rcu_wake_cond(struct task_struct *t, int status)
/*
* If boosting, set rcuc kthreads to realtime priority.
*/
static void rcu_cpu_kthread_setup(unsigned int cpu)
{
/*
* If the thread is yielding, only wake it when this
* is invoked from idle
*/
if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
wake_up_process(t);
#ifdef CONFIG_RCU_BOOST
struct sched_param sp;
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
#endif /* #ifdef CONFIG_RCU_BOOST */
}
#ifdef CONFIG_RCU_BOOST
/*
* Carry out RCU priority boosting on the task indicated by ->exp_tasks
* or ->boost_tasks, advancing the pointer to the next task in the
@ -1090,23 +1110,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
}
}
/*
* Wake up the per-CPU kthread to invoke RCU callbacks.
*/
static void invoke_rcu_callbacks_kthread(void)
{
unsigned long flags;
local_irq_save(flags);
__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
__this_cpu_read(rcu_data.rcu_cpu_kthread_status));
}
local_irq_restore(flags);
}
/*
* Is the current CPU running the RCU-callbacks kthread?
* Caller must have preemption disabled.
@ -1160,59 +1163,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
return 0;
}
static void rcu_cpu_kthread_setup(unsigned int cpu)
{
struct sched_param sp;
sp.sched_priority = kthread_prio;
sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
}
static void rcu_cpu_kthread_park(unsigned int cpu)
{
per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
}
static int rcu_cpu_kthread_should_run(unsigned int cpu)
{
return __this_cpu_read(rcu_data.rcu_cpu_has_work);
}
/*
* Per-CPU kernel thread that invokes RCU callbacks. This replaces
* the RCU softirq used in configurations of RCU that do not support RCU
* priority boosting.
*/
static void rcu_cpu_kthread(unsigned int cpu)
{
unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
int spincnt;
for (spincnt = 0; spincnt < 10; spincnt++) {
trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
local_bh_disable();
*statusp = RCU_KTHREAD_RUNNING;
local_irq_disable();
work = *workp;
*workp = 0;
local_irq_enable();
if (work)
rcu_do_batch(this_cpu_ptr(&rcu_data));
local_bh_enable();
if (*workp == 0) {
trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
*statusp = RCU_KTHREAD_WAITING;
return;
}
}
*statusp = RCU_KTHREAD_YIELDING;
trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
schedule_timeout_interruptible(2);
trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
*statusp = RCU_KTHREAD_WAITING;
}
/*
* Set the per-rcu_node kthread's affinity to cover all CPUs that are
* served by the rcu_node in question. The CPU hotplug lock is still
@ -1243,27 +1193,13 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
free_cpumask_var(cm);
}
static struct smp_hotplug_thread rcu_cpu_thread_spec = {
.store = &rcu_data.rcu_cpu_kthread_task,
.thread_should_run = rcu_cpu_kthread_should_run,
.thread_fn = rcu_cpu_kthread,
.thread_comm = "rcuc/%u",
.setup = rcu_cpu_kthread_setup,
.park = rcu_cpu_kthread_park,
};
/*
* Spawn boost kthreads -- called as soon as the scheduler is running.
*/
static void __init rcu_spawn_boost_kthreads(void)
{
struct rcu_node *rnp;
int cpu;
for_each_possible_cpu(cpu)
per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
return;
rcu_for_each_leaf_node(rnp)
(void)rcu_spawn_one_boost_kthread(rnp);
}
@ -1286,11 +1222,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
static void invoke_rcu_callbacks_kthread(void)
{
WARN_ON_ONCE(1);
}
static bool rcu_is_callbacks_kthread(void)
{
return false;

View File

@ -630,7 +630,9 @@ static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
time_before(j, rcu_state.gp_activity + gpssdelay) ||
atomic_xchg(&warned, 1)) {
raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
if (rnp_root != rnp)
/* irqs remain disabled. */
raw_spin_unlock_rcu_node(rnp_root);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}

View File

@ -423,6 +423,19 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read);
do { } while (0)
#endif
#if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST)
/*
 * Give rcutorture access to sched_setaffinity().  The arguments are passed
 * straight through, a one-time warning is emitted if sched_setaffinity()
 * returns nonzero, and its return value is handed back to the caller.
 */
long rcutorture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
{
	int ret = sched_setaffinity(pid, in_mask);

	WARN_ONCE(ret, "%s: sched_setaffinity() returned %d\n", __func__, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(rcutorture_sched_setaffinity);
#endif
#ifdef CONFIG_RCU_STALL_COMMON
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);

View File

@ -570,6 +570,7 @@ static void torture_shutdown_cleanup(void)
static struct task_struct *stutter_task;
static int stutter_pause_test;
static int stutter;
static int stutter_gap;
/*
* Block until the stutter interval ends. This must be called periodically
@ -578,10 +579,12 @@ static int stutter;
bool stutter_wait(const char *title)
{
int spt;
bool ret = false;
cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
ret = true;
if (spt == 1) {
schedule_timeout_interruptible(1);
} else if (spt == 2) {
@ -592,7 +595,7 @@ bool stutter_wait(const char *title)
}
torture_shutdown_absorb(title);
}
return !!spt;
return ret;
}
EXPORT_SYMBOL_GPL(stutter_wait);
@ -602,17 +605,24 @@ EXPORT_SYMBOL_GPL(stutter_wait);
*/
static int torture_stutter(void *arg)
{
int wtime;
VERBOSE_TOROUT_STRING("torture_stutter task started");
do {
if (!torture_must_stop() && stutter > 1) {
WRITE_ONCE(stutter_pause_test, 1);
schedule_timeout_interruptible(stutter - 1);
wtime = stutter;
if (stutter > HZ + 1) {
WRITE_ONCE(stutter_pause_test, 1);
wtime = stutter - HZ - 1;
schedule_timeout_interruptible(wtime);
wtime = HZ + 1;
}
WRITE_ONCE(stutter_pause_test, 2);
schedule_timeout_interruptible(1);
schedule_timeout_interruptible(wtime);
}
WRITE_ONCE(stutter_pause_test, 0);
if (!torture_must_stop())
schedule_timeout_interruptible(stutter);
schedule_timeout_interruptible(stutter_gap);
torture_shutdown_absorb("torture_stutter");
} while (!torture_must_stop());
torture_kthread_stopping("torture_stutter");
@ -622,9 +632,10 @@ static int torture_stutter(void *arg)
/*
* Initialize and kick off the torture_stutter kthread.
*/
int torture_stutter_init(const int s)
int torture_stutter_init(const int s, const int sgap)
{
stutter = s;
stutter_gap = sgap;
return torture_create_kthread(torture_stutter, NULL, stutter_task);
}
EXPORT_SYMBOL_GPL(torture_stutter_init);

View File

@ -19,7 +19,7 @@ static inline bool rcu_is_watching(void)
return false;
}
#define rcu_assign_pointer(p, v) ((p) = (v))
#define RCU_INIT_POINTER(p, v) p=(v)
#define rcu_assign_pointer(p, v) do { (p) = (v); } while (0)
#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
#endif

View File

@ -7,6 +7,6 @@
#define rcu_dereference_raw(p) rcu_dereference(p)
#define rcu_dereference_protected(p, cond) rcu_dereference(p)
#define rcu_dereference_check(p, cond) rcu_dereference(p)
#define RCU_INIT_POINTER(p, v) (p) = (v)
#define RCU_INIT_POINTER(p, v) do { (p) = (v); } while (0)
#endif

View File

@ -0,0 +1,3 @@
# SPDX-License-Identifier: GPL-2.0+
all:
( cd ../../../..; tools/testing/selftests/rcutorture/bin/kvm.sh --duration 10 --configs TREE01 )

View File

@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Usage: configinit.sh config-spec-file build-output-dir results-dir
# Usage: configinit.sh config-spec-file results-dir
#
# Create a .config file from the spec file. Run from the kernel source tree.
# Exits with 0 if all went well, with 1 if all went well but the config
@ -11,10 +11,6 @@
# desired settings, for example, "CONFIG_NO_HZ=y". For best results,
# this should be a full pathname.
#
# The second argument is a optional path to a build output directory,
# for example, "O=/tmp/foo". If this argument is omitted, the .config
# file will be generated directly in the current directory.
#
# Copyright (C) IBM Corporation, 2013
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@ -26,34 +22,23 @@ mkdir $T
# Capture config spec file.
c=$1
buildloc=$2
resdir=$3
builddir=
if echo $buildloc | grep -q '^O='
then
builddir=`echo $buildloc | sed -e 's/^O=//'`
if test ! -d $builddir
then
mkdir $builddir
fi
else
echo Bad build directory: \"$buildloc\"
exit 2
fi
resdir=$2
sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
grep '^grep' < $T/u.sh > $T/upd.sh
echo "cat - $c" >> $T/upd.sh
make mrproper
make $buildloc distclean > $resdir/Make.distclean 2>&1
make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
mv $builddir/.config $builddir/.config.sav
sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
cp $builddir/.config $builddir/.config.new
yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
if test -z "$TORTURE_TRUST_MAKE"
then
make clean > $resdir/Make.clean 2>&1
fi
make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
mv .config .config.sav
sh $T/upd.sh < .config.sav > .config
cp .config .config.new
yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
# verify new config matches specification.
configcheck.sh $builddir/.config $c
configcheck.sh .config $c
exit 0

View File

@ -9,6 +9,11 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
if test -n "$TORTURE_ALLOTED_CPUS"
then
echo $TORTURE_ALLOTED_CPUS
exit 0
fi
ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
idlecpus=`mpstat | tail -1 | \
awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`

View File

@ -172,7 +172,7 @@ identify_qemu_append () {
local console=ttyS0
case "$1" in
qemu-system-x86_64|qemu-system-i386)
echo noapic selinux=0 initcall_debug debug
echo selinux=0 initcall_debug debug
;;
qemu-system-aarch64)
console=ttyAMA0
@ -191,8 +191,19 @@ identify_qemu_append () {
# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC
# and TORTURE_QEMU_INTERACTIVE environment variables.
identify_qemu_args () {
local KVM_CPU=""
case "$1" in
qemu-system-x86_64)
KVM_CPU=kvm64
;;
qemu-system-i386)
KVM_CPU=kvm32
;;
esac
case "$1" in
qemu-system-x86_64|qemu-system-i386)
echo -machine q35,accel=kvm
echo -cpu ${KVM_CPU}
;;
qemu-system-aarch64)
echo -machine virt,gic-version=host -cpu host

View File

@ -34,10 +34,15 @@ do
exit 0;
fi
# Set affinity to randomly selected CPU
cpus=`ls /sys/devices/system/cpu/*/online |
sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
grep -v '^0*$'`
# Set affinity to randomly selected online CPU
cpus=`grep 1 /sys/devices/system/cpu/*/online |
sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
# Do not leave out poor old cpu0 which may not be hot-pluggable
if [ ! -f "/sys/devices/system/cpu/cpu0/online" ]; then
cpus="0 $cpus"
fi
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);

View File

@ -3,7 +3,7 @@
#
# Build a kvm-ready Linux kernel from the tree in the current directory.
#
# Usage: kvm-build.sh config-template build-dir resdir
# Usage: kvm-build.sh config-template resdir
#
# Copyright (C) IBM Corporation, 2011
#
@ -15,8 +15,7 @@ then
echo "kvm-build.sh :$config_template: Not a readable file"
exit 1
fi
builddir=${2}
resdir=${3}
resdir=${2}
T=${TMPDIR-/tmp}/test-linux.sh.$$
trap 'rm -rf $T' 0
@ -29,14 +28,14 @@ CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_CONSOLE=y
___EOF___
configinit.sh $T/config O=$builddir $resdir
configinit.sh $T/config $resdir
retval=$?
if test $retval -gt 1
then
exit 2
fi
ncpus=`cpus2use.sh`
make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
then

View File

@ -11,6 +11,7 @@
#
# The "directory" above should end with the date/time directory, for example,
# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
# Returns error status reflecting the success (or not) of the specified run.
#
# Copyright (C) IBM Corporation, 2018
#
@ -56,6 +57,8 @@ done
if test -n "$files"
then
$editor $files
exit 1
else
echo No errors in console logs.
exit 0
fi

View File

@ -7,6 +7,8 @@
#
# Usage: kvm-recheck.sh resdir ...
#
# Returns status reflecting the success or not of the last run specified.
#
# Copyright (C) IBM Corporation, 2011
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
@ -28,8 +30,16 @@ do
TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
rm -f $i/console.log.*.diags
kvm-recheck-${TORTURE_SUITE}.sh $i
if test -f "$i/console.log"
if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
then
echo QEMU error, output:
cat $i/qemu-output
elif test -f "$i/console.log"
then
if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -eq 137
then
echo QEMU killed
fi
configcheck.sh $i/.config $i/ConfigFragment
if test -r $i/Make.oldconfig.err
then
@ -58,3 +68,4 @@ do
fi
done
done
EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1

View File

@ -36,11 +36,6 @@ config_template=${1}
config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
title=`echo $config_template | sed -e 's/^.*\///'`
builddir=${2}
if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir"
then
echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it"
exit 1
fi
resdir=${3}
if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
then
@ -85,18 +80,18 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
elif kvm-build.sh $T/Kc2 $builddir $resdir
elif kvm-build.sh $T/Kc2 $resdir
then
# Had to build a kernel for this test.
QEMU="`identify_qemu $builddir/vmlinux`"
QEMU="`identify_qemu vmlinux`"
BOOT_IMAGE="`identify_boot_image $QEMU`"
cp $builddir/vmlinux $resdir
cp $builddir/.config $resdir
cp $builddir/Module.symvers $resdir > /dev/null || :
cp $builddir/System.map $resdir > /dev/null || :
cp vmlinux $resdir
cp .config $resdir
cp Module.symvers $resdir > /dev/null || :
cp System.map $resdir > /dev/null || :
if test -n "$BOOT_IMAGE"
then
cp $builddir/$BOOT_IMAGE $resdir
cp $BOOT_IMAGE $resdir
KERNEL=$resdir/${BOOT_IMAGE##*/}
# Arch-independent indicator
touch $resdir/builtkernel
@ -107,7 +102,7 @@ then
parse-build.sh $resdir/Make.out $title
else
# Build failed.
cp $builddir/.config $resdir || :
cp .config $resdir || :
echo Build failed, not running KVM, see $resdir.
if test -f $builddir.wait
then
@ -165,7 +160,7 @@ then
fi
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
sleep 10 # Give qemu's pid a chance to reach the file
if test -s "$resdir/qemu_pid"

View File

@ -24,6 +24,7 @@ dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
TORTURE_ALLOTED_CPUS=""
TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
@ -32,6 +33,7 @@ TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
TORTURE_TRUST_MAKE=""
resdir=""
configs=""
cpus=0
@ -62,6 +64,7 @@ usage () {
echo " --qemu-cmd qemu-system-..."
echo " --results absolute-pathname"
echo " --torture rcu"
echo " --trust-make"
exit 1
}
@ -89,6 +92,7 @@ do
--cpus)
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
cpus=$2
TORTURE_ALLOTED_CPUS="$2"
shift
;;
--datestamp)
@ -173,6 +177,9 @@ do
jitter=0
fi
;;
--trust-make)
TORTURE_TRUST_MAKE="y"
;;
*)
echo Unknown argument $1
usage
@ -285,6 +292,7 @@ cat << ___EOF___ > $T/script
CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
KVM="$KVM"; export KVM
PATH="$PATH"; export PATH
TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
@ -297,6 +305,7 @@ TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
TORTURE_TRUST_MAKE="$TORTURE_TRUST_MAKE"; export TORTURE_TRUST_MAKE
if ! test -e $resdir
then
mkdir -p "$resdir" || :
@ -342,7 +351,7 @@ function dump(first, pastlast, batchnum)
print "needqemurun="
jn=1
for (j = first; j < pastlast; j++) {
builddir=KVM "/b1"
builddir=KVM "/b" j - first + 1
cpusr[jn] = cpus[j];
if (cfrep[cf[j]] == "") {
cfr[jn] = cf[j];
@ -358,7 +367,6 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
print "rm -f " builddir ".*";
print "touch " builddir ".wait";
print "mkdir " builddir " > /dev/null 2>&1 || :";
print "mkdir " rd cfr[jn] " || :";
print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
@ -464,3 +472,5 @@ else
fi
# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
# Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop
# Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y"

View File

@ -21,7 +21,7 @@ mkdir $T
. functions.sh
if grep -q CC < $F
if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE"
then
:
else

View File

@ -106,6 +106,7 @@ fi | tee -a $file.diags
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
grep -v 'ODEBUG: ' |
grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' > $T.diags
if test -s $T.diags
then

View File

@ -1,2 +1,5 @@
CONFIG_RCU_TORTURE_TEST=y
CONFIG_PRINTK_TIME=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_KVM_GUEST=y

View File

@ -3,3 +3,4 @@ rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcu_nocbs=0
rcutorture.fwd_progress=0

View File

@ -0,0 +1,14 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=8
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_HOTPLUG_CPU=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y

View File

@ -0,0 +1,3 @@
rcutorture.torture_type=trivial
rcutorture.onoff_interval=0
rcutorture.shuffle_interval=0