Commit 45975c7d authored by Paul E. McKenney

rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds

Now that RCU-preempt knows about preemption disabling, its implementation
of synchronize_rcu() works for synchronize_sched(), and likewise for the
other RCU-sched update-side API members.  This commit therefore confines
the RCU-sched update-side code to CONFIG_PREEMPT=n builds, and defines
RCU-sched's update-side API members in terms of those of RCU-preempt.

This means that any given build of the Linux kernel has only one
update-side flavor of RCU, namely RCU-preempt for CONFIG_PREEMPT=y builds
and RCU-sched for CONFIG_PREEMPT=n builds.  This in turn means that kernels
built with CONFIG_RCU_NOCB_CPU=y have only one rcuo kthread per CPU.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
parent 4cf439a2
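With this change, the RCU-sched update-side entry points become thin transitional wrappers around their RCU counterparts. The pattern, condensed from the hunks below (function bodies shown as in the diff, surrounding kerneldoc omitted), looks like this:

/* Transitional wrappers: RCU-sched update-side calls map onto RCU proper. */
void synchronize_sched(void)
{
	synchronize_rcu();
}

void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
	call_rcu(head, func);
}

void rcu_barrier_sched(void)
{
	rcu_barrier();
}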
...@@ -49,11 +49,11 @@ ...@@ -49,11 +49,11 @@
/* Exported common interfaces */ /* Exported common interfaces */
#ifdef CONFIG_PREEMPT_RCU #ifdef CONFIG_TINY_RCU
void call_rcu(struct rcu_head *head, rcu_callback_t func);
#else /* #ifdef CONFIG_PREEMPT_RCU */
#define call_rcu call_rcu_sched #define call_rcu call_rcu_sched
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ #else
void call_rcu(struct rcu_head *head, rcu_callback_t func);
#endif
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func); void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
void synchronize_sched(void); void synchronize_sched(void);
...@@ -92,11 +92,6 @@ static inline void __rcu_read_unlock(void) ...@@ -92,11 +92,6 @@ static inline void __rcu_read_unlock(void)
preempt_enable(); preempt_enable();
} }
static inline void synchronize_rcu(void)
{
synchronize_sched();
}
static inline int rcu_preempt_depth(void) static inline int rcu_preempt_depth(void)
{ {
return 0; return 0;
...@@ -107,7 +102,6 @@ static inline int rcu_preempt_depth(void) ...@@ -107,7 +102,6 @@ static inline int rcu_preempt_depth(void)
/* Internal to kernel */ /* Internal to kernel */
void rcu_init(void); void rcu_init(void);
extern int rcu_scheduler_active __read_mostly; extern int rcu_scheduler_active __read_mostly;
void rcu_sched_qs(void);
void rcu_check_callbacks(int user); void rcu_check_callbacks(int user);
void rcu_report_dead(unsigned int cpu); void rcu_report_dead(unsigned int cpu);
void rcutree_migrate_callbacks(int cpu); void rcutree_migrate_callbacks(int cpu);
......
...@@ -36,6 +36,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) ...@@ -36,6 +36,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
/* Never flag non-existent other CPUs! */ /* Never flag non-existent other CPUs! */
static inline bool rcu_eqs_special_set(int cpu) { return false; } static inline bool rcu_eqs_special_set(int cpu) { return false; }
static inline void synchronize_rcu(void)
{
synchronize_sched();
}
static inline unsigned long get_state_synchronize_rcu(void) static inline unsigned long get_state_synchronize_rcu(void)
{ {
return 0; return 0;
...@@ -94,6 +99,8 @@ static inline void kfree_call_rcu(struct rcu_head *head, ...@@ -94,6 +99,8 @@ static inline void kfree_call_rcu(struct rcu_head *head,
call_rcu(head, func); call_rcu(head, func);
} }
void rcu_sched_qs(void);
static inline void rcu_softirq_qs(void) static inline void rcu_softirq_qs(void)
{ {
rcu_sched_qs(); rcu_sched_qs();
......
...@@ -45,14 +45,19 @@ static inline void rcu_virt_note_context_switch(int cpu) ...@@ -45,14 +45,19 @@ static inline void rcu_virt_note_context_switch(int cpu)
rcu_note_context_switch(false); rcu_note_context_switch(false);
} }
void synchronize_rcu(void);
static inline void synchronize_rcu_bh(void) static inline void synchronize_rcu_bh(void)
{ {
synchronize_rcu(); synchronize_rcu();
} }
void synchronize_sched_expedited(void);
void synchronize_rcu_expedited(void); void synchronize_rcu_expedited(void);
static inline void synchronize_sched_expedited(void)
{
synchronize_rcu_expedited();
}
void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
/** /**
......
...@@ -92,24 +92,29 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var ...@@ -92,24 +92,29 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var
#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
DEFINE_RCU_TPS(sname) \ DEFINE_RCU_TPS(sname) \
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data); \
struct rcu_state sname##_state = { \ struct rcu_state rcu_state = { \
.level = { &sname##_state.node[0] }, \ .level = { &rcu_state.node[0] }, \
.rda = &sname##_data, \ .rda = &rcu_data, \
.call = cr, \ .call = cr, \
.gp_state = RCU_GP_IDLE, \ .gp_state = RCU_GP_IDLE, \
.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \ .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex), \
.name = RCU_STATE_NAME(sname), \ .name = RCU_STATE_NAME(sname), \
.abbr = sabbr, \ .abbr = sabbr, \
.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \ .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), \
.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \ .exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), \
.ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \ .ofl_lock = __SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), \
} }
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); #ifdef CONFIG_PREEMPT_RCU
RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
#else
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu);
#endif
static struct rcu_state *const rcu_state_p; static struct rcu_state *const rcu_state_p = &rcu_state;
static struct rcu_data __percpu *const rcu_data_p = &rcu_data;
LIST_HEAD(rcu_struct_flavors); LIST_HEAD(rcu_struct_flavors);
/* Dump rcu_node combining tree at boot to verify correct setup. */ /* Dump rcu_node combining tree at boot to verify correct setup. */
...@@ -220,31 +225,9 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) ...@@ -220,31 +225,9 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
return rcu_seq_state(rcu_seq_current(&rsp->gp_seq)); return rcu_seq_state(rcu_seq_current(&rsp->gp_seq));
} }
/*
* Note a quiescent state. Because we do not need to know
* how many quiescent states passed, just if there was at least
* one since the start of the grace period, this just sets a flag.
* The caller must have disabled preemption.
*/
void rcu_sched_qs(void)
{
RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
return;
trace_rcu_grace_period(TPS("rcu_sched"),
__this_cpu_read(rcu_sched_data.gp_seq),
TPS("cpuqs"));
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
return;
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
rcu_report_exp_rdp(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
}
void rcu_softirq_qs(void) void rcu_softirq_qs(void)
{ {
rcu_sched_qs(); rcu_qs();
rcu_preempt_qs();
rcu_preempt_deferred_qs(current); rcu_preempt_deferred_qs(current);
} }
...@@ -418,31 +401,18 @@ static void rcu_momentary_dyntick_idle(void) ...@@ -418,31 +401,18 @@ static void rcu_momentary_dyntick_idle(void)
rcu_preempt_deferred_qs(current); rcu_preempt_deferred_qs(current);
} }
/* /**
* Note a context switch. This is a quiescent state for RCU-sched, * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
* and requires special handling for preemptible RCU. *
* The caller must have disabled interrupts. * If the current CPU is idle or running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
*/ */
void rcu_note_context_switch(bool preempt) static int rcu_is_cpu_rrupt_from_idle(void)
{ {
barrier(); /* Avoid RCU read-side critical sections leaking down. */ return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
trace_rcu_utilization(TPS("Start context switch")); __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
rcu_sched_qs();
rcu_preempt_note_context_switch(preempt);
/* Load rcu_urgent_qs before other flags. */
if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
goto out;
this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
rcu_momentary_dyntick_idle();
this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
if (!preempt)
rcu_tasks_qs(current);
out:
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
} }
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
/* /*
* Register a quiescent state for all RCU flavors. If there is an * Register a quiescent state for all RCU flavors. If there is an
...@@ -476,8 +446,8 @@ void rcu_all_qs(void) ...@@ -476,8 +446,8 @@ void rcu_all_qs(void)
rcu_momentary_dyntick_idle(); rcu_momentary_dyntick_idle();
local_irq_restore(flags); local_irq_restore(flags);
} }
if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
rcu_sched_qs(); rcu_qs();
this_cpu_inc(rcu_dynticks.rcu_qs_ctr); this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */ barrier(); /* Avoid RCU read-side critical sections leaking up. */
preempt_enable(); preempt_enable();
...@@ -558,7 +528,7 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_seq); ...@@ -558,7 +528,7 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
*/ */
unsigned long rcu_sched_get_gp_seq(void) unsigned long rcu_sched_get_gp_seq(void)
{ {
return READ_ONCE(rcu_sched_state.gp_seq); return rcu_get_gp_seq();
} }
EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq); EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
...@@ -590,7 +560,7 @@ EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); ...@@ -590,7 +560,7 @@ EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
*/ */
unsigned long rcu_exp_batches_completed_sched(void) unsigned long rcu_exp_batches_completed_sched(void)
{ {
return rcu_sched_state.expedited_sequence; return rcu_state.expedited_sequence;
} }
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
...@@ -617,7 +587,7 @@ EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); ...@@ -617,7 +587,7 @@ EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
*/ */
void rcu_sched_force_quiescent_state(void) void rcu_sched_force_quiescent_state(void)
{ {
force_quiescent_state(&rcu_sched_state); rcu_force_quiescent_state();
} }
EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
...@@ -668,10 +638,8 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, ...@@ -668,10 +638,8 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
switch (test_type) { switch (test_type) {
case RCU_FLAVOR: case RCU_FLAVOR:
case RCU_BH_FLAVOR: case RCU_BH_FLAVOR:
rsp = rcu_state_p;
break;
case RCU_SCHED_FLAVOR: case RCU_SCHED_FLAVOR:
rsp = &rcu_sched_state; rsp = rcu_state_p;
break; break;
default: default:
break; break;
...@@ -1107,19 +1075,6 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); ...@@ -1107,19 +1075,6 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
/**
* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
*
* If the current CPU is idle or running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
__this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
}
/* /*
* We are reporting a quiescent state on behalf of some other CPU, so * We are reporting a quiescent state on behalf of some other CPU, so
* it is our responsibility to check for and handle potential overflow * it is our responsibility to check for and handle potential overflow
...@@ -2364,7 +2319,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp, ...@@ -2364,7 +2319,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
struct rcu_node *rnp_p; struct rcu_node *rnp_p;
raw_lockdep_assert_held_rcu_node(rnp); raw_lockdep_assert_held_rcu_node(rnp);
if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) || if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)) ||
WARN_ON_ONCE(rsp != rcu_state_p) || WARN_ON_ONCE(rsp != rcu_state_p) ||
WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) || WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
rnp->qsmask != 0) { rnp->qsmask != 0) {
...@@ -2650,25 +2605,7 @@ void rcu_check_callbacks(int user) ...@@ -2650,25 +2605,7 @@ void rcu_check_callbacks(int user)
{ {
trace_rcu_utilization(TPS("Start scheduler-tick")); trace_rcu_utilization(TPS("Start scheduler-tick"));
increment_cpu_stall_ticks(); increment_cpu_stall_ticks();
if (user || rcu_is_cpu_rrupt_from_idle()) { rcu_flavor_check_callbacks(user);
/*
* Get here if this CPU took its interrupt from user
* mode or from the idle loop, and if this is not a
* nested interrupt. In this case, the CPU is in
* a quiescent state, so note it.
*
* No memory barrier is required here because
* rcu_sched_qs() references only CPU-local variables
* that other CPUs neither access nor modify, at least
* not while the corresponding CPU is online.
*/
rcu_sched_qs();
rcu_note_voluntary_context_switch(current);
}
rcu_preempt_check_callbacks();
if (rcu_pending()) if (rcu_pending())
invoke_rcu_core(); invoke_rcu_core();
...@@ -2694,7 +2631,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)) ...@@ -2694,7 +2631,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
mask = 0; mask = 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags); raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) { if (rnp->qsmask == 0) {
if (rcu_state_p == &rcu_sched_state || if (!IS_ENABLED(CONFIG_PREEMPT) ||
rsp != rcu_state_p || rsp != rcu_state_p ||
rcu_preempt_blocked_readers_cgp(rnp)) { rcu_preempt_blocked_readers_cgp(rnp)) {
/* /*
...@@ -3028,28 +2965,56 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, ...@@ -3028,28 +2965,56 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
} }
/** /**
* call_rcu_sched() - Queue an RCU for invocation after sched grace period. * call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates. * @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period * @func: actual callback function to be invoked after the grace period
* *
* The callback function will be invoked some time after a full grace * The callback function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU * period elapses, in other words after all pre-existing RCU read-side
* read-side critical sections have completed. call_rcu_sched() assumes * critical sections have completed. However, the callback function
* that the read-side critical sections end on enabling of preemption * might well execute concurrently with RCU read-side critical sections
* or on voluntary preemption. * that started after call_rcu() was invoked. RCU read-side critical
* RCU read-side critical sections are delimited by: * sections are delimited by rcu_read_lock() and rcu_read_unlock(), and
* * may be nested. In addition, regions of code across which interrupts,
* - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR * preemption, or softirqs have been disabled also serve as RCU read-side
* - anything that disables preemption. * critical sections. This includes hardware interrupt handlers, softirq
* * handlers, and NMI handlers.
* These may be nested. *
* Note that all CPUs must agree that the grace period extended beyond
* all pre-existing RCU read-side critical section. On systems with more
* than one CPU, this means that when "func()" is invoked, each CPU is
* guaranteed to have executed a full memory barrier since the end of its
* last RCU read-side critical section whose beginning preceded the call
* to call_rcu(). It also means that each CPU executing an RCU read-side
* critical section that continues beyond the start of "func()" must have
* executed a memory barrier after the call_rcu() but before the beginning
* of that RCU read-side critical section. Note that these guarantees
* include CPUs that are offline, idle, or executing in user mode, as
* well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
* resulting RCU callback function "func()", then both CPU A and CPU B are
* guaranteed to execute a full memory barrier during the time interval
* between the call to call_rcu() and the invocation of "func()" -- even
* if CPU A and CPU B are the same CPU (but again only if the system has
* more than one CPU).
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, rcu_state_p, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu);
/**
* call_rcu_sched() - Queue an RCU for invocation after sched grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
* *
* See the description of call_rcu() for more detailed information on * This is transitional.
* memory ordering guarantees.
*/ */
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{ {
__call_rcu(head, func, &rcu_sched_state, -1, 0); call_rcu(head, func);
} }
EXPORT_SYMBOL_GPL(call_rcu_sched); EXPORT_SYMBOL_GPL(call_rcu_sched);
...@@ -3067,73 +3032,14 @@ void kfree_call_rcu(struct rcu_head *head, ...@@ -3067,73 +3032,14 @@ void kfree_call_rcu(struct rcu_head *head,
} }
EXPORT_SYMBOL_GPL(kfree_call_rcu); EXPORT_SYMBOL_GPL(kfree_call_rcu);
/*
* Because a context switch is a grace period for RCU-sched, any blocking
* grace-period wait automatically implies a grace period if there
* is only one CPU online at any point time during execution of either
* synchronize_sched() or synchronize_rcu_bh(). It is OK to occasionally
* incorrectly indicate that there are multiple CPUs online when there
* was in fact only one the whole time, as this just adds some overhead:
* RCU still operates correctly.
*/
static int rcu_blocking_is_gp(void)
{
int ret;
might_sleep(); /* Check for RCU read-side critical section. */
preempt_disable();
ret = num_online_cpus() <= 1;
preempt_enable();
return ret;
}
/** /**
* synchronize_sched - wait until an rcu-sched grace period has elapsed. * synchronize_sched - wait until an rcu-sched grace period has elapsed.
* *
* Control will return to the caller some time after a full rcu-sched * This is transitional.
* grace period has elapsed, in other words after all currently executing
* rcu-sched read-side critical sections have completed. These read-side
* critical sections are delimited by rcu_read_lock_sched() and
* rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
* local_irq_disable(), and so on may be used in place of
* rcu_read_lock_sched().
*
* This means that all preempt_disable code sequences, including NMI and
* non-threaded hardware-interrupt handlers, in progress on entry will
* have completed before this primitive returns. However, this does not
* guarantee that softirq handlers will have completed, since in some
* kernels, these handlers can run in process context, and can block.
*
* Note that this guarantee implies further memory-ordering guarantees.
* On systems with more than one CPU, when synchronize_sched() returns,
* each CPU is guaranteed to have executed a full memory barrier since the
* end of its last RCU-sched read-side critical section whose beginning
* preceded the call to synchronize_sched(). In addition, each CPU having
* an RCU read-side critical section that extends beyond the return from
* synchronize_sched() is guaranteed to have executed a full memory barrier
* after the beginning of synchronize_sched() and before the beginning of
* that RCU read-side critical section. Note that these guarantees include
* CPUs that are offline, idle, or executing in user mode, as well as CPUs
* that are executing in the kernel.
*
* Furthermore, if CPU A invoked synchronize_sched(), which returned
* to its caller on CPU B, then both CPU A and CPU B are guaranteed
* to have executed a full memory barrier during the execution of
* synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
* again only if the system has more than one CPU).
*/ */
void synchronize_sched(void) void synchronize_sched(void)
{ {
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || synchronize_rcu();
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched() in RCU-sched read-side critical section");
if (rcu_blocking_is_gp())
return;
if (rcu_gp_is_expedited())
synchronize_sched_expedited();
else
wait_rcu_gp(call_rcu_sched);
} }
EXPORT_SYMBOL_GPL(synchronize_sched); EXPORT_SYMBOL_GPL(synchronize_sched);
...@@ -3181,41 +3087,23 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu); ...@@ -3181,41 +3087,23 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
/** /**
* get_state_synchronize_sched - Snapshot current RCU-sched state * get_state_synchronize_sched - Snapshot current RCU-sched state
* *
* Returns a cookie that is used by a later call to cond_synchronize_sched() * This is transitional, and only used by rcutorture.
* to determine whether or not a full grace period has elapsed in the
* meantime.
*/ */
unsigned long get_state_synchronize_sched(void) unsigned long get_state_synchronize_sched(void)
{ {
/* return get_state_synchronize_rcu();
* Any prior manipulation of RCU-protected data must happen
* before the load from ->gp_seq.
*/
smp_mb(); /* ^^^ */
return rcu_seq_snap(&rcu_sched_state.gp_seq);
} }
EXPORT_SYMBOL_GPL(get_state_synchronize_sched); EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
/** /**
* cond_synchronize_sched - Conditionally wait for an RCU-sched grace period * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
*
* @oldstate: return value from earlier call to get_state_synchronize_sched() * @oldstate: return value from earlier call to get_state_synchronize_sched()
* *
* If a full RCU-sched grace period has elapsed since the earlier call to * This is transitional and only used by rcutorture.
* get_state_synchronize_sched(), just return. Otherwise, invoke
* synchronize_sched() to wait for a full grace period.
*
* Yes, this function does not take counter wrap into account. But
* counter wrap is harmless. If the counter wraps, we have waited for
* more than 2 billion grace periods (and way more on a 64-bit system!),
* so waiting for one additional grace period should be just fine.
*/ */
void cond_synchronize_sched(unsigned long oldstate) void cond_synchronize_sched(unsigned long oldstate)
{ {
if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate)) cond_synchronize_rcu(oldstate);
synchronize_sched();
else
smp_mb(); /* Ensure GP ends before subsequent accesses. */
} }
EXPORT_SYMBOL_GPL(cond_synchronize_sched); EXPORT_SYMBOL_GPL(cond_synchronize_sched);
...@@ -3452,12 +3340,28 @@ void rcu_barrier_bh(void) ...@@ -3452,12 +3340,28 @@ void rcu_barrier_bh(void)
} }
EXPORT_SYMBOL_GPL(rcu_barrier_bh); EXPORT_SYMBOL_GPL(rcu_barrier_bh);
/**
* rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
*
* Note that this primitive does not necessarily wait for an RCU grace period
* to complete. For example, if there are no RCU callbacks queued anywhere
* in the system, then rcu_barrier() is within its rights to return
* immediately, without waiting for anything, much less an RCU grace period.
*/
void rcu_barrier(void)
{
_rcu_barrier(rcu_state_p);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/** /**
* rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
*
* This is transitional.
*/ */
void rcu_barrier_sched(void) void rcu_barrier_sched(void)
{ {
_rcu_barrier(&rcu_sched_state); rcu_barrier();
} }
EXPORT_SYMBOL_GPL(rcu_barrier_sched); EXPORT_SYMBOL_GPL(rcu_barrier_sched);
...@@ -3756,7 +3660,7 @@ void rcu_report_dead(unsigned int cpu) ...@@ -3756,7 +3660,7 @@ void rcu_report_dead(unsigned int cpu)
/* QS for any half-done expedited RCU-sched GP. */ /* QS for any half-done expedited RCU-sched GP. */
preempt_disable(); preempt_disable();
rcu_report_exp_rdp(&rcu_sched_state, this_cpu_ptr(rcu_sched_state.rda)); rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(rcu_state.rda));
preempt_enable(); preempt_enable();
rcu_preempt_deferred_qs(current); rcu_preempt_deferred_qs(current);
for_each_rcu_flavor(rsp) for_each_rcu_flavor(rsp)
...@@ -4098,10 +4002,9 @@ void __init rcu_init(void) ...@@ -4098,10 +4002,9 @@ void __init rcu_init(void)
rcu_bootup_announce(); rcu_bootup_announce();
rcu_init_geometry(); rcu_init_geometry();
rcu_init_one(&rcu_sched_state); rcu_init_one(&rcu_state);
if (dump_tree) if (dump_tree)
rcu_dump_rcu_node_tree(&rcu_sched_state); rcu_dump_rcu_node_tree(&rcu_state);
__rcu_init_preempt();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/* /*
......
...@@ -225,9 +225,6 @@ struct rcu_data { ...@@ -225,9 +225,6 @@ struct rcu_data {
/* 5) _rcu_barrier(), OOM callbacks, and expediting. */ /* 5) _rcu_barrier(), OOM callbacks, and expediting. */
struct rcu_head barrier_head; struct rcu_head barrier_head;
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
int exp_dynticks_snap; /* Double-check need for IPI. */ int exp_dynticks_snap; /* Double-check need for IPI. */
/* 6) Callback offloading. */ /* 6) Callback offloading. */
...@@ -433,8 +430,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); ...@@ -433,8 +430,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
/* Forward declarations for rcutree_plugin.h */ /* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void); static void rcu_bootup_announce(void);
static void rcu_preempt_qs(void); static void rcu_qs(void);
static void rcu_preempt_note_context_switch(bool preempt);
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
static bool rcu_preempt_has_tasks(struct rcu_node *rnp); static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
...@@ -444,9 +440,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp); ...@@ -444,9 +440,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp);
static int rcu_print_task_exp_stall(struct rcu_node *rnp); static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp,
struct rcu_node *rnp); struct rcu_node *rnp);
static void rcu_preempt_check_callbacks(void); static void rcu_flavor_check_callbacks(int user);
void call_rcu(struct rcu_head *head, rcu_callback_t func); void call_rcu(struct rcu_head *head, rcu_callback_t func);
static void __init __rcu_init_preempt(void);
static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp,
int ncheck); int ncheck);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
......
...@@ -265,7 +265,7 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp) ...@@ -265,7 +265,7 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp)
rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, true); rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, true);
} }
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ /* Common code for work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s) static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
{ {
if (rcu_exp_gp_seq_done(rsp, s)) { if (rcu_exp_gp_seq_done(rsp, s)) {
...@@ -337,45 +337,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) ...@@ -337,45 +337,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
return false; return false;
} }
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_state *rsp = data;
rdp = this_cpu_ptr(rsp->rda);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
return;
if (rcu_is_cpu_rrupt_from_idle()) {
rcu_report_exp_rdp(&rcu_sched_state,
this_cpu_ptr(&rcu_sched_data));
return;
}
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
resched_cpu(smp_processor_id());
}
/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
rdp = per_cpu_ptr(rsp->rda, cpu);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
return;
ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
WARN_ON_ONCE(ret);
}
/* /*
* Select the CPUs within the specified rcu_node that the upcoming * Select the CPUs within the specified rcu_node that the upcoming
* expedited grace period needs to wait for. * expedited grace period needs to wait for.
...@@ -691,39 +652,6 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, ...@@ -691,39 +652,6 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
mutex_unlock(&rsp->exp_mutex); mutex_unlock(&rsp->exp_mutex);
} }
/**
* synchronize_sched_expedited - Brute-force RCU-sched grace period
*
* Wait for an RCU-sched grace period to elapse, but use a "big hammer"
* approach to force the grace period to end quickly. This consumes
* significant time on all CPUs and is unfriendly to real-time workloads,
* so is thus not recommended for any sort of common-case code. In fact,
* if you are using synchronize_sched_expedited() in a loop, please
* restructure your code to batch your updates, and then use a single
* synchronize_sched() instead.
*
* This implementation can be thought of as an application of sequence
* locking to expedited grace periods, but using the sequence counter to
* determine when someone else has already done the work instead of for
* retrying readers.
*/
void synchronize_sched_expedited(void)
{
struct rcu_state *rsp = &rcu_sched_state;
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched_expedited() in RCU read-side critical section");
/* If only one CPU, this is automatically a grace period. */
if (rcu_blocking_is_gp())
return;
_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#ifdef CONFIG_PREEMPT_RCU #ifdef CONFIG_PREEMPT_RCU
/* /*
...@@ -801,6 +729,11 @@ static void sync_rcu_exp_handler(void *info) ...@@ -801,6 +729,11 @@ static void sync_rcu_exp_handler(void *info)
resched_cpu(rdp->cpu); resched_cpu(rdp->cpu);
} }
/* PREEMPT=y, so no RCU-sched to clean up after. */
static void sync_sched_exp_online_cleanup(int cpu)
{
}
/** /**
* synchronize_rcu_expedited - Brute-force RCU grace period * synchronize_rcu_expedited - Brute-force RCU grace period
* *
...@@ -818,6 +751,8 @@ static void sync_rcu_exp_handler(void *info) ...@@ -818,6 +751,8 @@ static void sync_rcu_exp_handler(void *info)
* you are using synchronize_rcu_expedited() in a loop, please restructure * you are using synchronize_rcu_expedited() in a loop, please restructure
* your code to batch your updates, and then Use a single synchronize_rcu() * your code to batch your updates, and then Use a single synchronize_rcu()
* instead. * instead.
*
* This has the same semantics as (but is more brutal than) synchronize_rcu().
*/ */
void synchronize_rcu_expedited(void) void synchronize_rcu_expedited(void)
{ {
...@@ -836,13 +771,79 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); ...@@ -836,13 +771,79 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
#else /* #ifdef CONFIG_PREEMPT_RCU */ #else /* #ifdef CONFIG_PREEMPT_RCU */
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_state *rsp = data;
rdp = this_cpu_ptr(rsp->rda);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
if (rcu_is_cpu_rrupt_from_idle()) {
rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
return;
}
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
resched_cpu(smp_processor_id());
}
/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_state;
rdp = per_cpu_ptr(rsp->rda, cpu);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
return;
ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
WARN_ON_ONCE(ret);
}
/* /*
* Wait for an rcu-preempt grace period, but make it happen quickly. * Because a context switch is a grace period for RCU-sched, any blocking
* But because preemptible RCU does not exist, map to rcu-sched. * grace-period wait automatically implies a grace period if there
*/ * is only one CPU online at any point time during execution of either
* synchronize_sched() or synchronize_rcu_bh(). It is OK to occasionally
* incorrectly indicate that there are multiple CPUs online when there
* was in fact only one the whole time, as this just adds some overhead:
* RCU still operates correctly.
*/
static int rcu_blocking_is_gp(void)
{
int ret;
might_sleep(); /* Check for RCU read-side critical section. */
preempt_disable();
ret = num_online_cpus() <= 1;
preempt_enable();
return ret;
}
/* PREEMPT=n implementation of synchronize_rcu_expedited(). */
void synchronize_rcu_expedited(void) void synchronize_rcu_expedited(void)
{ {
synchronize_sched_expedited(); struct rcu_state *rsp = &rcu_state;
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched_expedited() in RCU read-side critical section");
/* If only one CPU, this is automatically a grace period. */
if (rcu_blocking_is_gp())
return;
_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
} }
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
......
...@@ -123,10 +123,6 @@ static void __init rcu_bootup_announce_oddness(void) ...@@ -123,10 +123,6 @@ static void __init rcu_bootup_announce_oddness(void)
#ifdef CONFIG_PREEMPT_RCU #ifdef CONFIG_PREEMPT_RCU
RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake); bool wake);
static void rcu_read_unlock_special(struct task_struct *t); static void rcu_read_unlock_special(struct task_struct *t);
...@@ -303,15 +299,15 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) ...@@ -303,15 +299,15 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* *
* Callers to this function must disable preemption. * Callers to this function must disable preemption.
*/ */
static void rcu_preempt_qs(void) static void rcu_qs(void)
{ {
RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n"); RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
trace_rcu_grace_period(TPS("rcu_preempt"), trace_rcu_grace_period(TPS("rcu_preempt"),
__this_cpu_read(rcu_data_p->gp_seq), __this_cpu_read(rcu_data_p->gp_seq),
TPS("cpuqs")); TPS("cpuqs"));
__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false); __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ barrier(); /* Coordinate with rcu_flavor_check_callbacks(). */
current->rcu_read_unlock_special.b.need_qs = false; current->rcu_read_unlock_special.b.need_qs = false;
} }
} }
...@@ -329,12 +325,14 @@ static void rcu_preempt_qs(void) ...@@ -329,12 +325,14 @@ static void rcu_preempt_qs(void)
* *
* Caller must disable interrupts. * Caller must disable interrupts.
*/ */
static void rcu_preempt_note_context_switch(bool preempt) void rcu_note_context_switch(bool preempt)
{ {
struct task_struct *t = current; struct task_struct *t = current;
struct rcu_data *rdp = this_cpu_ptr(rcu_state_p->rda); struct rcu_data *rdp = this_cpu_ptr(rcu_state_p->rda);
struct rcu_node *rnp; struct rcu_node *rnp;
barrier(); /* Avoid RCU read-side critical sections leaking down. */
trace_rcu_utilization(TPS("Start context switch"));
lockdep_assert_irqs_disabled(); lockdep_assert_irqs_disabled();
WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0); WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
if (t->rcu_read_lock_nesting > 0 && if (t->rcu_read_lock_nesting > 0 &&
...@@ -381,10 +379,13 @@ static void rcu_preempt_note_context_switch(bool preempt) ...@@ -381,10 +379,13 @@ static void rcu_preempt_note_context_switch(bool preempt)
* grace period, then the fact that the task has been enqueued * grace period, then the fact that the task has been enqueued
* means that we continue to block the current grace period. * means that we continue to block the current grace period.
*/ */
rcu_preempt_qs(); rcu_qs();
if (rdp->deferred_qs) if (rdp->deferred_qs)
rcu_report_exp_rdp(rcu_state_p, rdp); rcu_report_exp_rdp(rcu_state_p, rdp);
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
} }
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
/* /*
* Check for preempted RCU readers blocking the current grace period * Check for preempted RCU readers blocking the current grace period
...@@ -493,7 +494,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) ...@@ -493,7 +494,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
return; return;
} }
if (special.b.need_qs) { if (special.b.need_qs) {
rcu_preempt_qs(); rcu_qs();
t->rcu_read_unlock_special.b.need_qs = false; t->rcu_read_unlock_special.b.need_qs = false;
if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) { if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
local_irq_restore(flags); local_irq_restore(flags);
...@@ -596,7 +597,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) ...@@ -596,7 +597,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/ */
static bool rcu_preempt_need_deferred_qs(struct task_struct *t) static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{ {
return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs || return (this_cpu_ptr(&rcu_data)->deferred_qs ||
READ_ONCE(t->rcu_read_unlock_special.s)) && READ_ONCE(t->rcu_read_unlock_special.s)) &&
t->rcu_read_lock_nesting <= 0; t->rcu_read_lock_nesting <= 0;
} }
...@@ -781,11 +782,14 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp) ...@@ -781,11 +782,14 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
* *
* Caller must disable hard irqs. * Caller must disable hard irqs.
*/ */
static void rcu_preempt_check_callbacks(void) static void rcu_flavor_check_callbacks(int user)
{ {
struct rcu_state *rsp = &rcu_preempt_state; struct rcu_state *rsp = &rcu_state;
struct task_struct *t = current; struct task_struct *t = current;
if (user || rcu_is_cpu_rrupt_from_idle()) {
rcu_note_voluntary_context_switch(current);
}
if (t->rcu_read_lock_nesting > 0 || if (t->rcu_read_lock_nesting > 0 ||
(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) { (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
/* No QS, force context switch if deferred. */ /* No QS, force context switch if deferred. */
...@@ -795,7 +799,7 @@ static void rcu_preempt_check_callbacks(void) ...@@ -795,7 +799,7 @@ static void rcu_preempt_check_callbacks(void)
rcu_preempt_deferred_qs(t); /* Report deferred QS. */ rcu_preempt_deferred_qs(t); /* Report deferred QS. */
return; return;
} else if (!t->rcu_read_lock_nesting) { } else if (!t->rcu_read_lock_nesting) {
rcu_preempt_qs(); /* Report immediate QS. */ rcu_qs(); /* Report immediate QS. */
return; return;
} }
...@@ -808,44 +812,6 @@ static void rcu_preempt_check_callbacks(void) ...@@ -808,44 +812,6 @@ static void rcu_preempt_check_callbacks(void)
t->rcu_read_unlock_special.b.need_qs = true; t->rcu_read_unlock_special.b.need_qs = true;
} }
/**
* call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
*
* The callback function will be invoked some time after a full grace
* period elapses, in other words after all pre-existing RCU read-side
* critical sections have completed. However, the callback function
* might well execute concurrently with RCU read-side critical sections
* that started after call_rcu() was invoked. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*
* Note that all CPUs must agree that the grace period extended beyond
* all pre-existing RCU read-side critical section. On systems with more
* than one CPU, this means that when "func()" is invoked, each CPU is
* guaranteed to have executed a full memory barrier since the end of its
* last RCU read-side critical section whose beginning preceded the call
* to call_rcu(). It also means that each CPU executing an RCU read-side
* critical section that continues beyond the start of "func()" must have
* executed a memory barrier after the call_rcu() but before the beginning
* of that RCU read-side critical section. Note that these guarantees
* include CPUs that are offline, idle, or executing in user mode, as
* well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
* resulting RCU callback function "func()", then both CPU A and CPU B are
* guaranteed to execute a full memory barrier during the time interval
* between the call to call_rcu() and the invocation of "func()" -- even
* if CPU A and CPU B are the same CPU (but again only if the system has
* more than one CPU).
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, rcu_state_p, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu);
/** /**
* synchronize_rcu - wait until a grace period has elapsed. * synchronize_rcu - wait until a grace period has elapsed.
* *
...@@ -856,14 +822,28 @@ EXPORT_SYMBOL_GPL(call_rcu); ...@@ -856,14 +822,28 @@ EXPORT_SYMBOL_GPL(call_rcu);
* concurrently with new RCU read-side critical sections that began while * concurrently with new RCU read-side critical sections that began while
* synchronize_rcu() was waiting. RCU read-side critical sections are * synchronize_rcu() was waiting. RCU read-side critical sections are
* delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
* In addition, regions of code across which interrupts, preemption, or
* softirqs have been disabled also serve as RCU read-side critical
* sections. This includes hardware interrupt handlers, softirq handlers,
* and NMI handlers.
*
* Note that this guarantee implies further memory-ordering guarantees.
* On systems with more than one CPU, when synchronize_rcu() returns,
* each CPU is guaranteed to have executed a full memory barrier since the
* end of its last RCU-sched read-side critical section whose beginning
* preceded the call to synchronize_rcu(). In addition, each CPU having
* an RCU read-side critical section that extends beyond the return from
* synchronize_rcu() is guaranteed to have executed a full memory barrier
* after the beginning of synchronize_rcu() and before the beginning of
* that RCU read-side critical section. Note that these guarantees include
* CPUs that are offline, idle, or executing in user mode, as well as CPUs
* that are executing in the kernel.
* *
* See the description of synchronize_sched() for more detailed * Furthermore, if CPU A invoked synchronize_rcu(), which returned
* information on memory-ordering guarantees. However, please note * to its caller on CPU B, then both CPU A and CPU B are guaranteed
* that -only- the memory-ordering guarantees apply. For example, * to have executed a full memory barrier during the execution of
* synchronize_rcu() is -not- guaranteed to wait on things like code * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
* protected by preempt_disable(), instead, synchronize_rcu() is -only- * again only if the system has more than one CPU).
* guaranteed to wait on RCU read-side critical sections, that is, sections
* of code protected by rcu_read_lock().
*/ */
void synchronize_rcu(void) void synchronize_rcu(void)
{ {
...@@ -880,28 +860,6 @@ void synchronize_rcu(void) ...@@ -880,28 +860,6 @@ void synchronize_rcu(void)
} }
EXPORT_SYMBOL_GPL(synchronize_rcu); EXPORT_SYMBOL_GPL(synchronize_rcu);
/**
* rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
*
* Note that this primitive does not necessarily wait for an RCU grace period
* to complete. For example, if there are no RCU callbacks queued anywhere
* in the system, then rcu_barrier() is within its rights to return
* immediately, without waiting for anything, much less an RCU grace period.
*/
void rcu_barrier(void)
{
_rcu_barrier(rcu_state_p);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/*
* Initialize preemptible RCU's state structures.
*/
static void __init __rcu_init_preempt(void)
{
rcu_init_one(rcu_state_p);
}
/* /*
* Check for a task exiting while in a preemptible-RCU read-side * Check for a task exiting while in a preemptible-RCU read-side
* critical section, clean up if so. No need to issue warnings, * critical section, clean up if so. No need to issue warnings,
...@@ -964,8 +922,6 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck) ...@@ -964,8 +922,6 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
#else /* #ifdef CONFIG_PREEMPT_RCU */ #else /* #ifdef CONFIG_PREEMPT_RCU */
static struct rcu_state *const rcu_state_p = &rcu_sched_state;
/* /*
* Tell them what RCU they are running. * Tell them what RCU they are running.
*/ */
...@@ -975,18 +931,48 @@ static void __init rcu_bootup_announce(void) ...@@ -975,18 +931,48 @@ static void __init rcu_bootup_announce(void)
rcu_bootup_announce_oddness(); rcu_bootup_announce_oddness();
} }
/* Because preemptible RCU does not exist, we can ignore its QSes. */ /*
static void rcu_preempt_qs(void) * Note a quiescent state for PREEMPT=n. Because we do not need to know
* how many quiescent states passed, just if there was at least one since
* the start of the grace period, this just sets a flag. The caller must
* have disabled preemption.
*/
static void rcu_qs(void)
{ {
RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
return;
trace_rcu_grace_period(TPS("rcu_sched"),
__this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
} }
/* /*
* Because preemptible RCU does not exist, we never have to check for * Note a PREEMPT=n context switch. The caller must have disabled interrupts.
* CPUs being in quiescent states.
*/ */
static void rcu_preempt_note_context_switch(bool preempt) void rcu_note_context_switch(bool preempt)
{ {
barrier(); /* Avoid RCU read-side critical sections leaking down. */
trace_rcu_utilization(TPS("Start context switch"));
rcu_qs();
/* Load rcu_urgent_qs before other flags. */
if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
goto out;
this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
rcu_momentary_dyntick_idle();
this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
if (!preempt)
rcu_tasks_qs(current);
out:
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
} }
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
/* /*
* Because preemptible RCU does not exist, there are never any preempted * Because preemptible RCU does not exist, there are never any preempted
...@@ -1054,29 +1040,48 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp) ...@@ -1054,29 +1040,48 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
} }
/* /*
* Because preemptible RCU does not exist, it never has any callbacks * Check to see if this CPU is in a non-context-switch quiescent state
* to check. * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
* Also schedule RCU core processing.
*
* This function must be called from hardirq context. It is normally
* invoked from the scheduling-clock interrupt.
*/ */
static void rcu_preempt_check_callbacks(void) static void rcu_flavor_check_callbacks(int user)
{ {
} if (user || rcu_is_cpu_rrupt_from_idle()) {
/* /*
* Because preemptible RCU does not exist, rcu_barrier() is just * Get here if this CPU took its interrupt from user
* another name for rcu_barrier_sched(). * mode or from the idle loop, and if this is not a
* nested interrupt. In this case, the CPU is in
* a quiescent state, so note it.
*
* No memory barrier is required here because rcu_qs()
* references only CPU-local variables that other CPUs
* neither access nor modify, at least not while the
* corresponding CPU is online.
*/ */
void rcu_barrier(void)
{ rcu_qs();
rcu_barrier_sched(); }
} }
EXPORT_SYMBOL_GPL(rcu_barrier);
/* /* PREEMPT=n implementation of synchronize_rcu(). */
* Because preemptible RCU does not exist, it need not be initialized. void synchronize_rcu(void)
*/
static void __init __rcu_init_preempt(void)
{ {
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU-sched read-side critical section");
if (rcu_blocking_is_gp())
return;
if (rcu_gp_is_expedited())
synchronize_rcu_expedited();
else
wait_rcu_gp(call_rcu);
} }
EXPORT_SYMBOL_GPL(synchronize_rcu);
/* /*
* Because preemptible RCU does not exist, tasks cannot possibly exit * Because preemptible RCU does not exist, tasks cannot possibly exit
...@@ -1319,8 +1324,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, ...@@ -1319,8 +1324,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
static void rcu_kthread_do_work(void) static void rcu_kthread_do_work(void)
{ {
rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); rcu_do_batch(&rcu_state, this_cpu_ptr(&rcu_data));
rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
} }
static void rcu_cpu_kthread_setup(unsigned int cpu) static void rcu_cpu_kthread_setup(unsigned int cpu)
...@@ -1727,87 +1731,6 @@ static void rcu_idle_count_callbacks_posted(void) ...@@ -1727,87 +1731,6 @@ static void rcu_idle_count_callbacks_posted(void)
__this_cpu_add(rcu_dynticks.nonlazy_posted, 1); __this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
} }
/*
* Data for flushing lazy RCU callbacks at OOM time.
*/
static atomic_t oom_callback_count;
static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
/*
* RCU OOM callback -- decrement the outstanding count and deliver the
* wake-up if we are the last one.
*/
static void rcu_oom_callback(struct rcu_head *rhp)
{
if (atomic_dec_and_test(&oom_callback_count))
wake_up(&oom_callback_wq);
}
/*
* Post an rcu_oom_notify callback on the current CPU if it has at
* least one lazy callback. This will unnecessarily post callbacks
* to CPUs that already have a non-lazy callback at the end of their
* callback list, but this is an infrequent operation, so accept some
* extra overhead to keep things simple.
*/
static void rcu_oom_notify_cpu(void *unused)
{
struct rcu_state *rsp;
struct rcu_data *rdp;
for_each_rcu_flavor(rsp) {
rdp = raw_cpu_ptr(rsp->rda);
if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
atomic_inc(&oom_callback_count);
rsp->call(&rdp->oom_head, rcu_oom_callback);
}
}
}
/*
* If low on memory, ensure that each CPU has a non-lazy callback.
* This will wake up CPUs that have only lazy callbacks, in turn
* ensuring that they free up the corresponding memory in a timely manner.
* Because an uncertain amount of memory will be freed in some uncertain
* timeframe, we do not claim to have freed anything.
*/
static int rcu_oom_notify(struct notifier_block *self,
unsigned long notused, void *nfreed)
{
int cpu;
/* Wait for callbacks from earlier instance to complete. */
wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
smp_mb(); /* Ensure callback reuse happens after callback invocation. */
/*
* Prevent premature wakeup: ensure that all increments happen
* before there is a chance of the counter reaching zero.
*/
atomic_set(&oom_callback_count, 1);
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
cond_resched_tasks_rcu_qs();
}
/* Unconditionally decrement: no need to wake ourselves up. */
atomic_dec(&oom_callback_count);
return NOTIFY_OK;
}
static struct notifier_block rcu_oom_nb = {
.notifier_call = rcu_oom_notify
};
static int __init rcu_register_oom_notifier(void)
{
register_oom_notifier(&rcu_oom_nb);
return 0;
}
early_initcall(rcu_register_oom_notifier);
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
#ifdef CONFIG_RCU_FAST_NO_HZ #ifdef CONFIG_RCU_FAST_NO_HZ
......