Commit 7fbe67e4 authored by Paul E. McKenney's avatar Paul E. McKenney

Merge branch 'strictgp.2020.08.24a' into HEAD

strictgp.2020.08.24a: Strict grace periods for KASAN testing.
parents f511ce14 cfeac397
......@@ -4152,6 +4152,15 @@
This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump().
rcutree.rcu_unlock_delay= [KNL]
In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
this specifies an rcu_read_unlock()-time delay
in microseconds. This defaults to zero.
Larger delays increase the probability of
catching RCU pointer leaks, that is, buggy use
of RCU-protected pointers after the relevant
rcu_read_unlock() has completed.
rcutree.sysrq_rcu= [KNL]
Commandeer a sysrq key to dump out Tree RCU's
rcu_node tree with an eye towards determining
......
......@@ -55,6 +55,12 @@ void __rcu_read_unlock(void);
#else /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TINY_RCU
#define rcu_read_unlock_strict() do { } while (0)
#else
void rcu_read_unlock_strict(void);
#endif
static inline void __rcu_read_lock(void)
{
preempt_disable();
......@@ -63,6 +69,7 @@ static inline void __rcu_read_lock(void)
static inline void __rcu_read_unlock(void)
{
preempt_enable();
rcu_read_unlock_strict();
}
static inline int rcu_preempt_depth(void)
......
......@@ -135,10 +135,12 @@ config RCU_FANOUT
config RCU_FANOUT_LEAF
int "Tree-based hierarchical RCU leaf-level fanout value"
range 2 64 if 64BIT
range 2 32 if !64BIT
range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
range 2 3 if RCU_STRICT_GRACE_PERIOD
depends on TREE_RCU && RCU_EXPERT
default 16
default 16 if !RCU_STRICT_GRACE_PERIOD
default 2 if RCU_STRICT_GRACE_PERIOD
help
This option controls the leaf-level fanout of hierarchical
implementations of RCU, and allows trading off cache misses
......
......@@ -114,4 +114,19 @@ config RCU_EQS_DEBUG
Say N here if you need ultimate kernel/user switch latencies
Say Y if you are unsure
config RCU_STRICT_GRACE_PERIOD
bool "Provide debug RCU implementation with short grace periods"
depends on DEBUG_KERNEL && RCU_EXPERT
default n
select PREEMPT_COUNT if PREEMPT=n
help
Select this option to build an RCU variant that is strict about
grace periods, making them as short as it can. This limits
scalability, destroys real-time response, degrades battery
lifetime and kills performance. Don't try this on large
machines, as in systems with more than about 10 or 20 CPUs.
But in conjunction with tools like KASAN, it can be helpful
when looking for certain types of RCU usage bugs, for example,
too-short RCU read-side critical sections.
endmenu # "RCU Debugging"
......@@ -165,6 +165,12 @@ module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);
// Add delay to rcu_read_unlock() for strict grace periods.
static int rcu_unlock_delay;
#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
module_param(rcu_unlock_delay, int, 0444);
#endif
/*
* This rcu parameter is runtime-read-only. It reflects
* a minimum allowed number of objects which can be cached
......@@ -455,24 +461,25 @@ static int rcu_is_cpu_rrupt_from_idle(void)
return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
}
#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch ... */
#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
// Maximum callbacks per rcu_do_batch ...
#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
static long blimit = DEFAULT_RCU_BLIMIT;
#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
static long qhimark = DEFAULT_RCU_QHIMARK;
#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */
#define DEFAULT_RCU_QLOMARK 100 // Once only this many pending, use blimit.
static long qlowmark = DEFAULT_RCU_QLOMARK;
#define DEFAULT_RCU_QOVLD_MULT 2
#define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
static long qovld_calc = -1; /* No pre-initialization lock acquisitions! */
static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
static long qovld_calc = -1; // No pre-initialization lock acquisitions!
module_param(blimit, long, 0444);
module_param(qhimark, long, 0444);
module_param(qlowmark, long, 0444);
module_param(qovld, long, 0444);
static ulong jiffies_till_first_fqs = ULONG_MAX;
static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
static bool rcu_kick_kthreads;
static int rcu_divisor = 7;
......@@ -1571,6 +1578,19 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
raw_spin_unlock_rcu_node(rnp);
}
/*
* In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
* quiescent state. This is intended to be invoked when the CPU notices
* a new grace period.
*/
static void rcu_strict_gp_check_qs(void)
{
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
rcu_read_lock();
rcu_read_unlock();
}
}
/*
* Update CPU-local rcu_data state to record the beginnings and ends of
* grace periods. The caller must hold the ->lock of the leaf rcu_node
......@@ -1641,6 +1661,7 @@ static void note_gp_changes(struct rcu_data *rdp)
}
needwake = __note_gp_changes(rnp, rdp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_strict_gp_check_qs();
if (needwake)
rcu_gp_kthread_wake();
}
......@@ -1678,6 +1699,15 @@ static void rcu_gp_torture_wait(void)
}
}
/*
* Handler for on_each_cpu() to invoke the target CPU's RCU core
* processing.
*/
static void rcu_strict_gp_boundary(void *unused)
{
invoke_rcu_core();
}
/*
* Initialize a new grace period. Return false if no grace period required.
*/
......@@ -1809,6 +1839,10 @@ static bool rcu_gp_init(void)
WRITE_ONCE(rcu_state.gp_activity, jiffies);
}
// If strict, make all CPUs aware of new grace period.
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
return true;
}
......@@ -2025,6 +2059,10 @@ static void rcu_gp_cleanup(void)
rcu_state.gp_flags & RCU_GP_FLAG_INIT);
}
raw_spin_unlock_irq_rcu_node(rnp);
// If strict, make all CPUs aware of the end of the old grace period.
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
}
/*
......@@ -2203,7 +2241,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
* structure. This must be called from the specified CPU.
*/
static void
rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
rcu_report_qs_rdp(struct rcu_data *rdp)
{
unsigned long flags;
unsigned long mask;
......@@ -2212,6 +2250,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
rcu_segcblist_is_offloaded(&rdp->cblist);
struct rcu_node *rnp;
WARN_ON_ONCE(rdp->cpu != smp_processor_id());
rnp = rdp->mynode;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
......@@ -2228,8 +2267,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
return;
}
mask = rdp->grpmask;
if (rdp->cpu == smp_processor_id())
rdp->core_needs_qs = false;
rdp->core_needs_qs = false;
if ((rnp->qsmask & mask) == 0) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
} else {
......@@ -2278,7 +2316,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
* Tell RCU we are done (but rcu_report_qs_rdp() will be the
* judge of that).
*/
rcu_report_qs_rdp(rdp->cpu, rdp);
rcu_report_qs_rdp(rdp);
}
/*
......@@ -2621,6 +2659,14 @@ void rcu_force_quiescent_state(void)
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
// Workqueue handler for an RCU reader for kernels enforcing struct RCU
// grace periods.
static void strict_work_handler(struct work_struct *work)
{
rcu_read_lock();
rcu_read_unlock();
}
/* Perform RCU core processing work for the current CPU. */
static __latent_entropy void rcu_core(void)
{
......@@ -2665,6 +2711,10 @@ static __latent_entropy void rcu_core(void)
/* Do any needed deferred wakeups of rcuo kthreads. */
do_nocb_deferred_wakeup(rdp);
trace_rcu_utilization(TPS("End RCU core"));
// If strict GPs, schedule an RCU reader in a clean environment.
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
}
static void rcu_core_si(struct softirq_action *h)
......@@ -3862,6 +3912,7 @@ rcu_boot_init_percpu_data(int cpu)
/* Set up local state, ensuring consistent view of global state. */
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
INIT_WORK(&rdp->strict_work, strict_work_handler);
WARN_ON_ONCE(rdp->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
......
......@@ -165,6 +165,7 @@ struct rcu_data {
/* period it is aware of. */
struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
bool defer_qs_iw_pending; /* Scheduler attention pending? */
struct work_struct strict_work; /* Schedule readers for strict GPs. */
/* 2) batch handling */
struct rcu_segcblist cblist; /* Segmented callback list, with */
......
......@@ -36,6 +36,8 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
if (IS_ENABLED(CONFIG_PROVE_RCU))
pr_info("\tRCU lockdep checking is enabled.\n");
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
pr_info("\tRCU strict (and thus non-scalable) grace periods enabled.\n");
if (RCU_NUM_LVLS >= 4)
pr_info("\tFour(or more)-level hierarchy is enabled.\n");
if (RCU_FANOUT_LEAF != 16)
......@@ -374,6 +376,8 @@ void __rcu_read_lock(void)
rcu_preempt_read_enter();
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
......@@ -455,8 +459,14 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
return;
}
t->rcu_read_unlock_special.s = 0;
if (special.b.need_qs)
rcu_qs();
if (special.b.need_qs) {
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
rcu_report_qs_rdp(rdp);
udelay(rcu_unlock_delay);
} else {
rcu_qs();
}
}
/*
* Respond to a request by an expedited grace period for a
......@@ -768,6 +778,24 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
#else /* #ifdef CONFIG_PREEMPT_RCU */
/*
* If strict grace periods are enabled, and if the calling
* __rcu_read_unlock() marks the beginning of a quiescent state, immediately
* report that quiescent state and, if requested, spin for a bit.
*/
void rcu_read_unlock_strict(void)
{
struct rcu_data *rdp;
if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
return;
rdp = this_cpu_ptr(&rcu_data);
rcu_report_qs_rdp(rdp);
udelay(rcu_unlock_delay);
}
EXPORT_SYMBOL_GPL(rcu_read_unlock_strict);
/*
* Tell them what RCU they are running.
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment