Commit 063f5a4d authored by Paul E. McKenney

rcutorture: Don't count CPU-stalled time against priority boosting

It will frequently be the case that rcu_torture_boost() will get a
->start_gp_poll() cookie that needs almost all of the current grace period
plus an additional grace period to elapse before ->poll_gp_state() will
return true.  It is quite possible that the current grace period will have
(say) two seconds of stall by a CPU failing to pass through a quiescent
state, followed by 300 milliseconds of delay due to a preempted reader.
The next grace period might suffer only one second of stall by a CPU,
followed by another 300 milliseconds of delay due to a preempted reader.
This is an example of RCU priority boosting doing its job, but the full
elapsed time of 3.6 seconds exceeds the 3.5-second limit.  In addition,
there is no CPU stall in force at the 3.5-second mark, so this would
nevertheless currently be counted as an RCU priority boosting failure.

This commit therefore avoids this sort of false positive by resetting
the gp_state_time timestamp any time that the current grace period is
being blocked by a CPU.  This results in extremely frequent calls to
the ->check_boost_failed() function, so this commit provides a lockless
fastpath that is selected by supplying a NULL CPU-number pointer.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
parent 0260b92e
...@@ -918,17 +918,18 @@ static void rcu_torture_enable_rt_throttle(void) ...@@ -918,17 +918,18 @@ static void rcu_torture_enable_rt_throttle(void)
old_rt_runtime = -1; old_rt_runtime = -1;
} }
static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start, unsigned long end) static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long *start)
{ {
int cpu; int cpu;
static int dbg_done; static int dbg_done;
unsigned long end = jiffies;
bool gp_done; bool gp_done;
unsigned long j; unsigned long j;
static unsigned long last_persist; static unsigned long last_persist;
unsigned long lp; unsigned long lp;
unsigned long mininterval = test_boost_duration * HZ - HZ / 2; unsigned long mininterval = test_boost_duration * HZ - HZ / 2;
if (end - start > mininterval) { if (end - *start > mininterval) {
// Recheck after checking time to avoid false positives. // Recheck after checking time to avoid false positives.
smp_mb(); // Time check before grace-period check. smp_mb(); // Time check before grace-period check.
if (cur_ops->poll_gp_state(gp_state)) if (cur_ops->poll_gp_state(gp_state))
...@@ -945,7 +946,7 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start ...@@ -945,7 +946,7 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start
n_rcu_torture_boost_failure++; n_rcu_torture_boost_failure++;
if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg) { if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg) {
pr_info("Boost inversion thread ->rt_priority %u gp_state %lu jiffies %lu\n", pr_info("Boost inversion thread ->rt_priority %u gp_state %lu jiffies %lu\n",
current->rt_priority, gp_state, end - start); current->rt_priority, gp_state, end - *start);
cur_ops->gp_kthread_dbg(); cur_ops->gp_kthread_dbg();
// Recheck after print to flag grace period ending during splat. // Recheck after print to flag grace period ending during splat.
gp_done = cur_ops->poll_gp_state(gp_state); gp_done = cur_ops->poll_gp_state(gp_state);
...@@ -955,6 +956,8 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start ...@@ -955,6 +956,8 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start
} }
return true; // failed return true; // failed
} else if (cur_ops->check_boost_failed && !cur_ops->check_boost_failed(gp_state, NULL)) {
*start = jiffies;
} }
return false; // passed return false; // passed
...@@ -995,7 +998,7 @@ static int rcu_torture_boost(void *arg) ...@@ -995,7 +998,7 @@ static int rcu_torture_boost(void *arg)
while (time_before(jiffies, endtime)) { while (time_before(jiffies, endtime)) {
// Has current GP gone too long? // Has current GP gone too long?
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state)) if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
failed = rcu_torture_boost_failed(gp_state, gp_state_time, jiffies); failed = rcu_torture_boost_failed(gp_state, &gp_state_time);
// If we don't have a grace period in flight, start one. // If we don't have a grace period in flight, start one.
if (!gp_initiated || cur_ops->poll_gp_state(gp_state)) { if (!gp_initiated || cur_ops->poll_gp_state(gp_state)) {
gp_state = cur_ops->start_gp_poll(); gp_state = cur_ops->start_gp_poll();
...@@ -1016,7 +1019,7 @@ static int rcu_torture_boost(void *arg) ...@@ -1016,7 +1019,7 @@ static int rcu_torture_boost(void *arg)
// In case the grace period extended beyond the end of the loop. // In case the grace period extended beyond the end of the loop.
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state)) if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
rcu_torture_boost_failed(gp_state, gp_state_time, jiffies); rcu_torture_boost_failed(gp_state, &gp_state_time);
/* /*
* Set the start time of the next test interval. * Set the start time of the next test interval.
......
...@@ -723,6 +723,10 @@ static void check_cpu_stall(struct rcu_data *rdp) ...@@ -723,6 +723,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
* count this as an RCU priority boosting failure. A return of true says * count this as an RCU priority boosting failure. A return of true says
* RCU priority boosting is to blame, and false says otherwise. If false * RCU priority boosting is to blame, and false says otherwise. If false
* is returned, the first of the CPUs to blame is stored through cpup. * is returned, the first of the CPUs to blame is stored through cpup.
*
* If cpup is NULL, then a lockless quick check is carried out, suitable
* for high-rate usage. On the other hand, if cpup is non-NULL, each
* rcu_node structure's ->lock is acquired, ruling out high-rate usage.
*/ */
bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
{ {
...@@ -731,6 +735,12 @@ bool rcu_check_boost_fail(unsigned long gp_state, int *cpup) ...@@ -731,6 +735,12 @@ bool rcu_check_boost_fail(unsigned long gp_state, int *cpup)
struct rcu_node *rnp; struct rcu_node *rnp;
rcu_for_each_leaf_node(rnp) { rcu_for_each_leaf_node(rnp) {
if (!cpup) {
if (READ_ONCE(rnp->qsmask))
return false;
else
continue;
}
raw_spin_lock_irqsave_rcu_node(rnp, flags); raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!rnp->qsmask) { if (!rnp->qsmask) {
// No CPUs without quiescent states for this rnp. // No CPUs without quiescent states for this rnp.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment