Commit 881ed593 authored by Paul E. McKenney

rcuperf: Add ability to performance-test call_rcu() and friends

This commit upgrades rcuperf so that it can do performance testing on
asynchronous grace-period primitives such as call_srcu().  There is
a new rcuperf.gp_async module parameter that specifies this new behavior,
with the pre-existing rcuperf.gp_exp testing expedited grace periods such as
synchronize_rcu_expedited, and with the default being to test synchronous
non-expedited grace periods such as synchronize_rcu().

There is also a new rcuperf.gp_async_max module parameter that specifies
the maximum number of outstanding callbacks per writer kthread, defaulting
to 1,000.  When this limit is exceeded, the writer thread invokes the
appropriate flavor of rcu_barrier() to wait for callbacks to drain.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Removed the redundant initialization noted by Arnd Bergmann. ]
parent e28371c8
...@@ -3319,6 +3319,17 @@ ...@@ -3319,6 +3319,17 @@
This wake_up() will be accompanied by a This wake_up() will be accompanied by a
WARN_ONCE() splat and an ftrace_dump(). WARN_ONCE() splat and an ftrace_dump().
rcuperf.gp_async= [KNL]
Measure performance of asynchronous
grace-period primitives such as call_rcu().
rcuperf.gp_async_max= [KNL]
Specify the maximum number of outstanding
callbacks per writer thread. When a writer
thread exceeds this limit, it invokes the
corresponding flavor of rcu_barrier() to allow
previously posted callbacks to drain.
rcuperf.gp_exp= [KNL] rcuperf.gp_exp= [KNL]
Measure performance of expedited synchronous Measure performance of expedited synchronous
grace-period primitives. grace-period primitives.
......
...@@ -59,6 +59,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>"); ...@@ -59,6 +59,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
#define VERBOSE_PERFOUT_ERRSTRING(s) \ #define VERBOSE_PERFOUT_ERRSTRING(s) \
do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0) do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
/*
 * Asynchronous grace-period testing: gp_async selects the asynchronous
 * primitive, and gp_async_max caps the callbacks a writer kthread may
 * have outstanding before it invokes the barrier primitive.
 */
torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
torture_param(int, nreaders, -1, "Number of RCU reader threads"); torture_param(int, nreaders, -1, "Number of RCU reader threads");
...@@ -86,13 +88,16 @@ static u64 t_rcu_perf_writer_started; ...@@ -86,13 +88,16 @@ static u64 t_rcu_perf_writer_started;
static u64 t_rcu_perf_writer_finished; static u64 t_rcu_perf_writer_finished;
static unsigned long b_rcu_perf_writer_started; static unsigned long b_rcu_perf_writer_started;
static unsigned long b_rcu_perf_writer_finished; static unsigned long b_rcu_perf_writer_finished;
/*
 * Per-CPU count of asynchronous callbacks in flight: incremented by
 * rcu_perf_writer() before posting, decremented by rcu_perf_async_cb().
 */
static DEFINE_PER_CPU(atomic_t, n_async_inflight);
static int rcu_perf_writer_state; static int rcu_perf_writer_state;
#define RTWS_INIT 0 #define RTWS_INIT 0
#define RTWS_EXP_SYNC 1 #define RTWS_ASYNC 1
#define RTWS_SYNC 2 #define RTWS_BARRIER 2
#define RTWS_IDLE 2 #define RTWS_EXP_SYNC 3
#define RTWS_STOPPING 3 #define RTWS_SYNC 4
#define RTWS_IDLE 5
#define RTWS_STOPPING 6
#define MAX_MEAS 10000 #define MAX_MEAS 10000
#define MIN_MEAS 100 #define MIN_MEAS 100
...@@ -114,6 +119,8 @@ struct rcu_perf_ops { ...@@ -114,6 +119,8 @@ struct rcu_perf_ops {
unsigned long (*started)(void); unsigned long (*started)(void);
unsigned long (*completed)(void); unsigned long (*completed)(void);
unsigned long (*exp_completed)(void); unsigned long (*exp_completed)(void);
void (*async)(struct rcu_head *head, rcu_callback_t func);
void (*gp_barrier)(void);
void (*sync)(void); void (*sync)(void);
void (*exp_sync)(void); void (*exp_sync)(void);
const char *name; const char *name;
...@@ -153,6 +160,8 @@ static struct rcu_perf_ops rcu_ops = { ...@@ -153,6 +160,8 @@ static struct rcu_perf_ops rcu_ops = {
.started = rcu_batches_started, .started = rcu_batches_started,
.completed = rcu_batches_completed, .completed = rcu_batches_completed,
.exp_completed = rcu_exp_batches_completed, .exp_completed = rcu_exp_batches_completed,
.async = call_rcu,
.gp_barrier = rcu_barrier,
.sync = synchronize_rcu, .sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited, .exp_sync = synchronize_rcu_expedited,
.name = "rcu" .name = "rcu"
...@@ -181,6 +190,8 @@ static struct rcu_perf_ops rcu_bh_ops = { ...@@ -181,6 +190,8 @@ static struct rcu_perf_ops rcu_bh_ops = {
.started = rcu_batches_started_bh, .started = rcu_batches_started_bh,
.completed = rcu_batches_completed_bh, .completed = rcu_batches_completed_bh,
.exp_completed = rcu_exp_batches_completed_sched, .exp_completed = rcu_exp_batches_completed_sched,
.async = call_rcu_bh,
.gp_barrier = rcu_barrier_bh,
.sync = synchronize_rcu_bh, .sync = synchronize_rcu_bh,
.exp_sync = synchronize_rcu_bh_expedited, .exp_sync = synchronize_rcu_bh_expedited,
.name = "rcu_bh" .name = "rcu_bh"
...@@ -208,6 +219,16 @@ static unsigned long srcu_perf_completed(void) ...@@ -208,6 +219,16 @@ static unsigned long srcu_perf_completed(void)
return srcu_batches_completed(srcu_ctlp); return srcu_batches_completed(srcu_ctlp);
} }
/*
 * Asynchronous grace-period hook for SRCU: hands @head and @func to
 * call_srcu(), using srcu_ctlp as the srcu_struct.
 */
static void
srcu_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
	call_srcu(srcu_ctlp, head, func);
}
/*
 * Barrier hook for SRCU: invokes srcu_barrier() on srcu_ctlp.
 */
static void
srcu_rcu_barrier(void)
{
	srcu_barrier(srcu_ctlp);
}
static void srcu_perf_synchronize(void) static void srcu_perf_synchronize(void)
{ {
synchronize_srcu(srcu_ctlp); synchronize_srcu(srcu_ctlp);
...@@ -226,6 +247,8 @@ static struct rcu_perf_ops srcu_ops = { ...@@ -226,6 +247,8 @@ static struct rcu_perf_ops srcu_ops = {
.started = NULL, .started = NULL,
.completed = srcu_perf_completed, .completed = srcu_perf_completed,
.exp_completed = srcu_perf_completed, .exp_completed = srcu_perf_completed,
.async = srcu_call_rcu,
.gp_barrier = srcu_rcu_barrier,
.sync = srcu_perf_synchronize, .sync = srcu_perf_synchronize,
.exp_sync = srcu_perf_synchronize_expedited, .exp_sync = srcu_perf_synchronize_expedited,
.name = "srcu" .name = "srcu"
...@@ -254,6 +277,8 @@ static struct rcu_perf_ops sched_ops = { ...@@ -254,6 +277,8 @@ static struct rcu_perf_ops sched_ops = {
.started = rcu_batches_started_sched, .started = rcu_batches_started_sched,
.completed = rcu_batches_completed_sched, .completed = rcu_batches_completed_sched,
.exp_completed = rcu_exp_batches_completed_sched, .exp_completed = rcu_exp_batches_completed_sched,
.async = call_rcu_sched,
.gp_barrier = rcu_barrier_sched,
.sync = synchronize_sched, .sync = synchronize_sched,
.exp_sync = synchronize_sched_expedited, .exp_sync = synchronize_sched_expedited,
.name = "sched" .name = "sched"
...@@ -281,6 +306,8 @@ static struct rcu_perf_ops tasks_ops = { ...@@ -281,6 +306,8 @@ static struct rcu_perf_ops tasks_ops = {
.readunlock = tasks_perf_read_unlock, .readunlock = tasks_perf_read_unlock,
.started = rcu_no_completed, .started = rcu_no_completed,
.completed = rcu_no_completed, .completed = rcu_no_completed,
.async = call_rcu_tasks,
.gp_barrier = rcu_barrier_tasks,
.sync = synchronize_rcu_tasks, .sync = synchronize_rcu_tasks,
.exp_sync = synchronize_rcu_tasks, .exp_sync = synchronize_rcu_tasks,
.name = "tasks" .name = "tasks"
...@@ -343,6 +370,15 @@ rcu_perf_reader(void *arg) ...@@ -343,6 +370,15 @@ rcu_perf_reader(void *arg)
return 0; return 0;
} }
/*
* Callback function for asynchronous grace periods from rcu_perf_writer().
*/
static void rcu_perf_async_cb(struct rcu_head *rhp)
{
atomic_dec(this_cpu_ptr(&n_async_inflight));
kfree(rhp);
}
/* /*
* RCU perf writer kthread. Repeatedly does a grace period. * RCU perf writer kthread. Repeatedly does a grace period.
*/ */
...@@ -352,6 +388,7 @@ rcu_perf_writer(void *arg) ...@@ -352,6 +388,7 @@ rcu_perf_writer(void *arg)
int i = 0; int i = 0;
int i_max; int i_max;
long me = (long)arg; long me = (long)arg;
struct rcu_head *rhp = NULL;
struct sched_param sp; struct sched_param sp;
bool started = false, done = false, alldone = false; bool started = false, done = false, alldone = false;
u64 t; u64 t;
...@@ -382,7 +419,23 @@ rcu_perf_writer(void *arg) ...@@ -382,7 +419,23 @@ rcu_perf_writer(void *arg)
do { do {
wdp = &wdpp[i]; wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns(); *wdp = ktime_get_mono_fast_ns();
if (gp_exp) { if (gp_async) {
retry:
if (!rhp)
rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
rcu_perf_writer_state = RTWS_ASYNC;
atomic_inc(this_cpu_ptr(&n_async_inflight));
cur_ops->async(rhp, rcu_perf_async_cb);
rhp = NULL;
} else if (!kthread_should_stop()) {
rcu_perf_writer_state = RTWS_BARRIER;
cur_ops->gp_barrier();
goto retry;
} else {
kfree(rhp); /* Because we are stopping. */
}
} else if (gp_exp) {
rcu_perf_writer_state = RTWS_EXP_SYNC; rcu_perf_writer_state = RTWS_EXP_SYNC;
cur_ops->exp_sync(); cur_ops->exp_sync();
} else { } else {
...@@ -429,6 +482,10 @@ rcu_perf_writer(void *arg) ...@@ -429,6 +482,10 @@ rcu_perf_writer(void *arg)
i++; i++;
rcu_perf_wait_shutdown(); rcu_perf_wait_shutdown();
} while (!torture_must_stop()); } while (!torture_must_stop());
if (gp_async) {
rcu_perf_writer_state = RTWS_BARRIER;
cur_ops->gp_barrier();
}
rcu_perf_writer_state = RTWS_STOPPING; rcu_perf_writer_state = RTWS_STOPPING;
writer_n_durations[me] = i_max; writer_n_durations[me] = i_max;
torture_kthread_stopping("rcu_perf_writer"); torture_kthread_stopping("rcu_perf_writer");
...@@ -460,6 +517,8 @@ rcu_perf_cleanup(void) ...@@ -460,6 +517,8 @@ rcu_perf_cleanup(void)
VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
if (rcu_gp_is_normal() && gp_exp) if (rcu_gp_is_normal() && gp_exp)
VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
if (gp_exp && gp_async)
VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!");
if (torture_cleanup_begin()) if (torture_cleanup_begin())
return; return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment