Commit 46f81532, authored by Davidlohr Bueso, committed by Arnaldo Carvalho de Melo

perf bench futex, requeue: Add --pi parameter

This extends the program to measure WAIT_REQUEUE_PI+CMP_REQUEUE_PI
pairs, which are the underlying machinery behind priority-inheritance
aware condition variables. The defaults are the same as with the regular
non-pi version, requeueing one task at a time, with the exception that
PI will always wake up the first waiter.
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20210809043301.66002-8-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 6f9661b2
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
* on futex2, N at a time. * on futex2, N at a time.
* *
* This program is particularly useful to measure the latency of nthread * This program is particularly useful to measure the latency of nthread
* requeues without waking up any tasks -- thus mimicking a regular futex_wait. * requeues without waking up any tasks (in the non-pi case) -- thus
* mimicking a regular futex_wait.
*/ */
/* For the CLR_() macros */ /* For the CLR_() macros */
...@@ -54,6 +55,8 @@ static const struct option options[] = { ...@@ -54,6 +55,8 @@ static const struct option options[] = {
OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"), OPT_BOOLEAN( 'S', "shared", &params.fshared, "Use shared futexes instead of private ones"),
OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"), OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"), OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
OPT_BOOLEAN( 'p', "pi", &params.pi, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),
OPT_END() OPT_END()
}; };
...@@ -87,15 +90,31 @@ static void *workerfn(void *arg __maybe_unused) ...@@ -87,15 +90,31 @@ static void *workerfn(void *arg __maybe_unused)
pthread_mutex_unlock(&thread_lock); pthread_mutex_unlock(&thread_lock);
while (1) { while (1) {
if (!params.pi) {
ret = futex_wait(&futex1, 0, NULL, futex_flag); ret = futex_wait(&futex1, 0, NULL, futex_flag);
if (!ret) if (!ret)
break; break;
if (ret && errno != EAGAIN) { if (ret && errno != EAGAIN) {
if (!params.silent) if (!params.silent)
warn("futex_wait"); warnx("futex_wait");
break; break;
} }
} else {
ret = futex_wait_requeue_pi(&futex1, 0, &futex2,
NULL, futex_flag);
if (!ret) {
/* got the lock at futex2 */
futex_unlock_pi(&futex2, futex_flag);
break;
}
if (ret && errno != EAGAIN) {
if (!params.silent)
warnx("futex_wait_requeue_pi");
break;
}
}
} }
return NULL; return NULL;
...@@ -171,9 +190,10 @@ int bench_futex_requeue(int argc, const char **argv) ...@@ -171,9 +190,10 @@ int bench_futex_requeue(int argc, const char **argv)
if (params.broadcast) if (params.broadcast)
params.nrequeue = params.nthreads; params.nrequeue = params.nthreads;
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
"%d at a time.\n\n", getpid(), params.nthreads, "%d at a time.\n\n", getpid(), params.nthreads,
params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue); params.fshared ? "shared":"private", &futex1,
params.pi ? "PI ": "", &futex2, params.nrequeue);
init_stats(&requeued_stats); init_stats(&requeued_stats);
init_stats(&requeuetime_stats); init_stats(&requeuetime_stats);
...@@ -183,7 +203,7 @@ int bench_futex_requeue(int argc, const char **argv) ...@@ -183,7 +203,7 @@ int bench_futex_requeue(int argc, const char **argv)
pthread_cond_init(&thread_worker, NULL); pthread_cond_init(&thread_worker, NULL);
for (j = 0; j < bench_repeat && !done; j++) { for (j = 0; j < bench_repeat && !done; j++) {
unsigned int nrequeued = 0; unsigned int nrequeued = 0, wakeups = 0;
struct timeval start, end, runtime; struct timeval start, end, runtime;
/* create, launch & block all threads */ /* create, launch & block all threads */
...@@ -201,13 +221,30 @@ int bench_futex_requeue(int argc, const char **argv) ...@@ -201,13 +221,30 @@ int bench_futex_requeue(int argc, const char **argv)
/* Ok, all threads are patiently blocked, start requeueing */ /* Ok, all threads are patiently blocked, start requeueing */
gettimeofday(&start, NULL); gettimeofday(&start, NULL);
while (nrequeued < params.nthreads) { while (nrequeued < params.nthreads) {
int r;
/* /*
* Do not wakeup any tasks blocked on futex1, allowing * For the regular non-pi case, do not wakeup any tasks
* us to really measure futex_wait functionality. * blocked on futex1, allowing us to really measure
* futex_wait functionality. For the PI case the first
* waiter is always awoken.
*/ */
nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, if (!params.pi) {
r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
params.nrequeue, params.nrequeue,
futex_flag); futex_flag);
} else {
r = futex_cmp_requeue_pi(&futex1, 0, &futex2,
params.nrequeue,
futex_flag);
wakeups++; /* assume no error */
}
if (r < 0)
err(EXIT_FAILURE, "couldn't requeue from %p to %p",
&futex1, &futex2);
nrequeued += r;
} }
gettimeofday(&end, NULL); gettimeofday(&end, NULL);
...@@ -217,16 +254,29 @@ int bench_futex_requeue(int argc, const char **argv) ...@@ -217,16 +254,29 @@ int bench_futex_requeue(int argc, const char **argv)
update_stats(&requeuetime_stats, runtime.tv_usec); update_stats(&requeuetime_stats, runtime.tv_usec);
if (!params.silent) { if (!params.silent) {
printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", if (!params.pi)
j + 1, nrequeued, params.nthreads, printf("[Run %d]: Requeued %d of %d threads in "
"%.4f ms\n", j + 1, nrequeued,
params.nthreads,
runtime.tv_usec / (double)USEC_PER_MSEC);
else {
nrequeued -= wakeups;
printf("[Run %d]: Awoke and Requeued (%d+%d) of "
"%d threads in %.4f ms\n",
j + 1, wakeups, nrequeued,
params.nthreads,
runtime.tv_usec / (double)USEC_PER_MSEC); runtime.tv_usec / (double)USEC_PER_MSEC);
} }
}
if (!params.pi) {
/* everybody should be blocked on futex2, wake'em up */ /* everybody should be blocked on futex2, wake'em up */
nrequeued = futex_wake(&futex2, nrequeued, futex_flag); nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
if (params.nthreads != nrequeued) if (params.nthreads != nrequeued)
warnx("couldn't wakeup all tasks (%d/%d)", warnx("couldn't wakeup all tasks (%d/%d)",
nrequeued, params.nthreads); nrequeued, params.nthreads);
}
for (i = 0; i < params.nthreads; i++) { for (i = 0; i < params.nthreads; i++) {
ret = pthread_join(worker[i], NULL); ret = pthread_join(worker[i], NULL);
......
...@@ -18,6 +18,7 @@ struct bench_futex_parameters { ...@@ -18,6 +18,7 @@ struct bench_futex_parameters {
bool fshared; bool fshared;
bool mlockall; bool mlockall;
bool multi; /* lock-pi */ bool multi; /* lock-pi */
bool pi; /* requeue-pi */
bool broadcast; /* requeue */ bool broadcast; /* requeue */
unsigned int runtime; /* seconds*/ unsigned int runtime; /* seconds*/
unsigned int nthreads; unsigned int nthreads;
...@@ -99,4 +100,38 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak ...@@ -99,4 +100,38 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
val, opflags); val, opflags);
} }
/**
 * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
 * @uaddr:	non-PI futex source
 * @val:	value handed to futex() as its val argument (presumably the
 *		expected contents of @uaddr -- confirm against futex(2))
 * @uaddr2:	PI futex target
 * @timeout:	timeout handed to futex() unchanged
 * @opflags:	extra operation flags handed to futex() unchanged
 *
 * First half of the requeue_pi mechanism; every use must be matched by a
 * futex_cmp_requeue_pi() call on the same pair of futexes.
 *
 * Return: the result of the underlying futex() invocation.
 */
static inline int
futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
		      struct timespec *timeout, int opflags)
{
	int ret;

	/* The slot following uaddr2 is always passed as 0 for this op. */
	ret = futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
		    opflags);

	return ret;
}
/**
 * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2
 * @uaddr:	non-PI futex source
 * @val:	value handed to futex() in the slot after @uaddr2 (presumably
 *		compared against the contents of @uaddr -- confirm against
 *		futex(2))
 * @uaddr2:	PI futex target
 * @nr_requeue:	requeue up to this many tasks
 * @opflags:	extra operation flags handed to futex() unchanged
 *
 * Second half of the requeue_pi mechanism; every use must be matched by
 * futex_wait_requeue_pi() callers blocked on @uaddr. The wake count is
 * fixed at one: the first waiter is always awoken.
 *
 * Return: the result of the underlying futex() invocation.
 */
static inline int
futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
		     int nr_requeue, int opflags)
{
	/* Exactly one waiter is woken per call; the rest are requeued. */
	const int nr_wake = 1;
	int ret;

	ret = futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2,
		    val, opflags);

	return ret;
}
#endif /* _FUTEX_H */ #endif /* _FUTEX_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment