Commit 66bf2e8e authored by Mark Drayton

offcputime: one symbol cache per process, improve pid/tid handling

parent 96483d48
...@@ -40,6 +40,9 @@ Print output in folded stack format. ...@@ -40,6 +40,9 @@ Print output in folded stack format.
\-p PID \-p PID
Trace this process ID only (filtered in-kernel). Trace this process ID only (filtered in-kernel).
.TP .TP
\-t TID
Trace this thread ID only (filtered in-kernel).
.TP
\-u \-u
Only trace user threads (no kernel threads). Only trace user threads (no kernel threads).
.TP .TP
...@@ -52,6 +55,15 @@ Show stacks from user space only (no kernel space stacks). ...@@ -52,6 +55,15 @@ Show stacks from user space only (no kernel space stacks).
\-K \-K
Show stacks from kernel space only (no user space stacks). Show stacks from kernel space only (no user space stacks).
.TP .TP
\-d
Insert delimiter between kernel/user stacks.
.TP
\-f
Output folded format.
.TP
\-\-stack-storage-size STACK_STORAGE_SIZE
Change the number of unique stack traces that can be stored and displayed.
.TP
duration duration
Duration to trace, in seconds. Duration to trace, in seconds.
.SH EXAMPLES .SH EXAMPLES
......
...@@ -41,6 +41,7 @@ examples = """examples: ...@@ -41,6 +41,7 @@ examples = """examples:
./offcputime 5 # trace for 5 seconds only ./offcputime 5 # trace for 5 seconds only
./offcputime -f 5 # 5 seconds, and output in folded format ./offcputime -f 5 # 5 seconds, and output in folded format
./offcputime -p 185 # only trace threads for PID 185 ./offcputime -p 185 # only trace threads for PID 185
./offcputime -t 188 # only trace thread 188
./offcputime -u # only trace user threads (no kernel) ./offcputime -u # only trace user threads (no kernel)
./offcputime -k # only trace kernel threads (no user) ./offcputime -k # only trace kernel threads (no user)
./offcputime -U # only show user space stacks (no kernel) ./offcputime -U # only show user space stacks (no kernel)
...@@ -51,8 +52,12 @@ parser = argparse.ArgumentParser( ...@@ -51,8 +52,12 @@ parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples) epilog=examples)
thread_group = parser.add_mutually_exclusive_group() thread_group = parser.add_mutually_exclusive_group()
thread_group.add_argument("-p", "--pid", type=positive_int, # Note: this script provides --pid and --tid flags but their arguments are
help="trace this PID only") # referred to internally using kernel nomenclature: TGID and PID.
thread_group.add_argument("-p", "--pid", metavar="PID", dest="tgid",
help="trace this PID only", type=positive_int)
thread_group.add_argument("-t", "--tid", metavar="TID", dest="pid",
help="trace this TID only", type=positive_int)
thread_group.add_argument("-u", "--user-threads-only", action="store_true", thread_group.add_argument("-u", "--user-threads-only", action="store_true",
help="user threads only (no kernel threads)") help="user threads only (no kernel threads)")
thread_group.add_argument("-k", "--kernel-threads-only", action="store_true", thread_group.add_argument("-k", "--kernel-threads-only", action="store_true",
...@@ -68,12 +73,14 @@ parser.add_argument("-f", "--folded", action="store_true", ...@@ -68,12 +73,14 @@ parser.add_argument("-f", "--folded", action="store_true",
help="output folded format") help="output folded format")
parser.add_argument("--stack-storage-size", default=1024, parser.add_argument("--stack-storage-size", default=1024,
type=positive_nonzero_int, type=positive_nonzero_int,
help="the number of unique stack traces that can be stored and " \ help="the number of unique stack traces that can be stored and "
"displayed (default 1024)") "displayed (default 1024)")
parser.add_argument("duration", nargs="?", default=99999999, parser.add_argument("duration", nargs="?", default=99999999,
type=positive_nonzero_int, type=positive_nonzero_int,
help="duration of trace, in seconds") help="duration of trace, in seconds")
args = parser.parse_args() args = parser.parse_args()
if args.pid and args.tgid:
parser.error("specify only one of -p and -t")
folded = args.folded folded = args.folded
duration = int(args.duration) duration = int(args.duration)
...@@ -90,6 +97,7 @@ bpf_text = """ ...@@ -90,6 +97,7 @@ bpf_text = """
struct key_t { struct key_t {
u32 pid; u32 pid;
u32 tgid;
int user_stack_id; int user_stack_id;
int kernel_stack_id; int kernel_stack_id;
char name[TASK_COMM_LEN]; char name[TASK_COMM_LEN];
...@@ -100,6 +108,7 @@ BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE) ...@@ -100,6 +108,7 @@ BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE)
int oncpu(struct pt_regs *ctx, struct task_struct *prev) { int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
u32 pid = prev->pid; u32 pid = prev->pid;
u32 tgid = prev->tgid;
u64 ts, *tsp; u64 ts, *tsp;
// record previous thread sleep time // record previous thread sleep time
...@@ -110,6 +119,7 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) { ...@@ -110,6 +119,7 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
// get the current thread's start time // get the current thread's start time
pid = bpf_get_current_pid_tgid(); pid = bpf_get_current_pid_tgid();
tgid = bpf_get_current_pid_tgid() >> 32;
tsp = start.lookup(&pid); tsp = start.lookup(&pid);
if (tsp == 0) { if (tsp == 0) {
return 0; // missed start or filtered return 0; // missed start or filtered
...@@ -128,6 +138,7 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) { ...@@ -128,6 +138,7 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
struct key_t key = {}; struct key_t key = {};
key.pid = pid; key.pid = pid;
key.tgid = tgid;
key.user_stack_id = USER_STACK_GET; key.user_stack_id = USER_STACK_GET;
key.kernel_stack_id = KERNEL_STACK_GET; key.kernel_stack_id = KERNEL_STACK_GET;
bpf_get_current_comm(&key.name, sizeof(key.name)); bpf_get_current_comm(&key.name, sizeof(key.name));
...@@ -140,9 +151,12 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) { ...@@ -140,9 +151,12 @@ int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
# set thread filter # set thread filter
thread_context = "" thread_context = ""
if args.pid is not None: if args.tgid is not None:
thread_context = "PID %s" % args.pid thread_context = "PID %d" % args.tgid
thread_filter = 'pid == %s' % args.pid thread_filter = 'tgid == %d' % args.tgid
elif args.pid is not None:
thread_context = "TID %d" % args.pid
thread_filter = 'pid == %d' % args.pid
elif args.user_threads_only: elif args.user_threads_only:
thread_context = "user threads" thread_context = "user threads"
thread_filter = '!(prev->flags & PF_KTHREAD)' thread_filter = '!(prev->flags & PF_KTHREAD)'
...@@ -224,6 +238,8 @@ for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): ...@@ -224,6 +238,8 @@ for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
has_enomem = True has_enomem = True
continue continue
# user stacks will be symbolized by tgid, not pid, to avoid the overhead
# of one symbol resolver per thread
user_stack = [] if k.user_stack_id < 0 else \ user_stack = [] if k.user_stack_id < 0 else \
stack_traces.walk(k.user_stack_id) stack_traces.walk(k.user_stack_id)
kernel_stack = [] if k.kernel_stack_id < 0 else \ kernel_stack = [] if k.kernel_stack_id < 0 else \
...@@ -234,7 +250,7 @@ for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): ...@@ -234,7 +250,7 @@ for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
user_stack = list(user_stack) user_stack = list(user_stack)
kernel_stack = list(kernel_stack) kernel_stack = list(kernel_stack)
line = [k.name.decode()] + \ line = [k.name.decode()] + \
[b.sym(addr, k.pid) for addr in reversed(user_stack)] + \ [b.sym(addr, k.tgid) for addr in reversed(user_stack)] + \
(need_delimiter and ["-"] or []) + \ (need_delimiter and ["-"] or []) + \
[b.ksym(addr) for addr in reversed(kernel_stack)] [b.ksym(addr) for addr in reversed(kernel_stack)]
print("%s %d" % (";".join(line), v.value)) print("%s %d" % (";".join(line), v.value))
...@@ -245,7 +261,7 @@ for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): ...@@ -245,7 +261,7 @@ for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
if need_delimiter: if need_delimiter:
print(" --") print(" --")
for addr in user_stack: for addr in user_stack:
print(" %016x %s" % (addr, b.sym(addr, k.pid))) print(" %016x %s" % (addr, b.sym(addr, k.tgid)))
print(" %-16s %s (%d)" % ("-", k.name, k.pid)) print(" %-16s %s (%d)" % ("-", k.name, k.pid))
print(" %d\n" % v.value) print(" %d\n" % v.value)
......
...@@ -719,7 +719,7 @@ creating your "off-CPU time flame graphs". ...@@ -719,7 +719,7 @@ creating your "off-CPU time flame graphs".
USAGE message: USAGE message:
# ./offcputime -h # ./offcputime -h
usage: offcputime.py [-h] [-p PID | -k | -u] [-K | -U] [-f] usage: offcputime.py [-h] [-p PID | -t TID | -u | -k] [-U | -K] [-d] [-f]
[--stack-storage-size STACK_STORAGE_SIZE] [--stack-storage-size STACK_STORAGE_SIZE]
[duration] [duration]
...@@ -731,6 +731,7 @@ positional arguments: ...@@ -731,6 +731,7 @@ positional arguments:
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-p PID, --pid PID trace this PID only -p PID, --pid PID trace this PID only
-t TID, --tid TID trace this TID only
-u, --user-threads-only -u, --user-threads-only
user threads only (no kernel threads) user threads only (no kernel threads)
-k, --kernel-threads-only -k, --kernel-threads-only
...@@ -741,6 +742,7 @@ optional arguments: ...@@ -741,6 +742,7 @@ optional arguments:
-K, --kernel-stacks-only -K, --kernel-stacks-only
show stacks from kernel space only (no user space show stacks from kernel space only (no user space
stacks) stacks)
-d, --delimited insert delimiter between kernel/user stacks
-f, --folded output folded format -f, --folded output folded format
--stack-storage-size STACK_STORAGE_SIZE --stack-storage-size STACK_STORAGE_SIZE
the number of unique stack traces that can be stored the number of unique stack traces that can be stored
...@@ -751,6 +753,7 @@ examples: ...@@ -751,6 +753,7 @@ examples:
./offcputime 5 # trace for 5 seconds only ./offcputime 5 # trace for 5 seconds only
./offcputime -f 5 # 5 seconds, and output in folded format ./offcputime -f 5 # 5 seconds, and output in folded format
./offcputime -p 185 # only trace threads for PID 185 ./offcputime -p 185 # only trace threads for PID 185
./offcputime -t 188 # only trace thread 188
./offcputime -u # only trace user threads (no kernel) ./offcputime -u # only trace user threads (no kernel)
./offcputime -k # only trace kernel threads (no user) ./offcputime -k # only trace kernel threads (no user)
./offcputime -U # only show user space stacks (no kernel) ./offcputime -U # only show user space stacks (no kernel)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment