Merge tag 'trace-tools-v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing tools updates from Steven Rostedt:

 - Use total duration to calculate average in rtla osnoise_hist
 - Use 2 digit precision for displaying average
 - Print an intuitive auto analysis of timerlat results
 - Add auto analysis to timerlat top
 - Add hwnoise, which is the same as osnoise but focuses on hardware
 - Small clean ups

* tag 'trace-tools-v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  Documentation/rtla: Add hwnoise man page
  rtla: Add hwnoise tool
  Documentation/rtla: Add timerlat-top auto-analysis options
  rtla/timerlat: Add auto-analysis support to timerlat top
  rtla/timerlat: Add auto-analysis core
  tools/tracing/rtla: osnoise_hist: display average with two-digit precision
  tools/tracing/rtla: osnoise_hist: use total duration for average calculation
  tools/rv: Remove unneeded semicolon

Merge tag 'trace-tools-v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing tools updates from Steven Rostedt:

 - Use total duration to calculate average in rtla osnoise_hist
 - Use 2 digit precision for displaying average
 - Print an intuitive auto analysis of timerlat results
 - Add auto analysis to timerlat top
 - Add hwnoise, which is the same as osnoise but focuses on hardware
 - Small clean ups

* tag 'trace-tools-v6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  Documentation/rtla: Add hwnoise man page
  rtla: Add hwnoise tool
  Documentation/rtla: Add timerlat-top auto-analysis options
  rtla/timerlat: Add auto-analysis support to timerlat top
  rtla/timerlat: Add auto-analysis core
  tools/tracing/rtla: osnoise_hist: display average with two-digit precision
  tools/tracing/rtla: osnoise_hist: use total duration for average calculation
  tools/rv: Remove unneeded semicolon
d392e49a · Linus Torvalds · 2562af68 · 5dc3750e · d392e49a · d392e49a
Commit d392e49a authored Feb 23, 2023 by Linus Torvalds
15 changed files
--- a/Documentation/tools/rtla/common_timerlat_aa.rst
+++ b/Documentation/tools/rtla/common_timerlat_aa.rst
+**--dump-tasks**
+
+        prints the task running on all CPUs if stop conditions are met (requires auto-analysis, i.e. it has no effect with --no-aa)
+
+**--no-aa**
+
+        disable auto-analysis, reducing rtla timerlat cpu usage
--- a/Documentation/tools/rtla/index.rst
+++ b/Documentation/tools/rtla/index.rst
@@ -17,6 +17,7 @@ behavior on specific hardware.
   rtla-timerlat
   rtla-timerlat-hist
   rtla-timerlat-top
+   rtla-hwnoise

 .. only::  subproject and html


--- a/Documentation/tools/rtla/rtla-hwnoise.rst
+++ b/Documentation/tools/rtla/rtla-hwnoise.rst
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+rtla-hwnoise
+============
+------------------------------------------
+Detect and quantify hardware-related noise
+------------------------------------------
+
+:Manual section: 1
+
+SYNOPSIS
+========
+
+**rtla hwnoise** [*OPTIONS*]
+
+DESCRIPTION
+===========
+
+**rtla hwnoise** collects the periodic summary from the *osnoise* tracer
+running with *interrupts disabled*. By disabling interrupts, and the scheduling
+of threads as a consequence, only non-maskable interrupts and hardware-related
+noise are allowed.
+
+The tool also allows the configuration of the *osnoise* tracer and the
+collection of the tracer output.
+
+OPTIONS
+=======
+.. include:: common_osnoise_options.rst
+
+.. include:: common_top_options.rst
+
+.. include:: common_options.rst
+
+EXAMPLE
+=======
+In the example below, the **rtla hwnoise** tool is set to run on CPUs *1-7*
+on a system with 8 cores/16 threads with hyper-threading enabled.
+
+The tool is set to detect any noise higher than *one microsecond*,
+to run for *ten minutes*, displaying a summary of the report at the
+end of the session::
+
+  # rtla hwnoise -c 1-7 -T 1 -d 10m -q
+                                          Hardware-related Noise
+  duration:   0 00:10:00 | time is in us
+  CPU Period       Runtime        Noise  % CPU Aval   Max Noise   Max Single          HW          NMI
+    1 #599       599000000          138    99.99997           3            3           4           74
+    2 #599       599000000           85    99.99998           3            3           4           75
+    3 #599       599000000           86    99.99998           4            3           6           75
+    4 #599       599000000           81    99.99998           4            4           2           75
+    5 #599       599000000           85    99.99998           2            2           2           75
+    6 #599       599000000           76    99.99998           2            2           0           75
+    7 #599       599000000           77    99.99998           3            3           0           75
+
+
+The first column shows the *CPU*, and the second column shows how many
+*Periods* the tool ran during the session. The *Runtime* is the time
+the tool effectively runs on the CPU. The *Noise* column is the sum of
+all noise that the tool observed, and the *% CPU Aval* is the relation
+between the *Runtime* and *Noise*.
+
+The *Max Noise* column is the maximum hardware noise the tool detected in a
+single period, and the *Max Single* is the maximum single noise seen.
+
+The *HW* and *NMI* columns show the total number of *hardware* and *NMI* noise
+occurrences observed by the tool.
+
+For example, *CPU 3* ran *599* periods of *1 second Runtime*. The CPU received
+*86 us* of noise during the entire execution, leaving *99.99998 %* of CPU time
+for the application. In the worst single period, the CPU caused *4 us* of
+noise to the application, but it was certainly caused by more than one single
+noise, as the *Max Single* noise was of *3 us*. The CPU has *HW noise*, at a
+rate of *six occurrences*/*ten minutes*. The CPU also has *NMIs*, at a higher
+frequency: around *seven per minute*.
+
+The tool should report *0* hardware-related noise in the ideal situation.
+For example, by disabling hyper-threading to remove the hardware noise,
+and disabling the TSC watchdog to remove the NMI (it is possible to identify
+this using tracing options of **rtla hwnoise**), it was possible to reach
+the ideal situation in the same hardware::
+
+  # rtla hwnoise -c 1-7 -T 1 -d 10m -q
+                                          Hardware-related Noise
+  duration:   0 00:10:00 | time is in us
+  CPU Period       Runtime        Noise  % CPU Aval   Max Noise   Max Single          HW          NMI
+    1 #599       599000000            0   100.00000           0            0           0            0
+    2 #599       599000000            0   100.00000           0            0           0            0
+    3 #599       599000000            0   100.00000           0            0           0            0
+    4 #599       599000000            0   100.00000           0            0           0            0
+    5 #599       599000000            0   100.00000           0            0           0            0
+    6 #599       599000000            0   100.00000           0            0           0            0
+    7 #599       599000000            0   100.00000           0            0           0            0
+
+SEE ALSO
+========
+
+**rtla-osnoise**\(1)
+
+Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
+
+AUTHOR
+======
+Written by Daniel Bristot de Oliveira <bristot@kernel.org>
+
+.. include:: common_appendix.rst
--- a/Documentation/tools/rtla/rtla-timerlat-top.rst
+++ b/Documentation/tools/rtla/rtla-timerlat-top.rst
@@ -30,102 +30,84 @@ OPTIONS

 .. include:: common_options.rst

+.. include:: common_timerlat_aa.rst
+
 EXAMPLE
 =======

-In the example below, the *timerlat* tracer is set to capture the stack trace at
-the IRQ handler, printing it to the buffer if the *Thread* timer latency is
-higher than *30 us*. It is also set to stop the session if a *Thread* timer
-latency higher than *30 us* is hit. Finally, it is set to save the trace
-buffer if the stop condition is hit::
+In the example below, the timerlat tracer is dispatched on CPUs *1-23* in the
+automatic trace mode, instructing the tracer to stop if a *40 us* latency or
+higher is found::

-  [root@alien ~]# rtla timerlat top -s 30 -T 30 -t
-                   Timer Latency
-    0 00:00:59   |          IRQ Timer Latency (us)        |         Thread Timer Latency (us)
+  # timerlat -a 40 -c 1-23 -q
+                                     Timer Latency
+    0 00:00:12   |          IRQ Timer Latency (us)        |         Thread Timer Latency (us)
  CPU COUNT      |      cur       min       avg       max |      cur       min       avg       max
-    0 #58634     |        1         0         1        10 |       11         2        10        23
-    1 #58634     |        1         0         1         9 |       12         2         9        23
-    2 #58634     |        0         0         1        11 |       10         2         9        23
-    3 #58634     |        1         0         1        11 |       11         2         9        24
-    4 #58634     |        1         0         1        10 |       11         2         9        26
-    5 #58634     |        1         0         1         8 |       10         2         9        25
-    6 #58634     |       12         0         1        12 |       30         2        10        30 <--- CPU with spike
-    7 #58634     |        1         0         1         9 |       11         2         9        23
-    8 #58633     |        1         0         1         9 |       11         2         9        26
-    9 #58633     |        1         0         1         9 |       10         2         9        26
-   10 #58633     |        1         0         1        13 |       11         2         9        28
-   11 #58633     |        1         0         1        13 |       12         2         9        24
-   12 #58633     |        1         0         1         8 |       10         2         9        23
-   13 #58633     |        1         0         1        10 |       10         2         9        22
-   14 #58633     |        1         0         1        18 |       12         2         9        27
-   15 #58633     |        1         0         1        10 |       11         2         9        28
-   16 #58633     |        0         0         1        11 |        7         2         9        26
-   17 #58633     |        1         0         1        13 |       10         2         9        24
-   18 #58633     |        1         0         1         9 |       13         2         9        22
-   19 #58633     |        1         0         1        10 |       11         2         9        23
-   20 #58633     |        1         0         1        12 |       11         2         9        28
-   21 #58633     |        1         0         1        14 |       11         2         9        24
-   22 #58633     |        1         0         1         8 |       11         2         9        22
-   23 #58633     |        1         0         1        10 |       11         2         9        27
-  timerlat hit stop tracing
-  saving trace to timerlat_trace.txt
-  [root@alien bristot]# tail -60 timerlat_trace.txt
-  [...]
-      timerlat/5-79755   [005] .......   426.271226: #58634 context thread timer_latency     10823 ns
-              sh-109404  [006] dnLh213   426.271247: #58634 context    irq timer_latency     12505 ns
-              sh-109404  [006] dNLh313   426.271258: irq_noise: local_timer:236 start 426.271245463 duration 12553 ns
-              sh-109404  [006] d...313   426.271263: thread_noise:       sh:109404 start 426.271245853 duration 4769 ns
-      timerlat/6-79756   [006] .......   426.271264: #58634 context thread timer_latency     30328 ns
-      timerlat/6-79756   [006] ....1..   426.271265: <stack trace>
-  => timerlat_irq
-  => __hrtimer_run_queues
-  => hrtimer_interrupt
-  => __sysvec_apic_timer_interrupt
-  => sysvec_apic_timer_interrupt
-  => asm_sysvec_apic_timer_interrupt
-  => _raw_spin_unlock_irqrestore			<---- spinlock that disabled interrupt.
-  => try_to_wake_up
-  => autoremove_wake_function
-  => __wake_up_common
-  => __wake_up_common_lock
-  => ep_poll_callback
-  => __wake_up_common
-  => __wake_up_common_lock
-  => fsnotify_add_event
-  => inotify_handle_inode_event
-  => fsnotify
-  => __fsnotify_parent
-  => __fput
-  => task_work_run
-  => exit_to_user_mode_prepare
-  => syscall_exit_to_user_mode
-  => do_syscall_64
-  => entry_SYSCALL_64_after_hwframe
-  => 0x7265000001378c
-  => 0x10000cea7
-  => 0x25a00000204a
-  => 0x12e302d00000000
-  => 0x19b51010901b6
-  => 0x283ce00726500
-  => 0x61ea308872
-  => 0x00000fe3
-            bash-109109  [007] d..h...   426.271265: #58634 context    irq timer_latency      1211 ns
-      timerlat/6-79756   [006] .......   426.271267: timerlat_main: stop tracing hit on cpu 6
-
-In the trace, it is possible the notice that the *IRQ* timer latency was
-already high, accounting *12505 ns*. The IRQ delay was caused by the
-*bash-109109* process that disabled IRQs in the wake-up path
-(*_try_to_wake_up()* function). The duration of the IRQ handler that woke
-up the timerlat thread, informed with the **osnoise:irq_noise** event, was
-also high and added more *12553 ns* to the Thread latency. Finally, the
-**osnoise:thread_noise** added by the currently running thread (including
-the scheduling overhead) added more *4769 ns*. Summing up these values,
-the *Thread* timer latency accounted for *30328 ns*.
-
-The primary reason for this high value is the wake-up path that was hit
-twice during this case: when the *bash-109109* was waking up a thread
-and then when the *timerlat* thread was awakened. This information can
-then be used as the starting point of a more fine-grained analysis.
+    1 #12322     |        0         0         1        15 |       10         3         9        31
+    2 #12322     |        3         0         1        12 |       10         3         9        23
+    3 #12322     |        1         0         1        21 |        8         2         8        34
+    4 #12322     |        1         0         1        17 |       10         2        11        33
+    5 #12322     |        0         0         1        12 |        8         3         8        25
+    6 #12322     |        1         0         1        14 |       16         3        11        35
+    7 #12322     |        0         0         1        14 |        9         2         8        29
+    8 #12322     |        1         0         1        22 |        9         3         9        34
+    9 #12322     |        0         0         1        14 |        8         2         8        24
+   10 #12322     |        1         0         0        12 |        9         3         8        24
+   11 #12322     |        0         0         0        15 |        6         2         7        29
+   12 #12321     |        1         0         0        13 |        5         3         8        23
+   13 #12319     |        0         0         1        14 |        9         3         9        26
+   14 #12321     |        1         0         0        13 |        6         2         8        24
+   15 #12321     |        1         0         1        15 |       12         3        11        27
+   16 #12318     |        0         0         1        13 |        7         3        10        24
+   17 #12319     |        0         0         1        13 |       11         3         9        25
+   18 #12318     |        0         0         0        12 |        8         2         8        20
+   19 #12319     |        0         0         1        18 |       10         2         9        28
+   20 #12317     |        0         0         0        20 |        9         3         8        34
+   21 #12318     |        0         0         0        13 |        8         3         8        28
+   22 #12319     |        0         0         1        11 |        8         3        10        22
+   23 #12320     |       28         0         1        28 |       41         3        11        41
+  rtla timerlat hit stop tracing
+  ## CPU 23 hit stop tracing, analyzing it ##
+  IRQ handler delay:                                        27.49 us (65.52 %)
+  IRQ latency:                                              28.13 us
+  Timerlat IRQ duration:                                     9.59 us (22.85 %)
+  Blocking thread:                                           3.79 us (9.03 %)
+                         objtool:49256                       3.79 us
+    Blocking thread stacktrace
+                -> timerlat_irq
+                -> __hrtimer_run_queues
+                -> hrtimer_interrupt
+                -> __sysvec_apic_timer_interrupt
+                -> sysvec_apic_timer_interrupt
+                -> asm_sysvec_apic_timer_interrupt
+                -> _raw_spin_unlock_irqrestore
+                -> cgroup_rstat_flush_locked
+                -> cgroup_rstat_flush_irqsafe
+                -> mem_cgroup_flush_stats
+                -> mem_cgroup_wb_stats
+                -> balance_dirty_pages
+                -> balance_dirty_pages_ratelimited_flags
+                -> btrfs_buffered_write
+                -> btrfs_do_write_iter
+                -> vfs_write
+                -> __x64_sys_pwrite64
+                -> do_syscall_64
+                -> entry_SYSCALL_64_after_hwframe
+  ------------------------------------------------------------------------
+    Thread latency:                                          41.96 us (100%)
+
+  The system has exit from idle latency!
+    Max timerlat IRQ latency from idle: 17.48 us in cpu 4
+  Saving trace to timerlat_trace.txt
+
+In this case, the major factor was the delay suffered by the *IRQ handler*
+that handles the **timerlat** wakeup: *65.52%*. This can be caused by the
+current thread masking interrupts, which can be seen in the blocking
+thread stacktrace: the current thread (*objtool:49256*) disabled interrupts
+via *raw spin lock* operations inside mem cgroup, while doing a write
+syscall on a btrfs file system.
+
+The raw trace is saved in the **timerlat_trace.txt** file for further analysis.

 Note that **rtla timerlat** was dispatched without changing *timerlat* tracer
 threads' priority. That is generally not needed because these threads hava

--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -119,6 +119,8 @@ install: doc_install
 	$(STRIP) $(DESTDIR)$(BINDIR)/rtla
 	@test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise
 	ln -s rtla $(DESTDIR)$(BINDIR)/osnoise
+	@test ! -f $(DESTDIR)$(BINDIR)/hwnoise || rm $(DESTDIR)$(BINDIR)/hwnoise
+	ln -s rtla $(DESTDIR)$(BINDIR)/hwnoise
 	@test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat
 	ln -s rtla $(DESTDIR)$(BINDIR)/timerlat


--- a/tools/tracing/rtla/src/osnoise.c
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -734,6 +734,113 @@ void osnoise_put_tracing_thresh(struct osnoise_context *context)
 	context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL;
 }

+static int osnoise_options_get_option(char *option)
+{
+	char *options = tracefs_instance_file_read(NULL, "osnoise/options", NULL);
+	char no_option[128];
+	int retval = 0;
+	char *opt;
+
+	if (!options)
+		return OSNOISE_OPTION_INIT_VAL;
+
+	/*
+	 * Check first if the option is disabled.
+	 */
+	snprintf(no_option, sizeof(no_option), "NO_%s", option);
+
+	opt = strstr(options, no_option);
+	if (opt)
+		goto out_free;
+
+	/*
+	 * Now that it is not disabled, if the string is there, it is
+	 * enabled. If the string is not there, the option does not exist.
+	 */
+	opt = strstr(options, option);
+	if (opt)
+		retval = 1;
+	else
+		retval = OSNOISE_OPTION_INIT_VAL;
+
+out_free:
+	free(options);
+	return retval;
+}
+
+static int osnoise_options_set_option(char *option, bool onoff)
+{
+	char no_option[128];
+
+	if (onoff)
+		return tracefs_instance_file_write(NULL, "osnoise/options", option);
+
+	snprintf(no_option, sizeof(no_option), "NO_%s", option);
+
+	return tracefs_instance_file_write(NULL, "osnoise/options", no_option);
+}
+
+static int osnoise_get_irq_disable(struct osnoise_context *context)
+{
+	if (context->opt_irq_disable != OSNOISE_OPTION_INIT_VAL)
+		return context->opt_irq_disable;
+
+	if (context->orig_opt_irq_disable != OSNOISE_OPTION_INIT_VAL)
+		return context->orig_opt_irq_disable;
+
+	context->orig_opt_irq_disable = osnoise_options_get_option("OSNOISE_IRQ_DISABLE");
+
+	return context->orig_opt_irq_disable;
+}
+
+int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff)
+{
+	int opt_irq_disable = osnoise_get_irq_disable(context);
+	int retval;
+
+	if (opt_irq_disable == OSNOISE_OPTION_INIT_VAL)
+		return -1;
+
+	if (opt_irq_disable == onoff)
+		return 0;
+
+	retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", onoff);
+	if (retval < 0)
+		return -1;
+
+	context->opt_irq_disable = onoff;
+
+	return 0;
+}
+
+static void osnoise_restore_irq_disable(struct osnoise_context *context)
+{
+	int retval;
+
+	if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL)
+		return;
+
+	if (context->orig_opt_irq_disable == context->opt_irq_disable)
+		goto out_done;
+
+	retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", context->orig_opt_irq_disable);
+	if (retval < 0)
+		err_msg("Could not restore original OSNOISE_IRQ_DISABLE option\n");
+
+out_done:
+	context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL;
+}
+
+static void osnoise_put_irq_disable(struct osnoise_context *context)
+{
+	osnoise_restore_irq_disable(context);
+
+	if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL)
+		return;
+
+	context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL;
+}
+
 /*
 * enable_osnoise - enable osnoise tracer in the trace_instance
 */
@@ -798,6 +905,9 @@ struct osnoise_context *osnoise_context_alloc(void)
 	context->orig_tracing_thresh	= OSNOISE_OPTION_INIT_VAL;
 	context->tracing_thresh		= OSNOISE_OPTION_INIT_VAL;

+	context->orig_opt_irq_disable	= OSNOISE_OPTION_INIT_VAL;
+	context->opt_irq_disable	= OSNOISE_OPTION_INIT_VAL;
+
 	osnoise_get_context(context);

 	return context;
@@ -824,6 +934,7 @@ void osnoise_put_context(struct osnoise_context *context)
 	osnoise_put_timerlat_period_us(context);
 	osnoise_put_print_stack(context);
 	osnoise_put_tracing_thresh(context);
+	osnoise_put_irq_disable(context);

 	free(context);
 }
@@ -958,3 +1069,9 @@ int osnoise_main(int argc, char *argv[])
 	osnoise_usage(1);
 	exit(1);
 }
+
+int hwnoise_main(int argc, char *argv[])
+{
+	osnoise_top_main(argc, argv);
+	exit(0);
+}
--- a/tools/tracing/rtla/src/osnoise.h
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -38,6 +38,10 @@ struct osnoise_context {
 	/* -1 as init value because 0 is disabled */
 	long long		orig_print_stack;
 	long long		print_stack;
+
+	/* -1 as init value because 0 is off */
+	int			orig_opt_irq_disable;
+	int			opt_irq_disable;
 };

 /*
@@ -79,6 +83,8 @@ void osnoise_restore_print_stack(struct osnoise_context *context);
 int osnoise_set_print_stack(struct osnoise_context *context,
 			    long long print_stack);

+int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff);
+
 /*
 * osnoise_tool -  osnoise based tool definition.
 */
@@ -97,3 +103,4 @@ struct osnoise_tool *osnoise_init_trace_tool(char *tracer);
 int osnoise_hist_main(int argc, char *argv[]);
 int osnoise_top_main(int argc, char **argv);
 int osnoise_main(int argc, char **argv);
+int hwnoise_main(int argc, char **argv);
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -121,6 +121,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
 {
 	struct osnoise_hist_params *params = tool->params;
 	struct osnoise_hist_data *data = tool->data;
+	unsigned long long total_duration;
 	int entries = data->entries;
 	int bucket;
 	int *hist;
@@ -131,10 +132,12 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
 	if (data->bucket_size)
 		bucket = duration / data->bucket_size;

+	total_duration = duration * count;
+
 	hist = data->hist[cpu].samples;
 	data->hist[cpu].count += count;
 	update_min(&data->hist[cpu].min_sample, &duration);
-	update_sum(&data->hist[cpu].sum_sample, &duration);
+	update_sum(&data->hist[cpu].sum_sample, &total_duration);
 	update_max(&data->hist[cpu].max_sample, &duration);

 	if (bucket < entries)
@@ -332,8 +335,8 @@ osnoise_print_summary(struct osnoise_hist_params *params,
 			continue;

 		if (data->hist[cpu].count)
-			trace_seq_printf(trace->seq, "%9llu ",
-					data->hist[cpu].sum_sample / data->hist[cpu].count);
+			trace_seq_printf(trace->seq, "%9.2f ",
+				((double) data->hist[cpu].sum_sample) / data->hist[cpu].count);
 		else
 			trace_seq_printf(trace->seq, "        - ");
 	}

--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -14,6 +14,11 @@
 #include "osnoise.h"
 #include "utils.h"

+enum osnoise_mode {
+	MODE_OSNOISE = 0,
+	MODE_HWNOISE
+};
+
 /*
 * osnoise top parameters
 */
@@ -32,6 +37,7 @@ struct osnoise_top_params {
 	int			set_sched;
 	struct sched_attr	sched_param;
 	struct trace_events	*events;
+	enum osnoise_mode	mode;
 };

 struct osnoise_top_cpu {
@@ -143,15 +149,23 @@ osnoise_top_handler(struct trace_seq *s, struct tep_record *record,
 */
 static void osnoise_top_header(struct osnoise_tool *top)
 {
+	struct osnoise_top_params *params = top->params;
 	struct trace_seq *s = top->trace.seq;
 	char duration[26];

 	get_duration(top->start_time, duration, sizeof(duration));

 	trace_seq_printf(s, "\033[2;37;40m");
-	trace_seq_printf(s, "                                          Operating System Noise");
-	trace_seq_printf(s, "                                     ");
-	trace_seq_printf(s, "                                     ");
+	trace_seq_printf(s, "                                          ");
+
+	if (params->mode == MODE_OSNOISE) {
+		trace_seq_printf(s, "Operating System Noise");
+		trace_seq_printf(s, "                                       ");
+	} else if (params->mode == MODE_HWNOISE) {
+		trace_seq_printf(s, "Hardware-related Noise");
+	}
+
+	trace_seq_printf(s, "                                   ");
 	trace_seq_printf(s, "\033[0;0;0m");
 	trace_seq_printf(s, "\n");

@@ -162,7 +176,14 @@ static void osnoise_top_header(struct osnoise_tool *top)
 	trace_seq_printf(s, "       Noise ");
 	trace_seq_printf(s, " %% CPU Aval ");
 	trace_seq_printf(s, "  Max Noise   Max Single ");
-	trace_seq_printf(s, "         HW          NMI          IRQ      Softirq       Thread");
+	trace_seq_printf(s, "         HW          NMI");
+
+	if (params->mode == MODE_HWNOISE)
+		goto eol;
+
+	trace_seq_printf(s, "          IRQ      Softirq       Thread");
+
+eol:
 	trace_seq_printf(s, "\033[0;0;0m");
 	trace_seq_printf(s, "\n");
 }
@@ -181,6 +202,7 @@ static void clear_terminal(struct trace_seq *seq)
 */
 static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
 {
+	struct osnoise_top_params *params = tool->params;
 	struct trace_seq *s = tool->trace.seq;
 	struct osnoise_top_cpu *cpu_data;
 	struct osnoise_top_data *data;
@@ -205,6 +227,12 @@ static void osnoise_top_print(struct osnoise_tool *tool, int cpu)

 	trace_seq_printf(s, "%12llu ", cpu_data->hw_count);
 	trace_seq_printf(s, "%12llu ", cpu_data->nmi_count);
+
+	if (params->mode == MODE_HWNOISE) {
+		trace_seq_printf(s, "\n");
+		return;
+	}
+
 	trace_seq_printf(s, "%12llu ", cpu_data->irq_count);
 	trace_seq_printf(s, "%12llu ", cpu_data->softirq_count);
 	trace_seq_printf(s, "%12llu\n", cpu_data->thread_count);
@@ -241,12 +269,12 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
 /*
 * osnoise_top_usage - prints osnoise top usage message
 */
-void osnoise_top_usage(char *usage)
+static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
 {
 	int i;

 	static const char * const msg[] = {
-		"  usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
+		" [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
 		"	  [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
 		"	  [-c cpu-list] [-P priority]",
 		"",
@@ -277,9 +305,22 @@ void osnoise_top_usage(char *usage)
 	if (usage)
 		fprintf(stderr, "%s\n", usage);

-	fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n",
+	if (params->mode == MODE_OSNOISE) {
+		fprintf(stderr,
+			"rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n",
 			VERSION);

+		fprintf(stderr, "  usage: rtla osnoise [top]");
+	}
+
+	if (params->mode == MODE_HWNOISE) {
+		fprintf(stderr,
+			"rtla hwnoise: a summary of hardware-related noise (version %s)\n",
+			VERSION);
+
+		fprintf(stderr, "  usage: rtla hwnoise");
+	}
+
 	for (i = 0; msg[i]; i++)
 		fprintf(stderr, "%s\n", msg[i]);
 	exit(1);
@@ -299,6 +340,9 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
 	if (!params)
 		exit(1);

+	if (strcmp(argv[0], "hwnoise") == 0)
+		params->mode = MODE_HWNOISE;
+
 	while (1) {
 		static struct option long_options[] = {
 			{"auto",		required_argument,	0, 'a'},
@@ -345,7 +389,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
 		case 'c':
 			retval = parse_cpu_list(optarg, &params->monitored_cpus);
 			if (retval)
-				osnoise_top_usage("\nInvalid -c cpu list\n");
+				osnoise_top_usage(params, "\nInvalid -c cpu list\n");
 			params->cpus = optarg;
 			break;
 		case 'D':
@@ -354,7 +398,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
 		case 'd':
 			params->duration = parse_seconds_duration(optarg);
 			if (!params->duration)
-				osnoise_top_usage("Invalid -D duration\n");
+				osnoise_top_usage(params, "Invalid -D duration\n");
 			break;
 		case 'e':
 			tevent = trace_event_alloc(optarg);
@@ -370,17 +414,17 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
 			break;
 		case 'h':
 		case '?':
-			osnoise_top_usage(NULL);
+			osnoise_top_usage(params, NULL);
 			break;
 		case 'p':
 			params->period = get_llong_from_str(optarg);
 			if (params->period > 10000000)
-				osnoise_top_usage("Period longer than 10 s\n");
+				osnoise_top_usage(params, "Period longer than 10 s\n");
 			break;
 		case 'P':
 			retval = parse_prio(optarg, &params->sched_param);
 			if (retval == -1)
-				osnoise_top_usage("Invalid -P priority");
+				osnoise_top_usage(params, "Invalid -P priority");
 			params->set_sched = 1;
 			break;
 		case 'q':
@@ -389,7 +433,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
 		case 'r':
 			params->runtime = get_llong_from_str(optarg);
 			if (params->runtime < 100)
-				osnoise_top_usage("Runtime shorter than 100 us\n");
+				osnoise_top_usage(params, "Runtime shorter than 100 us\n");
 			break;
 		case 's':
 			params->stop_us = get_llong_from_str(optarg);
@@ -415,7 +459,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
 					exit(EXIT_FAILURE);
 				}
 			} else {
-				osnoise_top_usage("--trigger requires a previous -e\n");
+				osnoise_top_usage(params, "--trigger requires a previous -e\n");
 			}
 			break;
 		case '1': /* filter */
@@ -426,11 +470,11 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
 					exit(EXIT_FAILURE);
 				}
 			} else {
-				osnoise_top_usage("--filter requires a previous -e\n");
+				osnoise_top_usage(params, "--filter requires a previous -e\n");
 			}
 			break;
 		default:
-			osnoise_top_usage("Invalid option");
+			osnoise_top_usage(params, "Invalid option");
 		}
 	}

@@ -495,6 +539,14 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p
 		}
 	}

+	if (params->mode == MODE_HWNOISE) {
+		retval = osnoise_set_irq_disable(tool->context, 1);
+		if (retval) {
+			err_msg("Failed to set OSNOISE_IRQ_DISABLE option\n");
+			goto out_err;
+		}
+	}
+
 	return 0;

 out_err:

--- a/tools/tracing/rtla/src/rtla.c
+++ b/tools/tracing/rtla/src/rtla.c
@@ -26,6 +26,7 @@ static void rtla_usage(int err)
 		"",
 		"  commands:",
 		"     osnoise  - gives information about the operating system noise (osnoise)",
+		"     hwnoise  - gives information about hardware-related noise",
 		"     timerlat - measures the timer irq and thread latency",
 		"",
 		NULL,
@@ -47,6 +48,9 @@ int run_command(int argc, char **argv, int start_position)
 	if (strcmp(argv[start_position], "osnoise") == 0) {
 		osnoise_main(argc-start_position, &argv[start_position]);
 		goto ran;
+	} else if (strcmp(argv[start_position], "hwnoise") == 0) {
+		hwnoise_main(argc-start_position, &argv[start_position]);
+		goto ran;
 	} else if (strcmp(argv[start_position], "timerlat") == 0) {
 		timerlat_main(argc-start_position, &argv[start_position]);
 		goto ran;

--- a/tools/tracing/rtla/src/timerlat_aa.c
+++ b/tools/tracing/rtla/src/timerlat_aa.c
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include "utils.h"
+#include "osnoise.h"
+#include "timerlat.h"
+
+/*
+ * Per-CPU state of the auto-analysis, stored in the curr_state field of
+ * struct timerlat_aa_data.
+ */
+enum timelat_state {
+	/* Value of a freshly zero-allocated per-CPU slot */
+	TIMERLAT_INIT = 0,
+	/* Set when a timerlat thread sample is handled: next comes the timerlat IRQ */
+	TIMERLAT_WAITING_IRQ,
+	/* Set when a timerlat IRQ sample is handled: next comes the timerlat thread */
+	TIMERLAT_WAITING_THREAD,
+};
+
+/* Size, in bytes, of the buffers that hold a task comm */
+#define MAX_COMM		24
+
+/*
+ * Per-cpu data statistics and data.
+ *
+ * One instance exists per CPU; the instances live in the taa_data array
+ * of struct timerlat_aa_context and are indexed by CPU number.
+ */
+struct timerlat_aa_data {
+	/* Current CPU state (one of the TIMERLAT_* values) */
+	int			curr_state;
+
+	/* timerlat IRQ latency: sequence number, latency and timestamp of the last IRQ sample */
+	unsigned long long	tlat_irq_seqnum;
+	unsigned long long	tlat_irq_latency;
+	unsigned long long	tlat_irq_timstamp;
+
+	/* timerlat Thread latency: same values, for the last thread sample */
+	unsigned long long	tlat_thread_seqnum;
+	unsigned long long	tlat_thread_latency;
+	unsigned long long	tlat_thread_timstamp;
+
+	/*
+	 * Information about the thread running when the IRQ
+	 * arrived.
+	 *
+	 * This can be blocking or interference, depending on the
+	 * priority of the thread. Assuming timerlat is the highest
+	 * prio, it is blocking. If timerlat has a lower prio, it is
+	 * interference.
+	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
+	 */
+	unsigned long long	run_thread_pid;
+	char			run_thread_comm[MAX_COMM];
+	unsigned long long	thread_blocking_duration;
+	/* Largest IRQ latency seen on this CPU while the idle task (pid 0) was running */
+	unsigned long long	max_exit_idle_latency;
+
+	/* Information about the timerlat timer irq */
+	unsigned long long	timer_irq_start_time;
+	unsigned long long	timer_irq_start_delay;
+	unsigned long long	timer_irq_duration;
+	unsigned long long	timer_exit_from_idle;
+
+	/*
+	 * Information about the last IRQ before the timerlat irq
+	 * arrived.
+	 *
+	 * If now - timestamp is <= latency, it might have influenced
+	 * in the timerlat irq latency. Otherwise, ignore it.
+	 */
+	unsigned long long	prev_irq_duration;
+	unsigned long long	prev_irq_timstamp;
+
+	/*
+	 * Interference sum: total duration of each kind of noise observed
+	 * between the timerlat IRQ and the timerlat thread.
+	 */
+	unsigned long long	thread_nmi_sum;
+	unsigned long long	thread_irq_sum;
+	unsigned long long	thread_softirq_sum;
+	unsigned long long	thread_thread_sum;
+
+	/*
+	 * Interference task information: pre-formatted text lines, one seq
+	 * per kind of noise, plus the stack trace of the blocking thread.
+	 */
+	struct trace_seq	*prev_irqs_seq;
+	struct trace_seq	*nmi_seq;
+	struct trace_seq	*irqs_seq;
+	struct trace_seq	*softirqs_seq;
+	struct trace_seq	*threads_seq;
+	struct trace_seq	*stack_seq;
+
+	/*
+	 * Current thread, as reported by the last sched_switch event.
+	 */
+	char			current_comm[MAX_COMM];
+	unsigned long long	current_pid;
+
+	/*
+	 * Is the system running a kworker?
+	 *
+	 * Filled from workqueue_execute_start ("work" and "function" fields)
+	 * and cleared on every sched_switch.
+	 */
+	unsigned long long	kworker;
+	unsigned long long	kworker_func;
+};
+
+/*
+ * The analysis context and system wide view
+ */
+struct timerlat_aa_context {
+	/* Number of entries in taa_data */
+	int nr_cpus;
+	/* Non-zero: also track sched_switch/workqueue events and print the per-CPU tasks */
+	int dump_tasks;
+
+	/* per CPU data */
+	struct timerlat_aa_data *taa_data;
+
+	/*
+	 * required to translate function names and register
+	 * events.
+	 */
+	struct osnoise_tool *tool;
+};
+
+/*
+ * The data is stored as a local variable, but accessed via a helper function.
+ *
+ * It could be stored inside the trace context. But every access would
+ * require container_of() + a series of pointers. Do we need it? Not sure.
+ *
+ * For now keep it simple. If needed, store it in the tool, add the *context
+ * as a parameter in timerlat_aa_get_ctx() and do the magic there.
+ *
+ * The pointer is set by timerlat_aa_init() and stays NULL until then.
+ */
+static struct timerlat_aa_context *__timerlat_aa_ctx;
+
+/*
+ * timerlat_aa_get_ctx - Return the file-wide auto-analysis context
+ *
+ * Returns whatever __timerlat_aa_ctx currently holds, which may be NULL.
+ */
+static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
+{
+	return __timerlat_aa_ctx;
+}
+
+/*
+ * timerlat_aa_get_data - Return the auto-analysis slot of a given CPU
+ *
+ * Plain pointer arithmetic on the per-CPU array: @cpu is not range-checked
+ * and the result is never NULL.
+ */
+static struct timerlat_aa_data
+*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
+{
+	struct timerlat_aa_data *cpu_data = taa_ctx->taa_data + cpu;
+
+	return cpu_data;
+}
+
+/*
+ * timerlat_aa_irq_latency - Handles timerlat IRQ event
+ *
+ * Starts a new activation on this CPU: moves the state to
+ * TIMERLAT_WAITING_THREAD, clears every per-activation counter and seq,
+ * records the IRQ latency/seqnum and the pid that was running, and
+ * updates the known maximum exit-from-idle latency.
+ *
+ * Always returns 0.
+ */
+static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
+				   struct trace_seq *s, struct tep_record *record,
+				   struct tep_event *event)
+{
+	/*
+	 * For interference, we start now looking for things that can delay
+	 * the thread.
+	 */
+	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
+	taa_data->tlat_irq_timstamp = record->ts;
+
+	/*
+	 * Zero values.
+	 *
+	 * All the per-activation sums must be cleared here, including
+	 * thread_thread_sum: otherwise thread interference would accumulate
+	 * over all activations instead of describing the last one.
+	 */
+	taa_data->thread_nmi_sum = 0;
+	taa_data->thread_irq_sum = 0;
+	taa_data->thread_softirq_sum = 0;
+	taa_data->thread_thread_sum = 0;
+	taa_data->thread_blocking_duration = 0;
+	taa_data->timer_irq_start_time = 0;
+	taa_data->timer_irq_duration = 0;
+	taa_data->timer_exit_from_idle = 0;
+
+	/*
+	 * Zero interference tasks.
+	 */
+	trace_seq_reset(taa_data->nmi_seq);
+	trace_seq_reset(taa_data->irqs_seq);
+	trace_seq_reset(taa_data->softirqs_seq);
+	trace_seq_reset(taa_data->threads_seq);
+
+	/* IRQ latency values */
+	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
+	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
+
+	/* The thread that can cause blocking */
+	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
+
+	/*
+	 * Get exit from idle case.
+	 *
+	 * If it is not idle thread:
+	 */
+	if (taa_data->run_thread_pid)
+		return 0;
+
+	/*
+	 * if the latency is shorter than the known exit from idle:
+	 */
+	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
+		return 0;
+
+	/*
+	 * To be safe, ignore the cases in which an IRQ/NMI could have
+	 * interfered with the timerlat IRQ.
+	 */
+	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
+	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
+		return 0;
+
+	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
+
+	return 0;
+}
+
+/*
+ * timerlat_aa_thread_latency - Handles timerlat thread event
+ *
+ * Closes the current activation: stores the thread sample timestamp,
+ * latency and seqnum, and moves the CPU back to TIMERLAT_WAITING_IRQ.
+ *
+ * Always returns 0.
+ */
+static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
+				      struct trace_seq *s, struct tep_record *record,
+				      struct tep_event *event)
+{
+	/* Thread latency values */
+	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
+	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
+
+	taa_data->tlat_thread_timstamp = record->ts;
+
+	/*
+	 * From now on, anything that shows up can only delay the IRQ of
+	 * the next cycle.
+	 */
+	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
+
+	return 0;
+}
+
+/*
+ * timerlat_aa_handler - Handle timerlat events
+ *
+ * This function is called to handle timerlat events recording statistics.
+ * The "context" field of the event selects the path: zero is the timerlat
+ * IRQ sample, anything else is the timerlat thread sample.
+ *
+ * Returns 0 on success, -1 otherwise (auto-analysis not initialized, CPU
+ * outside of the per-CPU array, or "context" field not found).
+ */
+int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
+			struct tep_event *event, void *context)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+	struct timerlat_aa_data *taa_data;
+	unsigned long long thread = 0;
+
+	/*
+	 * The per-CPU slot is an address inside the array, so it can never
+	 * be NULL by itself: validate the context and the index instead.
+	 */
+	if (!taa_ctx || !taa_ctx->taa_data)
+		return -1;
+
+	if (record->cpu < 0 || record->cpu >= taa_ctx->nr_cpus)
+		return -1;
+
+	taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
+
+	/* without the field there is no way to tell IRQ from thread samples */
+	if (tep_get_field_val(s, event, "context", record, &thread, 1))
+		return -1;
+
+	if (!thread)
+		return timerlat_aa_irq_latency(taa_data, s, record, event);
+	else
+		return timerlat_aa_thread_latency(taa_data, s, record, event);
+}
+
+/*
+ * timerlat_aa_nmi_handler - Handles NMI noise
+ *
+ * Hooked to the osnoise:nmi_noise event. An NMI seen while waiting for the
+ * timerlat IRQ is remembered as the "previous IRQ"; any other NMI is
+ * accounted as interference to the current activation.
+ */
+static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
+				   struct tep_event *event, void *context)
+{
+	struct timerlat_aa_data *taa_data;
+	unsigned long long duration;
+	unsigned long long start;
+
+	taa_data = timerlat_aa_get_data(timerlat_aa_get_ctx(), record->cpu);
+
+	tep_get_field_val(s, event, "duration", record, &duration, 1);
+	tep_get_field_val(s, event, "start", record, &start, 1);
+
+	if (taa_data->curr_state != TIMERLAT_WAITING_IRQ) {
+		/* NMI between the timerlat IRQ and the thread: interference */
+		taa_data->thread_nmi_sum += duration;
+		trace_seq_printf(taa_data->nmi_seq, "	%24s	\t\t\t%9.2f us\n",
+			 "nmi", ns_to_usf(duration));
+	} else {
+		/* NMI before the timerlat IRQ: only the last one is kept */
+		taa_data->prev_irq_duration = duration;
+		taa_data->prev_irq_timstamp = start;
+
+		trace_seq_reset(taa_data->prev_irqs_seq);
+		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s	\t\t\t%9.2f us\n",
+			 "nmi", ns_to_usf(duration));
+	}
+
+	return 0;
+}
+
+/*
+ * timerlat_aa_irq_handler - Handles IRQ noise
+ *
+ * It is used to collect information about interferences from IRQ. It is
+ * hooked to the osnoise:irq_noise event.
+ *
+ * It is a little bit more complex than the other because it measures:
+ *	- The IRQs that can delay the timer IRQ before it happened.
+ *	- The Timerlat IRQ handler
+ *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
+ *	  (IRQ interference).
+ *
+ * Always returns 0.
+ */
+static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
+				   struct tep_event *event, void *context)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
+	unsigned long long expected_start;
+	unsigned long long duration;
+	unsigned long long vector;
+	unsigned long long start;
+	char *desc;
+	int val;
+
+	tep_get_field_val(s, event, "duration", record, &duration, 1);
+	tep_get_field_val(s, event, "start", record, &start, 1);
+	tep_get_field_val(s, event, "vector", record, &vector, 1);
+	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
+
+	/*
+	 * Before the timerlat IRQ.
+	 */
+	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
+		taa_data->prev_irq_duration = duration;
+		taa_data->prev_irq_timstamp = start;
+
+		trace_seq_reset(taa_data->prev_irqs_seq);
+		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
+				 desc, vector, ns_to_usf(duration));
+		return 0;
+	}
+
+	/*
+	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
+	 * the timerlat irq handler.
+	 */
+	if (!taa_data->timer_irq_start_time) {
+		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
+
+		taa_data->timer_irq_start_time = start;
+		taa_data->timer_irq_duration = duration;
+
+		/*
+		 * expected_start is derived from the sample timestamp and the
+		 * reported latency, while start comes from the irq_noise
+		 * event. Nothing guarantees expected_start <= start, and the
+		 * values are unsigned: clamp the delay to zero instead of
+		 * letting the subtraction wrap around to a huge value.
+		 */
+		if (expected_start < taa_data->timer_irq_start_time)
+			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
+		else
+			taa_data->timer_irq_start_delay = 0;
+
+		/*
+		 * not exit from idle.
+		 */
+		if (taa_data->run_thread_pid)
+			return 0;
+
+		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
+			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
+
+		return 0;
+	}
+
+	/*
+	 * IRQ interference.
+	 */
+	taa_data->thread_irq_sum += duration;
+	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu	\t	%9.2f us\n",
+			 desc, vector, ns_to_usf(duration));
+
+	return 0;
+}
+
+/* Names of the softirq vectors, indexed by the "vector" field of the event */
+static char *softirq_name[] = { "HI", "TIMER",	"NET_TX", "NET_RX", "BLOCK",
+				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
+
+
+/*
+ * timerlat_aa_softirq_handler - Handles Softirq noise
+ *
+ * It is used to collect information about interferences from Softirq. It is
+ * hooked to the osnoise:softirq_noise event.
+ *
+ * It is only printed in the non-rt kernel, as softirqs become thread on RT.
+ *
+ * Softirqs seen while waiting for the timerlat IRQ are ignored.
+ *
+ * Always returns 0.
+ */
+static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
+				       struct tep_event *event, void *context)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
+	unsigned long long duration;
+	unsigned long long vector;
+	unsigned long long start;
+	const char *name = "unknown";
+
+	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
+		return 0;
+
+	tep_get_field_val(s, event, "duration", record, &duration, 1);
+	tep_get_field_val(s, event, "start", record, &start, 1);
+	tep_get_field_val(s, event, "vector", record, &vector, 1);
+
+	taa_data->thread_softirq_sum += duration;
+
+	/*
+	 * The vector comes from the trace data: never use it as an index
+	 * without checking it against the size of the name table.
+	 */
+	if (vector < sizeof(softirq_name) / sizeof(softirq_name[0]))
+		name = softirq_name[vector];
+
+	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu	\t	%9.2f us\n",
+			 name, vector, ns_to_usf(duration));
+	return 0;
+}
+
+/*
+ * timerlat_aa_thread_handler - Handles thread noise
+ *
+ * It is used to collect information about interferences from threads. It is
+ * hooked to the osnoise:thread_noise event.
+ *
+ * The first noise of the thread that was running when the timerlat IRQ
+ * arrived is recorded as the blocking thread; everything else is summed
+ * as thread interference. Noise seen while waiting for the timerlat IRQ
+ * is ignored.
+ *
+ * Note: if you see thread noise, your timerlat thread was not the highest prio one.
+ *
+ * Always returns 0.
+ */
+static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
+				      struct tep_event *event, void *context)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
+	unsigned long long duration;
+	unsigned long long start;
+	unsigned long long pid;
+	const char *comm;
+	int val;
+
+	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
+		return 0;
+
+	tep_get_field_val(s, event, "duration", record, &duration, 1);
+	tep_get_field_val(s, event, "start", record, &start, 1);
+
+	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
+	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
+
+	/* the lookup can fail: use the placeholder on both paths below */
+	if (!comm)
+		comm = "<...>";
+
+	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
+		taa_data->thread_blocking_duration = duration;
+
+		/* strncpy() does not terminate on truncation, so do it by hand */
+		strncpy(taa_data->run_thread_comm, comm, MAX_COMM - 1);
+		taa_data->run_thread_comm[MAX_COMM - 1] = '\0';
+
+	} else {
+		taa_data->thread_thread_sum += duration;
+
+		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
+			 comm, pid, ns_to_usf(duration));
+	}
+
+	return 0;
+}
+
+/*
+ * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
+ *
+ * Rebuilds the per-CPU stack seq from the "caller" field of the event:
+ * a title line followed by one line per address that resolves to a
+ * function name. The walk stops at the first address that does not
+ * resolve.
+ *
+ * NOTE(review): the walk is not bounded by the length of the field; it
+ * presumably relies on an end marker in the recorded stack - confirm.
+ */
+static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
+			      struct tep_event *event, void *context)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
+	unsigned long *caller;
+	const char *function;
+	int val, i;
+
+	trace_seq_reset(taa_data->stack_seq);
+	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
+
+	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
+	if (!caller)
+		return 0;
+
+	i = 0;
+	while ((function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]))) {
+		trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
+		i++;
+	}
+
+	return 0;
+}
+
+/*
+ * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
+ *
+ * Handles the sched:sched_switch event to trace the current thread running on the
+ * CPU. It is used to display the threads running on the other CPUs when the trace
+ * stops.
+ *
+ * Always returns 0.
+ */
+static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
+					    struct tep_event *event, void *context)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
+	const char *comm;
+	int val;
+
+	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
+	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
+
+	/* the lookup can fail, and strncpy() must not be fed a NULL source */
+	if (!comm)
+		comm = "<...>";
+
+	/* strncpy() does not terminate on truncation, so do it by hand */
+	strncpy(taa_data->current_comm, comm, MAX_COMM - 1);
+	taa_data->current_comm[MAX_COMM - 1] = '\0';
+
+	/*
+	 * If this was a kworker, clean the last kworkers that ran.
+	 */
+	taa_data->kworker = 0;
+	taa_data->kworker_func = 0;
+
+	return 0;
+}
+
+/*
+ * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
+ *
+ * Hooked to workqueue:workqueue_execute_start. Stores the "work" and
+ * "function" fields of the event in the per-CPU data, so the job a
+ * kworker could be doing on the CPU is known when the trace stops.
+ *
+ * We already catch problems of hardware related latencies caused by work queues
+ * running driver code that causes hardware stall. For example, with DRM drivers.
+ */
+static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
+					     struct tep_event *event, void *context)
+{
+	struct timerlat_aa_data *cpu_data;
+
+	cpu_data = timerlat_aa_get_data(timerlat_aa_get_ctx(), record->cpu);
+
+	tep_get_field_val(s, event, "work", record, &cpu_data->kworker, 1);
+	tep_get_field_val(s, event, "function", record, &cpu_data->kworker_func, 1);
+
+	return 0;
+}
+
+/*
+ * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
+ *
+ * This is the core of the analysis.
+ *
+ * The last sample of the CPU is either an IRQ or a thread sample (whichever
+ * has the highest seqnum); its latency is the total against which every
+ * contribution is printed as a percentage.
+ *
+ * @cpu, @irq_thresh and @thread_thresh are currently not used here.
+ */
+static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
+				     int irq_thresh, int thread_thresh)
+{
+	unsigned long long exp_irq_ts;
+	/* same width as the latency fields, so nothing is truncated */
+	unsigned long long total;
+	int irq;
+
+	/*
+	 * IRQ latency or Thread latency?
+	 */
+	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
+		irq = 1;
+		total = taa_data->tlat_irq_latency;
+	} else {
+		irq = 0;
+		total = taa_data->tlat_thread_latency;
+	}
+
+	/*
+	 * Expected IRQ arrival time using the trace clock as the base.
+	 */
+	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
+
+	/* each report line ends with a newline, this one included */
+	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
+		printf("  Previous IRQ interference:	\t	up to %9.2f us\n",
+			ns_to_usf(taa_data->prev_irq_duration));
+
+	/*
+	 * The delay that the IRQ suffered before starting.
+	 */
+	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
+		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
+		ns_to_usf(taa_data->timer_irq_start_delay),
+		ns_to_per(total, taa_data->timer_irq_start_delay));
+
+	/*
+	 * Timerlat IRQ.
+	 */
+	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
+		ns_to_usf(taa_data->tlat_irq_latency));
+
+	if (irq) {
+		/*
+		 * If the trace stopped due to IRQ, the other events will not happen
+		 * because... the trace stopped :-).
+		 *
+		 * That is all folks, the stack trace was printed before the stop,
+		 * so it will be displayed, it is the key.
+		 */
+		printf("  Blocking thread:\n");
+		printf("	%24s:%-9llu\n",
+			taa_data->run_thread_comm, taa_data->run_thread_pid);
+	} else  {
+		/*
+		 * The duration of the IRQ handler that handled the timerlat IRQ.
+		 */
+		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
+			ns_to_usf(taa_data->timer_irq_duration),
+			ns_to_per(total, taa_data->timer_irq_duration));
+
+		/*
+		 * The amount of time that the current thread postponed the scheduler.
+		 *
+		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
+		 * is no need to compute values here.
+		 */
+		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
+			ns_to_usf(taa_data->thread_blocking_duration),
+			ns_to_per(total, taa_data->thread_blocking_duration));
+
+		printf("	%24s:%-9llu		%9.2f us\n",
+			taa_data->run_thread_comm, taa_data->run_thread_pid,
+			ns_to_usf(taa_data->thread_blocking_duration));
+	}
+
+	/*
+	 * Print the stack trace!
+	 */
+	trace_seq_do_printf(taa_data->stack_seq);
+
+	/*
+	 * NMIs can happen during the IRQ, so they are always possible.
+	 */
+	if (taa_data->thread_nmi_sum)
+		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
+			ns_to_usf(taa_data->thread_nmi_sum),
+			ns_to_per(total, taa_data->thread_nmi_sum));
+
+	/*
+	 * If it is an IRQ latency, the other factors can be skipped.
+	 */
+	if (irq)
+		goto print_total;
+
+	/*
+	 * Prints the interference caused by IRQs to the thread latency.
+	 */
+	if (taa_data->thread_irq_sum) {
+		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
+			ns_to_usf(taa_data->thread_irq_sum),
+			ns_to_per(total, taa_data->thread_irq_sum));
+
+		trace_seq_do_printf(taa_data->irqs_seq);
+	}
+
+	/*
+	 * Prints the interference caused by Softirqs to the thread latency.
+	 */
+	if (taa_data->thread_softirq_sum) {
+		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
+			ns_to_usf(taa_data->thread_softirq_sum),
+			ns_to_per(total, taa_data->thread_softirq_sum));
+
+		trace_seq_do_printf(taa_data->softirqs_seq);
+	}
+
+	/*
+	 * Prints the interference caused by other threads to the thread latency.
+	 *
+	 * If this happens, your timerlat is not the highest prio. OK, migration
+	 * thread can happen. But otherwise, you are not measuring the "scheduling
+	 * latency" only, and here is the difference from scheduling latency and
+	 * timer handling latency.
+	 */
+	if (taa_data->thread_thread_sum) {
+		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
+			ns_to_usf(taa_data->thread_thread_sum),
+			ns_to_per(total, taa_data->thread_thread_sum));
+
+		trace_seq_do_printf(taa_data->threads_seq);
+	}
+
+	/*
+	 * Done.
+	 */
+print_total:
+	printf("------------------------------------------------------------------------\n");
+	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
+		ns_to_usf(total));
+}
+
+/**
+ * timerlat_auto_analysis - Analyze the collected data
+ *
+ * @irq_thresh and @thread_thresh are the stop tracing thresholds in us;
+ * a zero threshold disables the respective check.
+ *
+ * Prints the analysis of every CPU whose last IRQ or thread latency hit
+ * its threshold, then the largest exit-from-idle IRQ latency seen and,
+ * when dump_tasks is set in the context, the task last seen on each CPU.
+ *
+ * Does nothing if the auto-analysis was not initialized.
+ */
+void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+	unsigned long long max_exit_from_idle = 0;
+	struct timerlat_aa_data *taa_data;
+	int max_exit_from_idle_cpu = 0;
+	struct tep_handle *tep;
+	int cpu;
+
+	if (!taa_ctx)
+		return;
+
+	/* bring stop tracing to the ns scale */
+	irq_thresh = irq_thresh * 1000;
+	thread_thresh = thread_thresh * 1000;
+
+	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
+		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
+
+		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
+			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
+			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
+		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
+			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
+			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
+		}
+
+		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
+			max_exit_from_idle = taa_data->max_exit_idle_latency;
+			max_exit_from_idle_cpu = cpu;
+		}
+
+	}
+
+	if (max_exit_from_idle) {
+		printf("\n");
+		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
+			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
+	}
+	if (!taa_ctx->dump_tasks)
+		return;
+
+	tep = taa_ctx->tool->trace.tep;
+
+	printf("\n");
+	printf("Printing CPU tasks:\n");
+	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
+		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
+
+		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
+
+		/*
+		 * Either lookup can come back empty: never hand a NULL
+		 * string to printf().
+		 */
+		if (taa_data->kworker_func)
+			printf(" kworker:%s:%s",
+				tep_find_function(tep, taa_data->kworker) ? : "<...>",
+				tep_find_function(tep, taa_data->kworker_func) ? : "<...>");
+		printf("\n");
+	}
+
+}
+
+/*
+ * timerlat_aa_free_seq - Destroy and free one seq, clearing the caller's pointer
+ */
+static void timerlat_aa_free_seq(struct trace_seq **seq)
+{
+	if (!*seq)
+		return;
+
+	trace_seq_destroy(*seq);
+	free(*seq);
+	*seq = NULL;
+}
+
+/*
+ * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
+ *
+ * Safe to call on a partially initialized context and safe to call more
+ * than once: every pointer is cleared as it is released. This matters
+ * because a failure in timerlat_aa_init_seqs() gets here twice, once from
+ * the function itself and once via timerlat_aa_destroy().
+ */
+static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
+{
+	struct timerlat_aa_data *taa_data;
+	int i;
+
+	if (!taa_ctx->taa_data)
+		return;
+
+	for (i = 0; i < taa_ctx->nr_cpus; i++) {
+		taa_data = timerlat_aa_get_data(taa_ctx, i);
+
+		timerlat_aa_free_seq(&taa_data->prev_irqs_seq);
+		timerlat_aa_free_seq(&taa_data->nmi_seq);
+		timerlat_aa_free_seq(&taa_data->irqs_seq);
+		timerlat_aa_free_seq(&taa_data->softirqs_seq);
+		timerlat_aa_free_seq(&taa_data->threads_seq);
+		timerlat_aa_free_seq(&taa_data->stack_seq);
+	}
+}
+
+/*
+ * timerlat_aa_alloc_seq - Allocate and initialize one seq
+ *
+ * Returns the new seq, or NULL if the allocation failed.
+ */
+static struct trace_seq *timerlat_aa_alloc_seq(void)
+{
+	struct trace_seq *seq = calloc(1, sizeof(*seq));
+
+	if (seq)
+		trace_seq_init(seq);
+
+	return seq;
+}
+
+/*
+ * timerlat_aa_init_seqs - Init seq files used to store parsed information
+ *
+ * The parsed data is kept as text in seq files rather than as raw data
+ * structures. This allocates and initializes the six seqs of every CPU;
+ * on the first failure everything allocated so far is destroyed.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
+{
+	struct timerlat_aa_data *cpu_data;
+	int cpu;
+
+	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
+		cpu_data = timerlat_aa_get_data(taa_ctx, cpu);
+
+		cpu_data->prev_irqs_seq = timerlat_aa_alloc_seq();
+		if (!cpu_data->prev_irqs_seq)
+			goto out_err;
+
+		cpu_data->nmi_seq = timerlat_aa_alloc_seq();
+		if (!cpu_data->nmi_seq)
+			goto out_err;
+
+		cpu_data->irqs_seq = timerlat_aa_alloc_seq();
+		if (!cpu_data->irqs_seq)
+			goto out_err;
+
+		cpu_data->softirqs_seq = timerlat_aa_alloc_seq();
+		if (!cpu_data->softirqs_seq)
+			goto out_err;
+
+		cpu_data->threads_seq = timerlat_aa_alloc_seq();
+		if (!cpu_data->threads_seq)
+			goto out_err;
+
+		cpu_data->stack_seq = timerlat_aa_alloc_seq();
+		if (!cpu_data->stack_seq)
+			goto out_err;
+	}
+
+	return 0;
+
+out_err:
+	timerlat_aa_destroy_seqs(taa_ctx);
+	return -1;
+}
+
+/*
+ * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
+ *
+ * Disables the osnoise events and removes the five noise/stack handlers.
+ * With @dump_tasks set, the sched_switch and workqueue_execute_start
+ * events and handlers are removed as well.
+ */
+static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
+{
+	struct tep_handle *tep = tool->trace.tep;
+
+	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
+
+	tep_unregister_event_handler(tep, -1, "osnoise", "nmi_noise",
+				     timerlat_aa_nmi_handler, tool);
+	tep_unregister_event_handler(tep, -1, "osnoise", "irq_noise",
+				     timerlat_aa_irq_handler, tool);
+	tep_unregister_event_handler(tep, -1, "osnoise", "softirq_noise",
+				     timerlat_aa_softirq_handler, tool);
+	tep_unregister_event_handler(tep, -1, "osnoise", "thread_noise",
+				     timerlat_aa_thread_handler, tool);
+	tep_unregister_event_handler(tep, -1, "ftrace", "kernel_stack",
+				     timerlat_aa_stack_handler, tool);
+
+	if (dump_tasks) {
+		/* events that are only tracked to dump the per-CPU tasks */
+		tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
+		tep_unregister_event_handler(tep, -1, "sched", "sched_switch",
+					     timerlat_aa_sched_switch_handler, tool);
+
+		tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
+		tep_unregister_event_handler(tep, -1, "workqueue", "workqueue_execute_start",
+					     timerlat_aa_kworker_start_handler, tool);
+	}
+}
+
+/*
+ * timerlat_aa_register_events - Register events used in the auto-analysis
+ *
+ * Enables the osnoise events and installs the noise and stack handlers.
+ * With @dump_tasks set, sched_switch and workqueue_execute_start are
+ * enabled and hooked as well. On failure, everything is unregistered.
+ *
+ * NOTE(review): an enable failure is only treated as an error when errno
+ * is zero - presumably on purpose, confirm against the libtracefs docs.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
+{
+	struct tep_handle *tep = tool->trace.tep;
+	int retval;
+
+	/* the osnoise events feed all the noise handlers below */
+	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
+	if (retval < 0 && !errno) {
+		err_msg("Could not find osnoise events\n");
+		goto out_err;
+	}
+
+	tep_register_event_handler(tep, -1, "osnoise", "nmi_noise",
+				   timerlat_aa_nmi_handler, tool);
+	tep_register_event_handler(tep, -1, "osnoise", "irq_noise",
+				   timerlat_aa_irq_handler, tool);
+	tep_register_event_handler(tep, -1, "osnoise", "softirq_noise",
+				   timerlat_aa_softirq_handler, tool);
+	tep_register_event_handler(tep, -1, "osnoise", "thread_noise",
+				   timerlat_aa_thread_handler, tool);
+	tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
+				   timerlat_aa_stack_handler, tool);
+
+	if (dump_tasks) {
+		/* events that are only tracked to dump the per-CPU tasks */
+		retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
+		if (retval < 0 && !errno) {
+			err_msg("Could not find sched_switch\n");
+			goto out_err;
+		}
+
+		tep_register_event_handler(tep, -1, "sched", "sched_switch",
+					   timerlat_aa_sched_switch_handler, tool);
+
+		retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
+		if (retval < 0 && !errno) {
+			err_msg("Could not find workqueue_execute_start\n");
+			goto out_err;
+		}
+
+		tep_register_event_handler(tep, -1, "workqueue", "workqueue_execute_start",
+					   timerlat_aa_kworker_start_handler, tool);
+	}
+
+	return 0;
+
+out_err:
+	timerlat_aa_unregister_events(tool, dump_tasks);
+	return -1;
+}
+
+/**
+ * timerlat_aa_destroy - Destroy timerlat auto-analysis
+ *
+ * Unregisters the events, releases the per-CPU seqs and data, frees the
+ * context and clears the file-wide context pointer. Does nothing if there
+ * is no context, so calling it again after a first call is harmless.
+ */
+void timerlat_aa_destroy(void)
+{
+	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
+
+	if (!taa_ctx)
+		return;
+
+	if (!taa_ctx->taa_data)
+		goto out_ctx;
+
+	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
+	timerlat_aa_destroy_seqs(taa_ctx);
+	free(taa_ctx->taa_data);
+out_ctx:
+	free(taa_ctx);
+	/* do not leave a dangling pointer behind for the next get_ctx() */
+	__timerlat_aa_ctx = NULL;
+}
+
+/**
+ * timerlat_aa_init - Initialize timerlat auto-analysis
+ *
+ * Allocates the context and the per-CPU data for @nr_cpus CPUs, creates
+ * the seqs and registers the event handlers on @tool. Whatever was set up
+ * is torn down via timerlat_aa_destroy() if any step fails.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_tasks)
+{
+	struct timerlat_aa_context *taa_ctx = calloc(1, sizeof(*taa_ctx));
+
+	if (!taa_ctx)
+		return -1;
+
+	/* published right away: the error path finds the context through it */
+	__timerlat_aa_ctx = taa_ctx;
+
+	taa_ctx->tool = tool;
+	taa_ctx->nr_cpus = nr_cpus;
+	taa_ctx->dump_tasks = dump_tasks;
+
+	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
+
+	/* each step only runs if the previous one succeeded */
+	if (!taa_ctx->taa_data ||
+	    timerlat_aa_init_seqs(taa_ctx) ||
+	    timerlat_aa_register_events(tool, dump_tasks)) {
+		timerlat_aa_destroy();
+		return -1;
+	}
+
+	return 0;
+}
--- a/tools/tracing/rtla/src/timerlat_aa.h
+++ b/tools/tracing/rtla/src/timerlat_aa.h
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+/*
+ * Set up the auto-analysis for nr_cpus CPUs on the given tool.
+ * Returns 0 on success, -1 otherwise.
+ */
+int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_task);
+/* Release everything that timerlat_aa_init() set up */
+void timerlat_aa_destroy(void);
+
+/* Handler for the timerlat events. Returns 0 on success, -1 otherwise. */
+int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
+			struct tep_event *event, void *context);
+
+/* Print the analysis; both thresholds are in us (converted to ns internally) */
+void timerlat_auto_analysis(int irq_thresh, int thread_thresh);
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -10,10 +10,12 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <time.h>
+#include <errno.h>

 #include "utils.h"
 #include "osnoise.h"
 #include "timerlat.h"
+#include "timerlat_aa.h"

 struct timerlat_top_params {
 	char			*cpus;
@@ -30,6 +32,8 @@ struct timerlat_top_params {
 	int			quiet;
 	int			set_sched;
 	int			dma_latency;
+	int			no_aa;
+	int			dump_tasks;
 	struct sched_attr	sched_param;
 	struct trace_events	*events;
 };
@@ -130,17 +134,22 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
 		     struct tep_event *event, void *context)
 {
 	struct trace_instance *trace = context;
+	struct timerlat_top_params *params;
 	unsigned long long latency, thread;
 	struct osnoise_tool *top;
 	int cpu = record->cpu;

 	top = container_of(trace, struct osnoise_tool, trace);
+	params = top->params;

 	tep_get_field_val(s, event, "context", record, &thread, 1);
 	tep_get_field_val(s, event, "timer_latency", record, &latency, 1);

 	timerlat_top_update(top, cpu, thread, latency);

+	if (!params->no_aa)
+		timerlat_aa_handler(s, record, event, context);
+
 	return 0;
 }

@@ -281,11 +290,13 @@ static void timerlat_top_usage(char *usage)
 		"	  -c/--cpus cpus: run the tracer only on the given cpus",
 		"	  -d/--duration time[m|h|d]: duration of the session in seconds",
 		"	  -D/--debug: print debug info",
+		"	     --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
 		"	  -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
 		"	  -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
 		"	     --filter <command>: enable a trace event filter to the previous -e event",
 		"	     --trigger <command>: enable a trace event trigger to the previous -e event",
 		"	  -n/--nano: display data in nanoseconds",
+		"	     --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage",
 		"	  -q/--quiet print only a summary at the end",
 		"	     --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
 		"	  -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
@@ -349,13 +360,15 @@ static struct timerlat_top_params
 			{"trigger",		required_argument,	0, '0'},
 			{"filter",		required_argument,	0, '1'},
 			{"dma-latency",		required_argument,	0, '2'},
+			{"no-aa",		no_argument,		0, '3'},
+			{"dump-tasks",		no_argument,		0, '4'},
 			{0, 0, 0, 0}
 		};

 		/* getopt_long stores the option index here. */
 		int option_index = 0;

-		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:",
+		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:34",
 				 long_options, &option_index);

 		/* detect the end of the options. */
@@ -368,13 +381,13 @@ static struct timerlat_top_params

 			/* set thread stop to auto_thresh */
 			params->stop_total_us = auto_thresh;
+			params->stop_us = auto_thresh;

 			/* get stack trace */
 			params->print_stack = auto_thresh;

 			/* set trace */
 			params->trace_output = "timerlat_trace.txt";
-
 			break;
 		case 'c':
 			retval = parse_cpu_list(optarg, &params->monitored_cpus);
@@ -437,6 +450,7 @@ static struct timerlat_top_params
 				params->trace_output = &optarg[1];
 			else
 				params->trace_output = "timerlat_trace.txt";
+
 			break;
 		case '0': /* trigger */
 			if (params->events) {
@@ -467,6 +481,12 @@ static struct timerlat_top_params
 				exit(EXIT_FAILURE);
 			}
 			break;
+		case '3': /* no-aa */
+			params->no_aa = 1;
+			break;
+		case '4':
+			params->dump_tasks = 1;
+			break;
 		default:
 			timerlat_top_usage("Invalid option");
 		}
@@ -477,6 +497,12 @@ static struct timerlat_top_params
 		exit(EXIT_FAILURE);
 	}

+	/*
+	 * Auto analysis only runs after a stop tracing threshold is hit, thus:
+	 */
+	if (!params->stop_us && !params->stop_total_us)
+		params->no_aa = 1;
+
 	return params;
 }

@@ -547,6 +573,7 @@ static struct osnoise_tool
 {
 	struct osnoise_tool *top;
 	int nr_cpus;
+	int retval;

 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

@@ -563,6 +590,16 @@ static struct osnoise_tool
 	tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
 				   timerlat_top_handler, top);

+	/*
+	 * If no auto analysis, we are ready.
+	 */
+	if (params->no_aa)
+		return top;
+
+	retval = timerlat_aa_init(top, nr_cpus, params->dump_tasks);
+	if (retval)
+		goto out_err;
+
 	return top;

 out_err:
@@ -688,6 +725,10 @@ int timerlat_top_main(int argc, char *argv[])

 	if (trace_is_off(&top->trace, &record->trace)) {
 		printf("rtla timerlat hit stop tracing\n");
+
+		if (!params->no_aa)
+			timerlat_auto_analysis(params->stop_us, params->stop_total_us);
+
 		if (params->trace_output) {
 			printf("  Saving trace to %s\n", params->trace_output);
 			save_trace_to_file(record->trace.inst, params->trace_output);
@@ -701,6 +742,7 @@ int timerlat_top_main(int argc, char *argv[])
 	params->events = NULL;
 out_free:
 	timerlat_free_top(top->data);
+	timerlat_aa_destroy();
 	osnoise_destroy_tool(record);
 	osnoise_destroy_tool(top);
 	free(params);

--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -56,3 +56,6 @@ struct sched_attr {
 int parse_prio(char *arg, struct sched_attr *sched_param);
 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr);
 int set_cpu_dma_latency(int32_t latency);
+
+#define ns_to_usf(x) ((double)(x) / 1000)	/* nanoseconds to microseconds, as a double */
+#define ns_to_per(total, part) (((part) * 100) / (double)(total))	/* part as a percentage of total */
--- a/tools/verification/rv/src/in_kernel.c
+++ b/tools/verification/rv/src/in_kernel.c
@@ -519,7 +519,7 @@ static void ikm_usage_print_reactors(void)

 		start = ++end;
 		end = strstr(start, "\n");
-	};
+	}

 	fprintf(stderr, "\n");
 }