Merge branch 'mk/stackprof-object-support' into 'master'

Add support for other stackprof profiles. See merge request gitlab-org/gitlab!45277

Merge branch 'mk/stackprof-object-support' into 'master'
Add support for other stackprof profiles. See merge request gitlab-org/gitlab!45277
ea70a6e4 · Markus Koller · 0b98d751 · 9951c27c · ea70a6e4 · ea70a6e4
Commit ea70a6e4 authored Oct 19, 2020 by Markus Koller
3 changed files
--- a/changelogs/unreleased/mk-stackprof-object-support.yml
+++ b/changelogs/unreleased/mk-stackprof-object-support.yml
+---
+title: Support all stackprof profiling modes
+merge_request: 45277
+author:
+type: changed
--- a/config/initializers/stackprof.rb
+++ b/config/initializers/stackprof.rb
@@ -2,14 +2,18 @@
 # trigger stackprof by sending a SIGUSR2 signal
 #
-# default settings:
+# Docs: https://docs.gitlab.com/ee/development/performance.html#production
-# * collect raw samples
-# * sample at 100hz (every 10k microseconds)
-# * timeout profile after 30 seconds
-# * write to $TMPDIR/stackprof.$PID.$RAND.profile
 module Gitlab
  class StackProf
+    DEFAULT_FILE_PREFIX = Dir.tmpdir
+    DEFAULT_TIMEOUT_SEC = 30
+    DEFAULT_MODE = :cpu
+    # Sample interval in microseconds (10_000 μs ≈ 100 Hz sampling frequency); appropriate for CPU profiles
+    DEFAULT_INTERVAL_US = 10_000
+    # Sample interval in event occurrences (n = every nth event); appropriate for allocation profiles
+    DEFAULT_INTERVAL_EVENTS = 1_000
    # this is a workaround for sidekiq, which defines its own SIGUSR2 handler.
    # by deferring to the sidekiq startup event, we get to set up our own
    # handler late enough.
@@ -32,11 +36,7 @@ module Gitlab
    end
    def self.on_worker_start
-      Gitlab::AppJsonLogger.info(
+      log_event('listening for SIGUSR2 signal')
-        event: "stackprof",
-        message: "listening on SIGUSR2 signal",
-        pid: Process.pid
-      )
      # create a pipe in order to propagate signal out of the signal handler
      # see also: https://cr.yp.to/docs/selfpipe.html
@@ -55,43 +55,46 @@ module Gitlab
      # a given interval (by default 30 seconds), avoiding unbounded memory
      # growth from a profile that was started and never stopped.
      t = Thread.new do
-        timeout_s = ENV['STACKPROF_TIMEOUT_S']&.to_i || 30
+        timeout_s = ENV['STACKPROF_TIMEOUT_S']&.to_i || DEFAULT_TIMEOUT_SEC
        current_timeout_s = nil
        loop do
-          got_value = IO.select([read], nil, nil, current_timeout_s)
+          read.getbyte if IO.select([read], nil, nil, current_timeout_s)
-          read.getbyte if got_value
          if ::StackProf.running?
-            stackprof_file_prefix = ENV['STACKPROF_FILE_PREFIX'] || Dir.tmpdir
+            stackprof_file_prefix = ENV['STACKPROF_FILE_PREFIX'] || DEFAULT_FILE_PREFIX
            stackprof_out_file = "#{stackprof_file_prefix}/stackprof.#{Process.pid}.#{SecureRandom.hex(6)}.profile"
-            Gitlab::AppJsonLogger.info(
+            log_event(
-              event: "stackprof",
+              'stopping profile',
-              message: "stopping profile",
+              profile_filename: stackprof_out_file,
-              output_filename: stackprof_out_file,
+              profile_timeout_s: timeout_s
-              pid: Process.pid,
-              timeout_s: timeout_s,
-              timed_out: got_value.nil?
            )
            ::StackProf.stop
            ::StackProf.results(stackprof_out_file)
            current_timeout_s = nil
          else
-            Gitlab::AppJsonLogger.info(
+            mode = ENV['STACKPROF_MODE']&.to_sym || DEFAULT_MODE
-              event: "stackprof",
+            interval = ENV['STACKPROF_INTERVAL']&.to_i
-              message: "starting profile",
+            interval ||= (mode == :object ? DEFAULT_INTERVAL_EVENTS : DEFAULT_INTERVAL_US)
-              pid: Process.pid
+            log_event(
+              'starting profile',
+              profile_mode: mode,
+              profile_interval: interval,
+              profile_timeout: timeout_s
            )
            ::StackProf.start(
-              mode: :cpu,
+              mode: mode,
              raw: Gitlab::Utils.to_boolean(ENV['STACKPROF_RAW'] || 'true'),
-              interval: ENV['STACKPROF_INTERVAL_US']&.to_i || 10_000
+              interval: interval
            )
            current_timeout_s = timeout_s
          end
        end
+      rescue => e
+        log_event("stackprof failed: #{e}")
      end
      t.abort_on_exception = true
@@ -121,6 +124,14 @@ module Gitlab
        write.write('.')
      end
    end
+    def self.log_event(event, labels = {})
+      Gitlab::AppJsonLogger.info({
+        event: 'stackprof',
+        message: event,
+        pid: Process.pid
+      }.merge(labels.compact))
+    end
  end
 end

--- a/doc/development/performance.md
+++ b/doc/development/performance.md
@@ -247,8 +247,12 @@ The following configuration options can be configured:
 - `STACKPROF_ENABLED`: Enables stackprof signal handler on SIGUSR2 signal.
  Defaults to `false`.
- `STACKPROF_INTERVAL_US`: Sampling interval in microseconds. Defaults to
+- `STACKPROF_MODE`: See [sampling modes](https://github.com/tmm1/stackprof#sampling).
-  `10000` μs (100hz).
+  Defaults to `cpu`.
+- `STACKPROF_INTERVAL`: Sampling interval. Unit semantics depend on `STACKPROF_MODE`.
+  For `object` mode this is a per-event interval (every `n`th event will be sampled)
+  and defaults to `1000`.
+  For other modes such as `cpu` this is a time interval in microseconds and defaults to `10000` μs (100 Hz).
 - `STACKPROF_FILE_PREFIX`: File path prefix where profiles are stored. Defaults
  to `$TMPDIR` (often corresponds to `/tmp`).
 - `STACKPROF_TIMEOUT_S`: Profiling timeout in seconds. Profiling will