Commit 694a0195 authored by Grzegorz Bizon

Instrument builds queue SQL execution duration

This commit adds a Prometheus histogram used to observe the time it
takes to execute the SQL query we use to retrieve the builds queue from
the database.
parent f827d6ba
...@@ -24,7 +24,7 @@ module Ci ...@@ -24,7 +24,7 @@ module Ci
def execute(params = {}) def execute(params = {})
@metrics.increment_queue_operation(:queue_attempt) @metrics.increment_queue_operation(:queue_attempt)
@metrics.observe_queue_time do @metrics.observe_queue_time(:process) do
process_queue(params) process_queue(params)
end end
end end
...@@ -110,7 +110,7 @@ module Ci ...@@ -110,7 +110,7 @@ module Ci
end end
if Feature.enabled?(:ci_register_job_service_one_by_one, runner, default_enabled: true) if Feature.enabled?(:ci_register_job_service_one_by_one, runner, default_enabled: true)
build_ids = process_queue(-> { builds.pluck(:id) }) build_ids = retrieve_queue(-> { builds.pluck(:id) })
@metrics.observe_queue_size(-> { build_ids.size }) @metrics.observe_queue_size(-> { build_ids.size })
...@@ -118,7 +118,7 @@ module Ci ...@@ -118,7 +118,7 @@ module Ci
yield Ci::Build.find(build_id) yield Ci::Build.find(build_id)
end end
else else
builds = process_queue(-> { builds.to_a }) builds = retrieve_queue(-> { builds.to_a })
@metrics.observe_queue_size(-> { builds.size }) @metrics.observe_queue_size(-> { builds.size })
...@@ -127,8 +127,10 @@ module Ci ...@@ -127,8 +127,10 @@ module Ci
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
def process_queue(queue_query_proc) def retrieve_queue(queue_query_proc)
queue_query_proc.call @metrics.observe_queue_time(:retrieve) do
queue_query_proc.call
end
end end
def process_build(build, params) def process_build(build, params)
......
...@@ -9,8 +9,8 @@ module EE ...@@ -9,8 +9,8 @@ module EE
## ##
# We only stick a runner to primary database to be able to detect the # We only stick a runner to primary database to be able to detect the
# replication lag in `EE::Ci::RegisterJobService#execute`. The # replication lag in `EE::Ci::RegisterJobService#execute`. The
# intention here is not execute `Ci::RegisterJobService#execute` on the # intention here is not to execute `Ci::RegisterJobService#execute` on
# primary database. # the primary database.
# #
::Gitlab::Database::LoadBalancing::Sticking.stick(:runner, id) ::Gitlab::Database::LoadBalancing::Sticking.stick(:runner, id)
......
...@@ -10,7 +10,7 @@ module Gitlab ...@@ -10,7 +10,7 @@ module Gitlab
QUEUE_ACTIVE_RUNNERS_BUCKETS = [1, 3, 10, 30, 60, 300, 900, 1800, 3600].freeze QUEUE_ACTIVE_RUNNERS_BUCKETS = [1, 3, 10, 30, 60, 300, 900, 1800, 3600].freeze
QUEUE_DEPTH_TOTAL_BUCKETS = [1, 2, 3, 5, 8, 16, 32, 50, 100, 250, 500, 1000, 2000, 5000].freeze QUEUE_DEPTH_TOTAL_BUCKETS = [1, 2, 3, 5, 8, 16, 32, 50, 100, 250, 500, 1000, 2000, 5000].freeze
QUEUE_SIZE_TOTAL_BUCKETS = [1, 5, 10, 50, 100, 500, 1000, 2000, 5000].freeze QUEUE_SIZE_TOTAL_BUCKETS = [1, 5, 10, 50, 100, 500, 1000, 2000, 5000].freeze
QUEUE_ITERATION_DURATION_SECONDS_BUCKETS = [0.1, 0.3, 0.5, 1, 5, 10, 30, 60, 180, 300].freeze QUEUE_PROCESSING_DURATION_SECONDS_BUCKETS = [0.01, 0.05, 0.1, 0.3, 0.5, 1, 5, 10, 30, 60, 180, 300].freeze
METRICS_SHARD_TAG_PREFIX = 'metrics_shard::' METRICS_SHARD_TAG_PREFIX = 'metrics_shard::'
DEFAULT_METRICS_SHARD = 'default' DEFAULT_METRICS_SHARD = 'default'
...@@ -100,7 +100,7 @@ module Gitlab ...@@ -100,7 +100,7 @@ module Gitlab
self.class.queue_size_total.observe({}, size_proc.call.to_f) self.class.queue_size_total.observe({}, size_proc.call.to_f)
end end
def observe_queue_time def observe_queue_time(metric)
start_time = ::Gitlab::Metrics::System.monotonic_time start_time = ::Gitlab::Metrics::System.monotonic_time
result = yield result = yield
...@@ -108,7 +108,15 @@ module Gitlab ...@@ -108,7 +108,15 @@ module Gitlab
return result unless Feature.enabled?(:gitlab_ci_builds_queuing_metrics, default_enabled: false) return result unless Feature.enabled?(:gitlab_ci_builds_queuing_metrics, default_enabled: false)
seconds = ::Gitlab::Metrics::System.monotonic_time - start_time seconds = ::Gitlab::Metrics::System.monotonic_time - start_time
self.class.queue_iteration_duration_seconds.observe({}, seconds.to_f)
case metric
when :process
self.class.queue_iteration_duration_seconds.observe({}, seconds.to_f)
when :retrieve
self.class.queue_retrieval_duration_seconds.observe({}, seconds.to_f)
else
raise ArgumentError unless Rails.env.production?
end
result result
end end
...@@ -187,7 +195,18 @@ module Gitlab ...@@ -187,7 +195,18 @@ module Gitlab
strong_memoize(:queue_iteration_duration_seconds) do strong_memoize(:queue_iteration_duration_seconds) do
name = :gitlab_ci_queue_iteration_duration_seconds name = :gitlab_ci_queue_iteration_duration_seconds
comment = 'Time it takes to find a build in CI/CD queue' comment = 'Time it takes to find a build in CI/CD queue'
buckets = QUEUE_ITERATION_DURATION_SECONDS_BUCKETS buckets = QUEUE_PROCESSING_DURATION_SECONDS_BUCKETS
labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets)
end
end
def self.queue_retrieval_duration_seconds
strong_memoize(:queue_retrival_duration_seconds) do
name = :gitlab_ci_queue_retrival_duration_seconds
comment = 'Time it takes to execute a SQL query to retrieve builds queue'
buckets = QUEUE_PROCESSING_DURATION_SECONDS_BUCKETS
labels = {} labels = {}
Gitlab::Metrics.histogram(name, comment, labels, buckets) Gitlab::Metrics.histogram(name, comment, labels, buckets)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment