Commit abc0e8e6
Authored Jul 01, 2020 by Matthias Käppler
Committed by Ash McKenzie, Jul 01, 2020
Query recorded metrics instead of ad-hoc

See https://gitlab.com/gitlab-org/omnibus-gitlab/-/merge_requests/4343

Parent: 731bd999
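
For orientation, an illustrative sketch (not part of the commit): instead of running the PromQL aggregations ad-hoc against raw exporter series, the usage ping now reads pre-recorded series such as gitlab_usage_ping:node_cpus:count, presumably defined as Prometheus recording rules by the omnibus-gitlab change linked above, and averages them over one week. Roughly, in terms of the client used in the diff below:

  # Address as stubbed in the specs; a real deployment would use Gitlab::Prometheus::Internal.uri.
  client = Gitlab::PrometheusClient.new('http://prom:9090', allow_local_requests: true)

  # Before: ad-hoc aggregation over raw node_exporter series, evaluated at ping time
  client.aggregate('count (node_cpu_seconds_total{mode="idle"}) by (instance)')

  # After: read the recorded series and smooth it over one week
  client.aggregate('avg_over_time (gitlab_usage_ping:node_cpus:count[1w])')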
Showing 4 changed files with 155 additions and 91 deletions:

  lib/gitlab/usage_data_concerns/topology.rb             +66  -51
  lib/gitlab/utils/usage_data.rb                           +5   -5
  spec/lib/gitlab/usage_data_concerns/topology_spec.rb    +75  -34
  spec/lib/gitlab/utils/usage_data_spec.rb                  +9   -1
lib/gitlab/usage_data_concerns/topology.rb

...
@@ -18,55 +18,70 @@ module Gitlab
       def topology_usage_data
         topology_data, duration = measure_duration do
-          alt_usage_data(fallback: {}) do
-            {
-              nodes: topology_node_data
-            }.compact
-          end
+          alt_usage_data(fallback: {}) { topology_fetch_all_data }
         end

         {
           topology: topology_data.merge(duration_s: duration)
         }
       end

       private

-      def topology_node_data
-        with_prometheus_client do |client|
-          # node-level data
-          by_instance_mem = topology_node_memory(client)
-          by_instance_cpus = topology_node_cpus(client)
-          # service-level data
-          by_instance_by_job_by_metric_memory = topology_all_service_memory(client)
-          by_instance_by_job_process_count = topology_all_service_process_count(client)
-
-          instances = Set.new(by_instance_mem.keys + by_instance_cpus.keys)
-          instances.map do |instance|
-            {
-              node_memory_total_bytes: by_instance_mem[instance],
-              node_cpus: by_instance_cpus[instance],
-              node_services: topology_node_services(instance, by_instance_by_job_process_count, by_instance_by_job_by_metric_memory)
-            }.compact
-          end
-        end
-      end
+      def topology_fetch_all_data
+        with_prometheus_client(fallback: {}) do |client|
+          {
+            nodes: topology_node_data(client)
+          }
+        end
+      end
+
+      def topology_node_data(client)
+        # node-level data
+        by_instance_mem = topology_node_memory(client)
+        by_instance_cpus = topology_node_cpus(client)
+        # service-level data
+        by_instance_by_job_by_type_memory = topology_all_service_memory(client)
+        by_instance_by_job_process_count = topology_all_service_process_count(client)
+
+        instances = Set.new(by_instance_mem.keys + by_instance_cpus.keys)
+        instances.map do |instance|
+          {
+            node_memory_total_bytes: by_instance_mem[instance],
+            node_cpus: by_instance_cpus[instance],
+            node_services: topology_node_services(instance, by_instance_by_job_process_count, by_instance_by_job_by_type_memory)
+          }.compact
+        end
+      end

       def topology_node_memory(client)
-        aggregate_single(client, 'avg (node_memory_MemTotal_bytes) by (instance)')
+        aggregate_by_instance(client, 'gitlab_usage_ping:node_memory_total_bytes:avg')
       end

       def topology_node_cpus(client)
-        aggregate_single(client, 'count (node_cpu_seconds_total{mode="idle"}) by (instance)')
+        aggregate_by_instance(client, 'gitlab_usage_ping:node_cpus:count')
       end

       def topology_all_service_memory(client)
-        aggregate_many(
-          client,
-          'avg ({__name__ =~ "(ruby_){0,1}process_(resident|unique|proportional)_memory_bytes", job != "gitlab_exporter_process"}) by (instance, job, __name__)'
-        )
+        {
+          rss: topology_service_memory_rss(client),
+          uss: topology_service_memory_uss(client),
+          pss: topology_service_memory_pss(client)
+        }
+      end
+
+      def topology_service_memory_rss(client)
+        aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process_resident_memory_bytes:avg')
+      end
+
+      def topology_service_memory_uss(client)
+        aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process_unique_memory_bytes:avg')
+      end
+
+      def topology_service_memory_pss(client)
+        aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg')
       end

       def topology_all_service_process_count(client)
-        aggregate_many(client, 'count ({__name__ =~ "(ruby_){0,1}process_start_time_seconds", job != "gitlab_exporter_process"}) by (instance, job)')
+        aggregate_by_labels(client, 'gitlab_usage_ping:node_service_process:count')
       end

       def topology_node_services(instance, all_process_counts, all_process_memory)
...
@@ -92,24 +107,21 @@ module Gitlab
         end
       end

-      def topology_instance_service_memory(instance, all_instance_data)
-        topology_data_for_instance(instance, all_instance_data).each_with_object({}) do |entry, hash|
-          metric, memory = entry
-          job = metric['job']
-          key = case metric['__name__']
-                when match_process_memory_metric_for_type('resident') then :process_memory_rss
-                when match_process_memory_metric_for_type('unique') then :process_memory_uss
-                when match_process_memory_metric_for_type('proportional') then :process_memory_pss
-                end
-
-          hash[job] ||= {}
-          hash[job][key] ||= memory
-        end
-      end
-
-      def match_process_memory_metric_for_type(type)
-        /(ruby_){0,1}process_#{type}_memory_bytes/
-      end
+      # Given a hash mapping memory set types to Prometheus response data, returns a hash
+      # mapping instance/node names to services and their respective memory use in bytes
+      def topology_instance_service_memory(instance, instance_data_by_type)
+        result = {}
+        instance_data_by_type.each do |memory_type, instance_data|
+          topology_data_for_instance(instance, instance_data).each do |metric, memory_bytes|
+            job = metric['job']
+            key = "process_memory_#{memory_type}".to_sym
+
+            result[job] ||= {}
+            result[job][key] ||= memory_bytes
+          end
+        end
+
+        result
+      end

       def topology_data_for_instance(instance, all_instance_data)
...
@@ -120,14 +132,17 @@ module Gitlab
         instance.gsub(/:.+$/, '')
       end

-      # Will retain a single `instance` key that values are mapped to
-      def aggregate_single(client, query)
-        client.aggregate(query) { |metric| drop_port(metric['instance']) }
+      def one_week_average(query)
+        "avg_over_time (#{query}[1w])"
+      end
+
+      def aggregate_by_instance(client, query)
+        client.aggregate(one_week_average(query)) { |metric| drop_port(metric['instance']) }
       end

       # Will retain a composite key that values are mapped to
-      def aggregate_many(client, query)
-        client.aggregate(query) do |metric|
+      def aggregate_by_labels(client, query)
+        client.aggregate(one_week_average(query)) do |metric|
           metric['instance'] = drop_port(metric['instance'])
           metric
         end
...
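
A rough sketch of how the two new aggregation helpers compose (illustrative only; the return values shown are hypothetical):

  # one_week_average wraps a recorded series in avg_over_time:
  one_week_average('gitlab_usage_ping:node_memory_total_bytes:avg')
  # => "avg_over_time (gitlab_usage_ping:node_memory_total_bytes:avg[1w])"

  # aggregate_by_instance keys the aggregated values by instance name with the port
  # stripped via drop_port, so callers like topology_node_memory see something like:
  # { 'instance1' => 512, 'instance2' => 1024 }
  # aggregate_by_labels keeps the full label set (instance, job, ...) as the key instead.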
lib/gitlab/utils/usage_data.rb

...
@@ -77,11 +77,11 @@ module Gitlab
         end
       end

-      def with_prometheus_client
-        if Gitlab::Prometheus::Internal.prometheus_enabled?
-          prometheus_address = Gitlab::Prometheus::Internal.uri
-          yield Gitlab::PrometheusClient.new(prometheus_address, allow_local_requests: true)
-        end
+      def with_prometheus_client(fallback: nil)
+        return fallback unless Gitlab::Prometheus::Internal.prometheus_enabled?
+
+        prometheus_address = Gitlab::Prometheus::Internal.uri
+        yield Gitlab::PrometheusClient.new(prometheus_address, allow_local_requests: true)
       end

       def measure_duration
...
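
The signature change above is what lets callers ask for a typed fallback instead of getting nil back when Prometheus is disabled. A minimal usage sketch, mirroring the spec changes further down:

  # When Gitlab::Prometheus::Internal.prometheus_enabled? is false, the block is never
  # yielded and the fallback is returned; otherwise the block receives a PrometheusClient.
  Gitlab::Utils::UsageData.with_prometheus_client(fallback: []) { |client| client }
  # => [] with Prometheus disabled

This is how topology_fetch_all_data above can pass fallback: {} and always hand an empty hash to the usage ping when no Prometheus is configured.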
spec/lib/gitlab/usage_data_concerns/topology_spec.rb

...
@@ -19,18 +19,14 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
       expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
     end

     it 'contains a topology element' do
       allow_prometheus_queries

       expect(subject).to have_key(:topology)
     end

     context 'tracking node metrics' do
       it 'contains node level metrics for each instance' do
         expect_prometheus_api_to(
           receive_node_memory_query,
           receive_node_cpu_count_query,
-          receive_node_service_memory_query,
+          receive_node_service_memory_rss_query,
+          receive_node_service_memory_uss_query,
+          receive_node_service_memory_pss_query,
           receive_node_service_process_count_query
         )
...
@@ -82,19 +78,51 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
         expect_prometheus_api_to(
           receive_node_memory_query(result: []),
           receive_node_cpu_count_query,
-          receive_node_service_memory_query,
+          receive_node_service_memory_rss_query(result: []),
+          receive_node_service_memory_uss_query(result: []),
+          receive_node_service_memory_pss_query,
           receive_node_service_process_count_query
         )

         keys = subject[:topology][:nodes].flat_map(&:keys)
         expect(keys).not_to include(:node_memory_total_bytes)
         expect(keys).to include(:node_cpus, :node_services)

+        expect(subject[:topology]).to eq({
+          duration_s: 0,
+          nodes: [
+            {
+              node_cpus: 16,
+              node_services: [
+                { name: 'sidekiq', process_count: 15, process_memory_pss: 401 },
+                { name: 'redis', process_count: 1 }
+              ]
+            },
+            {
+              node_cpus: 8,
+              node_services: [
+                { name: 'web', process_count: 10, process_memory_pss: 302 },
+                { name: 'sidekiq', process_count: 5 }
+              ]
+            }
+          ]
+        })
       end
     end

     context 'and no results are found' do
       it 'does not report anything' do
-        expect_prometheus_api_to receive(:aggregate).at_least(:once).and_return({})
+        expect_prometheus_api_to receive(:query).at_least(:once).and_return({})

         expect(subject[:topology]).to eq({
           duration_s: 0,
...
@@ -105,7 +133,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
     context 'and a connection error is raised' do
       it 'does not report anything' do
-        expect_prometheus_api_to receive(:aggregate).and_raise('Connection failed')
+        expect_prometheus_api_to receive(:query).and_raise('Connection failed')

         expect(subject[:topology]).to eq({ duration_s: 0 })
       end
...
@@ -123,7 +151,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
   def receive_node_memory_query(result: nil)
     receive(:query)
-      .with(/node_memory_MemTotal_bytes/, an_instance_of(Hash))
+      .with(/node_memory_total_bytes/, an_instance_of(Hash))
       .and_return(result || [
         {
           'metric' => { 'instance' => 'instance1:8080' },
...
@@ -138,7 +166,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
   def receive_node_cpu_count_query(result: nil)
     receive(:query)
-      .with(/node_cpu_seconds_total/, an_instance_of(Hash))
+      .with(/node_cpus/, an_instance_of(Hash))
       .and_return(result || [
         {
           'metric' => { 'instance' => 'instance2:8090' },
...
@@ -151,46 +179,59 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
     ])
   end

-  def receive_node_service_memory_query(result: nil)
-    receive(:query)
-      .with(/process_.+_memory_bytes/, an_instance_of(Hash))
-      .and_return(result || [
-        # instance 1: runs Puma + a small Sidekiq
-        {
-          'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails', '__name__' => 'ruby_process_resident_memory_bytes' },
-          'value' => [1000, '300']
-        },
-        {
-          'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails', '__name__' => 'ruby_process_unique_memory_bytes' },
-          'value' => [1000, '301']
-        },
-        {
-          'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails', '__name__' => 'ruby_process_proportional_memory_bytes' },
-          'value' => [1000, '302']
-        },
-        {
-          'metric' => { 'instance' => 'instance1:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_resident_memory_bytes' },
-          'value' => [1000, '303']
-        },
-        # instance 2: runs a dedicated Sidekiq + Redis (which uses a different metric name)
-        {
-          'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_resident_memory_bytes' },
-          'value' => [1000, '400']
-        },
-        {
-          'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq', '__name__' => 'ruby_process_proportional_memory_bytes' },
-          'value' => [1000, '401']
-        },
-        {
-          'metric' => { 'instance' => 'instance2:9121', 'job' => 'redis', '__name__' => 'process_resident_memory_bytes' },
-          'value' => [1000, '402']
-        }
-      ])
-  end
+  def receive_node_service_memory_rss_query(result: nil)
+    receive(:query)
+      .with(/process_resident_memory_bytes/, an_instance_of(Hash))
+      .and_return(result || [
+        # instance 1: runs Puma + a small Sidekiq
+        {
+          'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails' },
+          'value' => [1000, '300']
+        },
+        {
+          'metric' => { 'instance' => 'instance1:8090', 'job' => 'gitlab-sidekiq' },
+          'value' => [1000, '303']
+        },
+        # instance 2: runs a dedicated Sidekiq + Redis (which uses a different metric name)
+        {
+          'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq' },
+          'value' => [1000, '400']
+        },
+        {
+          'metric' => { 'instance' => 'instance2:9121', 'job' => 'redis' },
+          'value' => [1000, '402']
+        }
+      ])
+  end
+
+  def receive_node_service_memory_uss_query(result: nil)
+    receive(:query)
+      .with(/process_unique_memory_bytes/, an_instance_of(Hash))
+      .and_return(result || [
+        {
+          'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails' },
+          'value' => [1000, '301']
+        }
+      ])
+  end
+
+  def receive_node_service_memory_pss_query(result: nil)
+    receive(:query)
+      .with(/process_proportional_memory_bytes/, an_instance_of(Hash))
+      .and_return(result || [
+        {
+          'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails' },
+          'value' => [1000, '302']
+        },
+        {
+          'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq' },
+          'value' => [1000, '401']
+        }
+      ])
+  end

   def receive_node_service_process_count_query(result: nil)
     receive(:query)
-      .with(/process_start_time_seconds/, an_instance_of(Hash))
+      .with(/service_process:count/, an_instance_of(Hash))
       .and_return(result || [
         # instance 1
         {
...
spec/lib/gitlab/utils/usage_data_spec.rb

...
@@ -88,13 +88,21 @@ RSpec.describe Gitlab::Utils::UsageData do
     end

     context 'when Prometheus is disabled' do
-      it 'returns nil' do
+      before do
         expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)
+      end

+      it 'returns nil by default' do
         result = described_class.with_prometheus_client { |client| client }

         expect(result).to be nil
       end
+
+      it 'returns fallback if provided' do
+        result = described_class.with_prometheus_client(fallback: []) { |client| client }
+
+        expect(result).to eq([])
+      end
     end
   end
...