Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
abc0e8e6
Commit
abc0e8e6
authored
Jul 01, 2020
by
Matthias Käppler
Committed by
Ash McKenzie
Jul 01, 2020
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Query recorded metrics instead of ad-hoc
See
https://gitlab.com/gitlab-org/omnibus-gitlab/-/merge_requests/4343
parent
731bd999
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
155 additions
and
91 deletions
+155
-91
lib/gitlab/usage_data_concerns/topology.rb
lib/gitlab/usage_data_concerns/topology.rb
+66
-51
lib/gitlab/utils/usage_data.rb
lib/gitlab/utils/usage_data.rb
+5
-5
spec/lib/gitlab/usage_data_concerns/topology_spec.rb
spec/lib/gitlab/usage_data_concerns/topology_spec.rb
+75
-34
spec/lib/gitlab/utils/usage_data_spec.rb
spec/lib/gitlab/utils/usage_data_spec.rb
+9
-1
No files found.
lib/gitlab/usage_data_concerns/topology.rb
View file @
abc0e8e6
...
@@ -18,24 +18,27 @@ module Gitlab
...
@@ -18,24 +18,27 @@ module Gitlab
def
topology_usage_data
def
topology_usage_data
topology_data
,
duration
=
measure_duration
do
topology_data
,
duration
=
measure_duration
do
alt_usage_data
(
fallback:
{})
do
alt_usage_data
(
fallback:
{})
{
topology_fetch_all_data
}
{
nodes:
topology_node_data
}.
compact
end
end
end
{
topology:
topology_data
.
merge
(
duration_s:
duration
)
}
{
topology:
topology_data
.
merge
(
duration_s:
duration
)
}
end
end
private
private
def
topology_node_data
def
topology_fetch_all_data
with_prometheus_client
do
|
client
|
with_prometheus_client
(
fallback:
{})
do
|
client
|
{
nodes:
topology_node_data
(
client
)
}
end
end
def
topology_node_data
(
client
)
# node-level data
# node-level data
by_instance_mem
=
topology_node_memory
(
client
)
by_instance_mem
=
topology_node_memory
(
client
)
by_instance_cpus
=
topology_node_cpus
(
client
)
by_instance_cpus
=
topology_node_cpus
(
client
)
# service-level data
# service-level data
by_instance_by_job_by_metric
_memory
=
topology_all_service_memory
(
client
)
by_instance_by_job_by_type
_memory
=
topology_all_service_memory
(
client
)
by_instance_by_job_process_count
=
topology_all_service_process_count
(
client
)
by_instance_by_job_process_count
=
topology_all_service_process_count
(
client
)
instances
=
Set
.
new
(
by_instance_mem
.
keys
+
by_instance_cpus
.
keys
)
instances
=
Set
.
new
(
by_instance_mem
.
keys
+
by_instance_cpus
.
keys
)
...
@@ -44,29 +47,41 @@ module Gitlab
...
@@ -44,29 +47,41 @@ module Gitlab
node_memory_total_bytes:
by_instance_mem
[
instance
],
node_memory_total_bytes:
by_instance_mem
[
instance
],
node_cpus:
by_instance_cpus
[
instance
],
node_cpus:
by_instance_cpus
[
instance
],
node_services:
node_services:
topology_node_services
(
instance
,
by_instance_by_job_process_count
,
by_instance_by_job_by_metric
_memory
)
topology_node_services
(
instance
,
by_instance_by_job_process_count
,
by_instance_by_job_by_type
_memory
)
}.
compact
}.
compact
end
end
end
end
end
def
topology_node_memory
(
client
)
def
topology_node_memory
(
client
)
aggregate_
single
(
client
,
'avg (node_memory_MemTotal_bytes) by (instance)
'
)
aggregate_
by_instance
(
client
,
'gitlab_usage_ping:node_memory_total_bytes:avg
'
)
end
end
def
topology_node_cpus
(
client
)
def
topology_node_cpus
(
client
)
aggregate_
single
(
client
,
'count (node_cpu_seconds_total{mode="idle"}) by (instance)
'
)
aggregate_
by_instance
(
client
,
'gitlab_usage_ping:node_cpus:count
'
)
end
end
def
topology_all_service_memory
(
client
)
def
topology_all_service_memory
(
client
)
aggregate_many
(
{
client
,
rss:
topology_service_memory_rss
(
client
),
'avg ({__name__ =~ "(ruby_){0,1}process_(resident|unique|proportional)_memory_bytes", job != "gitlab_exporter_process"}) by (instance, job, __name__)'
uss:
topology_service_memory_uss
(
client
),
)
pss:
topology_service_memory_pss
(
client
)
}
end
def
topology_service_memory_rss
(
client
)
aggregate_by_labels
(
client
,
'gitlab_usage_ping:node_service_process_resident_memory_bytes:avg'
)
end
def
topology_service_memory_uss
(
client
)
aggregate_by_labels
(
client
,
'gitlab_usage_ping:node_service_process_unique_memory_bytes:avg'
)
end
def
topology_service_memory_pss
(
client
)
aggregate_by_labels
(
client
,
'gitlab_usage_ping:node_service_process_proportional_memory_bytes:avg'
)
end
end
def
topology_all_service_process_count
(
client
)
def
topology_all_service_process_count
(
client
)
aggregate_
many
(
client
,
'count ({__name__ =~ "(ruby_){0,1}process_start_time_seconds", job != "gitlab_exporter_process"}) by (instance, job)
'
)
aggregate_
by_labels
(
client
,
'gitlab_usage_ping:node_service_process:count
'
)
end
end
def
topology_node_services
(
instance
,
all_process_counts
,
all_process_memory
)
def
topology_node_services
(
instance
,
all_process_counts
,
all_process_memory
)
...
@@ -92,24 +107,21 @@ module Gitlab
...
@@ -92,24 +107,21 @@ module Gitlab
end
end
end
end
def
topology_instance_service_memory
(
instance
,
all_instance_data
)
# Given a hash mapping memory set types to Prometheus response data, returns a hash
topology_data_for_instance
(
instance
,
all_instance_data
).
each_with_object
({})
do
|
entry
,
hash
|
# mapping instance/node names to services and their respective memory use in bytes
metric
,
memory
=
entry
def
topology_instance_service_memory
(
instance
,
instance_data_by_type
)
result
=
{}
instance_data_by_type
.
each
do
|
memory_type
,
instance_data
|
topology_data_for_instance
(
instance
,
instance_data
).
each
do
|
metric
,
memory_bytes
|
job
=
metric
[
'job'
]
job
=
metric
[
'job'
]
key
=
key
=
"process_memory_
#{
memory_type
}
"
.
to_sym
case
metric
[
'__name__'
]
when
match_process_memory_metric_for_type
(
'resident'
)
then
:process_memory_rss
when
match_process_memory_metric_for_type
(
'unique'
)
then
:process_memory_uss
when
match_process_memory_metric_for_type
(
'proportional'
)
then
:process_memory_pss
end
hash
[
job
]
||=
{}
result
[
job
]
||=
{}
hash
[
job
][
key
]
||=
memory
result
[
job
][
key
]
||=
memory_bytes
end
end
end
end
def
match_process_memory_metric_for_type
(
type
)
result
/(ruby_){0,1}process_
#{
type
}
_memory_bytes/
end
end
def
topology_data_for_instance
(
instance
,
all_instance_data
)
def
topology_data_for_instance
(
instance
,
all_instance_data
)
...
@@ -120,14 +132,17 @@ module Gitlab
...
@@ -120,14 +132,17 @@ module Gitlab
instance
.
gsub
(
/:.+$/
,
''
)
instance
.
gsub
(
/:.+$/
,
''
)
end
end
# Will retain a single `instance` key that values are mapped to
def
one_week_average
(
query
)
def
aggregate_single
(
client
,
query
)
"avg_over_time (
#{
query
}
[1w])"
client
.
aggregate
(
query
)
{
|
metric
|
drop_port
(
metric
[
'instance'
])
}
end
def
aggregate_by_instance
(
client
,
query
)
client
.
aggregate
(
one_week_average
(
query
))
{
|
metric
|
drop_port
(
metric
[
'instance'
])
}
end
end
# Will retain a composite key that values are mapped to
# Will retain a composite key that values are mapped to
def
aggregate_
many
(
client
,
query
)
def
aggregate_
by_labels
(
client
,
query
)
client
.
aggregate
(
query
)
do
|
metric
|
client
.
aggregate
(
one_week_average
(
query
)
)
do
|
metric
|
metric
[
'instance'
]
=
drop_port
(
metric
[
'instance'
])
metric
[
'instance'
]
=
drop_port
(
metric
[
'instance'
])
metric
metric
end
end
...
...
lib/gitlab/utils/usage_data.rb
View file @
abc0e8e6
...
@@ -77,12 +77,12 @@ module Gitlab
...
@@ -77,12 +77,12 @@ module Gitlab
end
end
end
end
def
with_prometheus_client
def
with_prometheus_client
(
fallback:
nil
)
if
Gitlab
::
Prometheus
::
Internal
.
prometheus_enabled?
return
fallback
unless
Gitlab
::
Prometheus
::
Internal
.
prometheus_enabled?
prometheus_address
=
Gitlab
::
Prometheus
::
Internal
.
uri
prometheus_address
=
Gitlab
::
Prometheus
::
Internal
.
uri
yield
Gitlab
::
PrometheusClient
.
new
(
prometheus_address
,
allow_local_requests:
true
)
yield
Gitlab
::
PrometheusClient
.
new
(
prometheus_address
,
allow_local_requests:
true
)
end
end
end
def
measure_duration
def
measure_duration
result
=
nil
result
=
nil
...
...
spec/lib/gitlab/usage_data_concerns/topology_spec.rb
View file @
abc0e8e6
...
@@ -19,18 +19,14 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
...
@@ -19,18 +19,14 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
expect
(
Gitlab
::
Prometheus
::
Internal
).
to
receive
(
:uri
).
and_return
(
'http://prom:9090'
)
expect
(
Gitlab
::
Prometheus
::
Internal
).
to
receive
(
:uri
).
and_return
(
'http://prom:9090'
)
end
end
it
'contains a topology element'
do
allow_prometheus_queries
expect
(
subject
).
to
have_key
(
:topology
)
end
context
'tracking node metrics'
do
context
'tracking node metrics'
do
it
'contains node level metrics for each instance'
do
it
'contains node level metrics for each instance'
do
expect_prometheus_api_to
(
expect_prometheus_api_to
(
receive_node_memory_query
,
receive_node_memory_query
,
receive_node_cpu_count_query
,
receive_node_cpu_count_query
,
receive_node_service_memory_query
,
receive_node_service_memory_rss_query
,
receive_node_service_memory_uss_query
,
receive_node_service_memory_pss_query
,
receive_node_service_process_count_query
receive_node_service_process_count_query
)
)
...
@@ -82,19 +78,51 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
...
@@ -82,19 +78,51 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
expect_prometheus_api_to
(
expect_prometheus_api_to
(
receive_node_memory_query
(
result:
[]),
receive_node_memory_query
(
result:
[]),
receive_node_cpu_count_query
,
receive_node_cpu_count_query
,
receive_node_service_memory_query
,
receive_node_service_memory_rss_query
(
result:
[]),
receive_node_service_memory_uss_query
(
result:
[]),
receive_node_service_memory_pss_query
,
receive_node_service_process_count_query
receive_node_service_process_count_query
)
)
keys
=
subject
[
:topology
][
:nodes
].
flat_map
(
&
:keys
)
expect
(
subject
[
:topology
]).
to
eq
({
expect
(
keys
).
not_to
include
(
:node_memory_total_bytes
)
duration_s:
0
,
expect
(
keys
).
to
include
(
:node_cpus
,
:node_services
)
nodes:
[
{
node_cpus:
16
,
node_services:
[
{
name:
'sidekiq'
,
process_count:
15
,
process_memory_pss:
401
},
{
name:
'redis'
,
process_count:
1
}
]
},
{
node_cpus:
8
,
node_services:
[
{
name:
'web'
,
process_count:
10
,
process_memory_pss:
302
},
{
name:
'sidekiq'
,
process_count:
5
}
]
}
]
})
end
end
end
end
context
'and no results are found'
do
context
'and no results are found'
do
it
'does not report anything'
do
it
'does not report anything'
do
expect_prometheus_api_to
receive
(
:
aggregate
).
at_least
(
:once
).
and_return
({})
expect_prometheus_api_to
receive
(
:
query
).
at_least
(
:once
).
and_return
({})
expect
(
subject
[
:topology
]).
to
eq
({
expect
(
subject
[
:topology
]).
to
eq
({
duration_s:
0
,
duration_s:
0
,
...
@@ -105,7 +133,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
...
@@ -105,7 +133,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
context
'and a connection error is raised'
do
context
'and a connection error is raised'
do
it
'does not report anything'
do
it
'does not report anything'
do
expect_prometheus_api_to
receive
(
:
aggregate
).
and_raise
(
'Connection failed'
)
expect_prometheus_api_to
receive
(
:
query
).
and_raise
(
'Connection failed'
)
expect
(
subject
[
:topology
]).
to
eq
({
duration_s:
0
})
expect
(
subject
[
:topology
]).
to
eq
({
duration_s:
0
})
end
end
...
@@ -123,7 +151,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
...
@@ -123,7 +151,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
def
receive_node_memory_query
(
result:
nil
)
def
receive_node_memory_query
(
result:
nil
)
receive
(
:query
)
receive
(
:query
)
.
with
(
/node_memory_
MemT
otal_bytes/
,
an_instance_of
(
Hash
))
.
with
(
/node_memory_
t
otal_bytes/
,
an_instance_of
(
Hash
))
.
and_return
(
result
||
[
.
and_return
(
result
||
[
{
{
'metric'
=>
{
'instance'
=>
'instance1:8080'
},
'metric'
=>
{
'instance'
=>
'instance1:8080'
},
...
@@ -138,7 +166,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
...
@@ -138,7 +166,7 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
def
receive_node_cpu_count_query
(
result:
nil
)
def
receive_node_cpu_count_query
(
result:
nil
)
receive
(
:query
)
receive
(
:query
)
.
with
(
/node_cpu
_seconds_total
/
,
an_instance_of
(
Hash
))
.
with
(
/node_cpu
s
/
,
an_instance_of
(
Hash
))
.
and_return
(
result
||
[
.
and_return
(
result
||
[
{
{
'metric'
=>
{
'instance'
=>
'instance2:8090'
},
'metric'
=>
{
'instance'
=>
'instance2:8090'
},
...
@@ -151,46 +179,59 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
...
@@ -151,46 +179,59 @@ RSpec.describe Gitlab::UsageDataConcerns::Topology do
])
])
end
end
def
receive_node_service_memory_query
(
result:
nil
)
def
receive_node_service_memory_
rss_
query
(
result:
nil
)
receive
(
:query
)
receive
(
:query
)
.
with
(
/process_
.+
_memory_bytes/
,
an_instance_of
(
Hash
))
.
with
(
/process_
resident
_memory_bytes/
,
an_instance_of
(
Hash
))
.
and_return
(
result
||
[
.
and_return
(
result
||
[
# instance 1: runs Puma + a small Sidekiq
{
{
'metric'
=>
{
'instance'
=>
'instance1:8080'
,
'job'
=>
'gitlab-rails'
,
'__name__'
=>
'ruby_process_resident_memory_bytes'
},
'metric'
=>
{
'instance'
=>
'instance1:8080'
,
'job'
=>
'gitlab-rails'
},
'value'
=>
[
1000
,
'300'
]
'value'
=>
[
1000
,
'300'
]
},
},
{
{
'metric'
=>
{
'instance'
=>
'instance1:8080'
,
'job'
=>
'gitlab-rails'
,
'__name__'
=>
'ruby_process_unique_memory_bytes'
},
'metric'
=>
{
'instance'
=>
'instance1:8090'
,
'job'
=>
'gitlab-sidekiq'
},
'value'
=>
[
1000
,
'301'
]
},
{
'metric'
=>
{
'instance'
=>
'instance1:8080'
,
'job'
=>
'gitlab-rails'
,
'__name__'
=>
'ruby_process_proportional_memory_bytes'
},
'value'
=>
[
1000
,
'302'
]
},
{
'metric'
=>
{
'instance'
=>
'instance1:8090'
,
'job'
=>
'gitlab-sidekiq'
,
'__name__'
=>
'ruby_process_resident_memory_bytes'
},
'value'
=>
[
1000
,
'303'
]
'value'
=>
[
1000
,
'303'
]
},
},
# instance 2: runs a dedicated Sidekiq + Redis (which uses a different metric name)
# instance 2: runs a dedicated Sidekiq + Redis (which uses a different metric name)
{
{
'metric'
=>
{
'instance'
=>
'instance2:8090'
,
'job'
=>
'gitlab-sidekiq'
,
'__name__'
=>
'ruby_process_resident_memory_bytes'
},
'metric'
=>
{
'instance'
=>
'instance2:8090'
,
'job'
=>
'gitlab-sidekiq'
},
'value'
=>
[
1000
,
'400'
]
'value'
=>
[
1000
,
'400'
]
},
},
{
{
'metric'
=>
{
'instance'
=>
'instance2:8090'
,
'job'
=>
'gitlab-sidekiq'
,
'__name__'
=>
'ruby_process_proportional_memory_bytes'
},
'metric'
=>
{
'instance'
=>
'instance2:9121'
,
'job'
=>
'redis'
},
'value'
=>
[
1000
,
'401'
]
'value'
=>
[
1000
,
'402'
]
}
])
end
def
receive_node_service_memory_uss_query
(
result:
nil
)
receive
(
:query
)
.
with
(
/process_unique_memory_bytes/
,
an_instance_of
(
Hash
))
.
and_return
(
result
||
[
{
'metric'
=>
{
'instance'
=>
'instance1:8080'
,
'job'
=>
'gitlab-rails'
},
'value'
=>
[
1000
,
'301'
]
}
])
end
def
receive_node_service_memory_pss_query
(
result:
nil
)
receive
(
:query
)
.
with
(
/process_proportional_memory_bytes/
,
an_instance_of
(
Hash
))
.
and_return
(
result
||
[
{
'metric'
=>
{
'instance'
=>
'instance1:8080'
,
'job'
=>
'gitlab-rails'
},
'value'
=>
[
1000
,
'302'
]
},
},
{
{
'metric'
=>
{
'instance'
=>
'instance2:
9121'
,
'job'
=>
'redis'
,
'__name__'
=>
'process_resident_memory_bytes
'
},
'metric'
=>
{
'instance'
=>
'instance2:
8090'
,
'job'
=>
'gitlab-sidekiq
'
},
'value'
=>
[
1000
,
'40
2
'
]
'value'
=>
[
1000
,
'40
1
'
]
}
}
])
])
end
end
def
receive_node_service_process_count_query
(
result:
nil
)
def
receive_node_service_process_count_query
(
result:
nil
)
receive
(
:query
)
receive
(
:query
)
.
with
(
/
process_start_time_seconds
/
,
an_instance_of
(
Hash
))
.
with
(
/
service_process:count
/
,
an_instance_of
(
Hash
))
.
and_return
(
result
||
[
.
and_return
(
result
||
[
# instance 1
# instance 1
{
{
...
...
spec/lib/gitlab/utils/usage_data_spec.rb
View file @
abc0e8e6
...
@@ -88,13 +88,21 @@ RSpec.describe Gitlab::Utils::UsageData do
...
@@ -88,13 +88,21 @@ RSpec.describe Gitlab::Utils::UsageData do
end
end
context
'when Prometheus is disabled'
do
context
'when Prometheus is disabled'
do
it
'returns nil'
do
before
do
expect
(
Gitlab
::
Prometheus
::
Internal
).
to
receive
(
:prometheus_enabled?
).
and_return
(
false
)
expect
(
Gitlab
::
Prometheus
::
Internal
).
to
receive
(
:prometheus_enabled?
).
and_return
(
false
)
end
it
'returns nil by default'
do
result
=
described_class
.
with_prometheus_client
{
|
client
|
client
}
result
=
described_class
.
with_prometheus_client
{
|
client
|
client
}
expect
(
result
).
to
be
nil
expect
(
result
).
to
be
nil
end
end
it
'returns fallback if provided'
do
result
=
described_class
.
with_prometheus_client
(
fallback:
[])
{
|
client
|
client
}
expect
(
result
).
to
eq
([])
end
end
end
end
end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment