Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
62fb215e
Commit
62fb215e
authored
Oct 02, 2019
by
Qingyu Zhao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add Sidekiq memory killer prometheus metrics
parent
2cd6006a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
93 additions
and
13 deletions
+93
-13
config/initializers/7_prometheus_metrics.rb
config/initializers/7_prometheus_metrics.rb
+3
-0
lib/gitlab/sidekiq_daemon/memory_killer.rb
lib/gitlab/sidekiq_daemon/memory_killer.rb
+39
-2
spec/lib/gitlab/sidekiq_daemon/memory_killer_spec.rb
spec/lib/gitlab/sidekiq_daemon/memory_killer_spec.rb
+51
-11
No files found.
config/initializers/7_prometheus_metrics.rb
View file @
62fb215e
...
...
@@ -41,6 +41,9 @@ Sidekiq.configure_server do |config|
# after all workers have forked, but I don't know how at this point.
::
Prometheus
::
Client
.
reinitialize_on_pid_change
(
force:
true
)
# temporary solution before fix https://gitlab.com/gitlab-org/gitlab/issues/33125
::
Prometheus
::
Client
.
reinitialize_on_pid_change
(
force:
true
)
Gitlab
::
Metrics
::
Exporter
::
SidekiqExporter
.
instance
.
start
end
end
...
...
lib/gitlab/sidekiq_daemon/memory_killer.rb
View file @
62fb215e
...
...
@@ -20,15 +20,33 @@ module Gitlab
# Developer/admin should always set `memory_killer_max_memory_growth_kb` explicitly
# In case not set, default to 300M. This is for extra-safe.
DEFAULT_MAX_MEMORY_GROWTH_KB
=
300_000
# Phases of memory killer
PHASE
=
{
running:
1
,
above_soft_limit:
2
,
stop_fetching_new_jobs:
3
,
shutting_down:
4
,
killing_sidekiq:
5
}.
freeze
def
initialize
super
@enabled
=
true
@metrics
=
init_metrics
end
private
def
init_metrics
{
sidekiq_current_rss:
::
Gitlab
::
Metrics
.
gauge
(
:sidekiq_current_rss
,
'Current RSS of Sidekiq Worker'
),
sidekiq_memory_killer_soft_limit_rss:
::
Gitlab
::
Metrics
.
gauge
(
:sidekiq_memory_killer_soft_limit_rss
,
'Current soft_limit_rss of Sidekiq Worker'
),
sidekiq_memory_killer_hard_limit_rss:
::
Gitlab
::
Metrics
.
gauge
(
:sidekiq_memory_killer_hard_limit_rss
,
'Current hard_limit_rss of Sidekiq Worker'
),
sidekiq_memory_killer_phase:
::
Gitlab
::
Metrics
.
gauge
(
:sidekiq_memory_killer_phase
,
'Current phase of Sidekiq Worker'
)
}
end
def
run_thread
Sidekiq
.
logger
.
info
(
class:
self
.
class
.
to_s
,
...
...
@@ -77,27 +95,37 @@ module Gitlab
# Tell Sidekiq to stop fetching new jobs
# We first SIGNAL and then wait given time
# We also monitor a number of running jobs and allow to restart early
update_metrics
(
PHASE
[
:stop_fetching_new_jobs
],
get_rss
,
get_soft_limit_rss
,
get_hard_limit_rss
)
signal_and_wait
(
SHUTDOWN_TIMEOUT_SECONDS
,
'SIGTSTP'
,
'stop fetching new jobs'
)
return
unless
enabled?
# Tell sidekiq to restart itself
# Keep extra safe to wait `Sidekiq.options[:timeout] + 2` seconds before SIGKILL
update_metrics
(
PHASE
[
:shutting_down
],
get_rss
,
get_soft_limit_rss
,
get_hard_limit_rss
)
signal_and_wait
(
Sidekiq
.
options
[
:timeout
]
+
2
,
'SIGTERM'
,
'gracefully shut down'
)
return
unless
enabled?
# Ideally we should never reach this condition
# Wait for Sidekiq to shutdown gracefully, and kill it if it didn't
# Kill the whole pgroup, so we can be sure no children are left behind
update_metrics
(
PHASE
[
:killing_sidekiq
],
get_rss
,
get_soft_limit_rss
,
get_hard_limit_rss
)
signal_pgroup
(
'SIGKILL'
,
'die'
)
end
def
rss_within_range?
phase
=
PHASE
[
:running
]
current_rss
=
nil
soft_limit_rss
=
nil
hard_limit_rss
=
nil
deadline
=
Gitlab
::
Metrics
::
System
.
monotonic_time
+
GRACE_BALLOON_SECONDS
.
seconds
loop
do
return
true
unless
enabled?
current_rss
=
get_rss
soft_limit_rss
=
get_soft_limit_rss
hard_limit_rss
=
get_hard_limit_rss
update_metrics
(
phase
,
current_rss
,
soft_limit_rss
,
hard_limit_rss
)
# RSS go above hard limit should trigger forcible shutdown right away
break
if
current_rss
>
hard_limit_rss
...
...
@@ -105,6 +133,8 @@ module Gitlab
# RSS go below the soft limit
return
true
if
current_rss
<
soft_limit_rss
phase
=
PHASE
[
:above_soft_limit
]
# RSS did not go below the soft limit within deadline, restart
break
if
Gitlab
::
Metrics
::
System
.
monotonic_time
>
deadline
...
...
@@ -116,6 +146,13 @@ module Gitlab
false
end
def
update_metrics
(
phase
,
current_rss
,
soft_limit_rss
,
hard_limit_rss
)
@metrics
[
:sidekiq_memory_killer_phase
].
set
({},
phase
)
@metrics
[
:sidekiq_current_rss
].
set
({},
current_rss
)
@metrics
[
:sidekiq_memory_killer_soft_limit_rss
].
set
({},
soft_limit_rss
)
@metrics
[
:sidekiq_memory_killer_hard_limit_rss
].
set
({},
hard_limit_rss
)
end
def
log_rss_out_of_range
(
current_rss
,
hard_limit_rss
,
soft_limit_rss
)
Sidekiq
.
logger
.
warn
(
class:
self
.
class
.
to_s
,
...
...
@@ -143,11 +180,11 @@ module Gitlab
output
.
to_i
end
def
soft_limit_rss
def
get_
soft_limit_rss
SOFT_LIMIT_RSS_KB
+
rss_increase_by_jobs
end
def
hard_limit_rss
def
get_
hard_limit_rss
HARD_LIMIT_RSS_KB
end
...
...
spec/lib/gitlab/sidekiq_daemon/memory_killer_spec.rb
View file @
62fb215e
...
...
@@ -5,11 +5,23 @@ require 'spec_helper'
describe
Gitlab
::
SidekiqDaemon
::
MemoryKiller
do
let
(
:memory_killer
)
{
described_class
.
new
}
let
(
:pid
)
{
12345
}
let
(
:current_rss_metric
)
{
double
(
'current rss metric'
)
}
let
(
:soft_limit_rss_metric
)
{
double
(
'soft limit rss metric'
)
}
let
(
:hard_limit_rss_metric
)
{
double
(
'hard limit rss metric'
)
}
let
(
:current_phase_metric
)
{
double
(
'current phase metric'
)
}
before
do
allow
(
memory_killer
).
to
receive
(
:pid
).
and_return
(
pid
)
allow
(
Sidekiq
.
logger
).
to
receive
(
:info
)
allow
(
Sidekiq
.
logger
).
to
receive
(
:warn
)
allow
(
Gitlab
::
Metrics
).
to
receive
(
:gauge
).
with
(
:sidekiq_current_rss
,
anything
).
and_return
(
current_rss_metric
)
allow
(
Gitlab
::
Metrics
).
to
receive
(
:gauge
).
with
(
:sidekiq_memory_killer_soft_limit_rss
,
anything
).
and_return
(
soft_limit_rss_metric
)
allow
(
Gitlab
::
Metrics
).
to
receive
(
:gauge
).
with
(
:sidekiq_memory_killer_hard_limit_rss
,
anything
).
and_return
(
hard_limit_rss_metric
)
allow
(
Gitlab
::
Metrics
).
to
receive
(
:gauge
).
with
(
:sidekiq_memory_killer_phase
,
anything
).
and_return
(
current_phase_metric
)
allow
(
memory_killer
).
to
receive
(
:pid
).
and_return
(
pid
)
allow
(
current_rss_metric
).
to
receive
(
:set
)
allow
(
soft_limit_rss_metric
).
to
receive
(
:set
)
allow
(
hard_limit_rss_metric
).
to
receive
(
:set
)
allow
(
current_phase_metric
).
to
receive
(
:set
)
end
describe
'#run_thread'
do
...
...
@@ -121,8 +133,10 @@ describe Gitlab::SidekiqDaemon::MemoryKiller do
it
'return true when everything is within limit'
do
expect
(
memory_killer
).
to
receive
(
:get_rss
).
and_return
(
100
)
expect
(
memory_killer
).
to
receive
(
:soft_limit_rss
).
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:hard_limit_rss
).
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:get_soft_limit_rss
).
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:get_hard_limit_rss
).
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:running
],
100
,
200
,
300
)
expect
(
Gitlab
::
Metrics
::
System
).
to
receive
(
:monotonic_time
).
and_call_original
expect
(
memory_killer
).
not_to
receive
(
:log_rss_out_of_range
)
...
...
@@ -132,9 +146,10 @@ describe Gitlab::SidekiqDaemon::MemoryKiller do
it
'return false when rss exceeds hard_limit_rss'
do
expect
(
memory_killer
).
to
receive
(
:get_rss
).
and_return
(
400
)
expect
(
memory_killer
).
to
receive
(
:soft_limit_rss
).
at_least
(
:once
).
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:hard_limit_rss
).
at_least
(
:once
).
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:
get_
soft_limit_rss
).
at_least
(
:once
).
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:
get_
hard_limit_rss
).
at_least
(
:once
).
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:running
],
400
,
200
,
300
)
expect
(
Gitlab
::
Metrics
::
System
).
to
receive
(
:monotonic_time
).
and_call_original
expect
(
memory_killer
).
to
receive
(
:log_rss_out_of_range
).
with
(
400
,
300
,
200
)
...
...
@@ -144,9 +159,11 @@ describe Gitlab::SidekiqDaemon::MemoryKiller do
it
'return false when rss exceed hard_limit_rss after a while'
do
expect
(
memory_killer
).
to
receive
(
:get_rss
).
and_return
(
250
,
400
)
expect
(
memory_killer
).
to
receive
(
:soft_limit_rss
).
at_least
(
:once
).
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:hard_limit_rss
).
at_least
(
:once
).
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:
get_
soft_limit_rss
).
at_least
(
:once
).
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:
get_
hard_limit_rss
).
at_least
(
:once
).
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:running
],
250
,
200
,
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:above_soft_limit
],
400
,
200
,
300
)
expect
(
Gitlab
::
Metrics
::
System
).
to
receive
(
:monotonic_time
).
twice
.
and_call_original
expect
(
memory_killer
).
to
receive
(
:sleep
).
with
(
check_interval_seconds
)
...
...
@@ -157,9 +174,11 @@ describe Gitlab::SidekiqDaemon::MemoryKiller do
it
'return true when rss below soft_limit_rss after a while within GRACE_BALLOON_SECONDS'
do
expect
(
memory_killer
).
to
receive
(
:get_rss
).
and_return
(
250
,
100
)
expect
(
memory_killer
).
to
receive
(
:soft_limit_rss
).
and_return
(
200
,
200
)
expect
(
memory_killer
).
to
receive
(
:hard_limit_rss
).
and_return
(
300
,
300
)
expect
(
memory_killer
).
to
receive
(
:
get_
soft_limit_rss
).
and_return
(
200
,
200
)
expect
(
memory_killer
).
to
receive
(
:
get_
hard_limit_rss
).
and_return
(
300
,
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:running
],
250
,
200
,
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:above_soft_limit
],
100
,
200
,
300
)
expect
(
Gitlab
::
Metrics
::
System
).
to
receive
(
:monotonic_time
).
twice
.
and_call_original
expect
(
memory_killer
).
to
receive
(
:sleep
).
with
(
check_interval_seconds
)
...
...
@@ -170,9 +189,11 @@ describe Gitlab::SidekiqDaemon::MemoryKiller do
it
'return false when rss exceed soft_limit_rss longer than GRACE_BALLOON_SECONDS'
do
expect
(
memory_killer
).
to
receive
(
:get_rss
).
exactly
(
4
).
times
.
and_return
(
250
)
expect
(
memory_killer
).
to
receive
(
:
soft_limit_rss
).
exactly
(
5
).
times
.
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:
hard_limit_rss
).
exactly
(
5
).
times
.
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:
get_soft_limit_rss
).
exactly
(
4
).
times
.
and_return
(
200
)
expect
(
memory_killer
).
to
receive
(
:
get_hard_limit_rss
).
exactly
(
4
).
times
.
and_return
(
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:running
],
250
,
200
,
300
)
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
exactly
(
3
).
times
.
with
(
described_class
::
PHASE
[
:above_soft_limit
],
250
,
200
,
300
)
expect
(
Gitlab
::
Metrics
::
System
).
to
receive
(
:monotonic_time
).
exactly
(
5
).
times
.
and_call_original
expect
(
memory_killer
).
to
receive
(
:sleep
).
exactly
(
3
).
times
.
with
(
check_interval_seconds
).
and_call_original
...
...
@@ -190,11 +211,17 @@ describe Gitlab::SidekiqDaemon::MemoryKiller do
before
do
stub_const
(
"
#{
described_class
}
::SHUTDOWN_TIMEOUT_SECONDS"
,
shutdown_timeout_seconds
)
allow
(
Sidekiq
).
to
receive
(
:options
).
and_return
(
timeout:
9
)
allow
(
memory_killer
).
to
receive
(
:get_rss
).
and_return
(
100
)
allow
(
memory_killer
).
to
receive
(
:get_soft_limit_rss
).
and_return
(
200
)
allow
(
memory_killer
).
to
receive
(
:get_hard_limit_rss
).
and_return
(
300
)
end
it
'send signal'
do
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:stop_fetching_new_jobs
],
100
,
200
,
300
).
ordered
expect
(
memory_killer
).
to
receive
(
:signal_and_wait
).
with
(
shutdown_timeout_seconds
,
'SIGTSTP'
,
'stop fetching new jobs'
).
ordered
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:shutting_down
],
100
,
200
,
300
).
ordered
expect
(
memory_killer
).
to
receive
(
:signal_and_wait
).
with
(
11
,
'SIGTERM'
,
'gracefully shut down'
).
ordered
expect
(
memory_killer
).
to
receive
(
:update_metrics
).
with
(
described_class
::
PHASE
[
:killing_sidekiq
],
100
,
200
,
300
).
ordered
expect
(
memory_killer
).
to
receive
(
:signal_pgroup
).
with
(
'SIGKILL'
,
'die'
).
ordered
subject
...
...
@@ -401,4 +428,17 @@ describe Gitlab::SidekiqDaemon::MemoryKiller do
expect
(
subject
).
to
eq
(
10
)
end
end
describe
'#update_metrics'
do
subject
{
memory_killer
.
send
(
:update_metrics
,
2
,
150
,
200
,
300
)
}
it
'calls gitlab metrics gauge set methods'
do
expect
(
current_phase_metric
).
to
receive
(
:set
).
with
({},
2
)
expect
(
current_rss_metric
).
to
receive
(
:set
).
with
({},
150
)
expect
(
soft_limit_rss_metric
).
to
receive
(
:set
).
with
({},
200
)
expect
(
hard_limit_rss_metric
).
to
receive
(
:set
).
with
({},
300
)
subject
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment