Commit 887cab1a authored by Oswaldo Ferreira's avatar Oswaldo Ferreira

Support graceful timeout for Sidekiq Cluster processes

Considering we're moving towards using Sidekiq Cluster
only and customers using Omnibus are allowed to configure
a Sidekiq timeout through -t flag, here we support a
-t flag for `bin/sidekiq-cluster`.

Here's how it works:

Internally, we'll pass the given value to Sidekiq (`-t`) flag.
For whatever value given, Sidekiq Cluster will wait for 5
extra seconds in order to hard stop all remaining alive processes.

So for instance, if `10` is given, Sidekiq will try to gracefully
terminate within this time. If it gets stuck for some reason, the
processes will be sigkilled within `15` seconds (total wait time).
parent 9e713e25
---
title: Support custom graceful timeout for Sidekiq Cluster processes
merge_request: 27710
author:
type: added
...@@ -62,21 +62,28 @@ module Gitlab ...@@ -62,21 +62,28 @@ module Gitlab
# directory - The directory of the Rails application. # directory - The directory of the Rails application.
# #
# Returns an Array containing the PIDs of the started processes. # Returns an Array containing the PIDs of the started processes.
def self.start(queues, env: :development, directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, dryrun: false) def self.start(queues, env: :development, directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, timeout: CLI::DEFAULT_SOFT_TIMEOUT_SECONDS, dryrun: false)
queues.map.with_index do |pair, index| queues.map.with_index do |pair, index|
start_sidekiq(pair, env: env, directory: directory, max_concurrency: max_concurrency, min_concurrency: min_concurrency, worker_id: index, dryrun: dryrun) start_sidekiq(pair, env: env,
directory: directory,
max_concurrency: max_concurrency,
min_concurrency: min_concurrency,
worker_id: index,
timeout: timeout,
dryrun: dryrun)
end end
end end
# Starts a Sidekiq process that processes _only_ the given queues. # Starts a Sidekiq process that processes _only_ the given queues.
# #
# Returns the PID of the started process. # Returns the PID of the started process.
def self.start_sidekiq(queues, env:, directory:, max_concurrency:, min_concurrency:, worker_id:, dryrun:) def self.start_sidekiq(queues, env:, directory:, max_concurrency:, min_concurrency:, worker_id:, timeout:, dryrun:)
counts = count_by_queue(queues) counts = count_by_queue(queues)
cmd = %w[bundle exec sidekiq] cmd = %w[bundle exec sidekiq]
cmd << "-c#{self.concurrency(queues, min_concurrency, max_concurrency)}" cmd << "-c#{self.concurrency(queues, min_concurrency, max_concurrency)}"
cmd << "-e#{env}" cmd << "-e#{env}"
cmd << "-t#{timeout}"
cmd << "-gqueues:#{proc_details(counts)}" cmd << "-gqueues:#{proc_details(counts)}"
cmd << "-r#{directory}" cmd << "-r#{directory}"
......
...@@ -8,9 +8,17 @@ module Gitlab ...@@ -8,9 +8,17 @@ module Gitlab
module SidekiqCluster module SidekiqCluster
class CLI class CLI
CHECK_TERMINATE_INTERVAL_SECONDS = 1 CHECK_TERMINATE_INTERVAL_SECONDS = 1
# How long to wait in total when asking for a clean termination
# Sidekiq default to self-terminate is 25s # How long to wait when asking for a clean termination.
TERMINATE_TIMEOUT_SECONDS = 30 # It maps the Sidekiq default timeout:
# https://github.com/mperham/sidekiq/wiki/Signals#term
#
# This value is passed to Sidekiq's `-t` if none
# is given through arguments.
DEFAULT_SOFT_TIMEOUT_SECONDS = 25
# After surpassing the soft timeout.
DEFAULT_HARD_TIMEOUT_SECONDS = 5
CommandError = Class.new(StandardError) CommandError = Class.new(StandardError)
...@@ -74,7 +82,8 @@ module Gitlab ...@@ -74,7 +82,8 @@ module Gitlab
directory: @rails_path, directory: @rails_path,
max_concurrency: @max_concurrency, max_concurrency: @max_concurrency,
min_concurrency: @min_concurrency, min_concurrency: @min_concurrency,
dryrun: @dryrun dryrun: @dryrun,
timeout: soft_timeout_seconds
) )
return if @dryrun return if @dryrun
...@@ -88,6 +97,15 @@ module Gitlab ...@@ -88,6 +97,15 @@ module Gitlab
SidekiqCluster.write_pid(@pid) if @pid SidekiqCluster.write_pid(@pid) if @pid
end end
def soft_timeout_seconds
@soft_timeout_seconds || DEFAULT_SOFT_TIMEOUT_SECONDS
end
# The amount of time it'll wait for killing the alive Sidekiq processes.
def hard_timeout_seconds
soft_timeout_seconds + DEFAULT_HARD_TIMEOUT_SECONDS
end
def monotonic_time def monotonic_time
Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second) Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end end
...@@ -101,7 +119,7 @@ module Gitlab ...@@ -101,7 +119,7 @@ module Gitlab
end end
def wait_for_termination def wait_for_termination
deadline = monotonic_time + TERMINATE_TIMEOUT_SECONDS deadline = monotonic_time + hard_timeout_seconds
sleep(CHECK_TERMINATE_INTERVAL_SECONDS) while continue_waiting?(deadline) sleep(CHECK_TERMINATE_INTERVAL_SECONDS) while continue_waiting?(deadline)
hard_stop_stuck_pids hard_stop_stuck_pids
...@@ -176,6 +194,10 @@ module Gitlab ...@@ -176,6 +194,10 @@ module Gitlab
@interval = int.to_i @interval = int.to_i
end end
opt.on('-t', '--timeout INT', 'Graceful timeout for all running processes') do |timeout|
@soft_timeout_seconds = timeout.to_i
end
opt.on('-d', '--dryrun', 'Print commands that would be run without this flag, and quit') do |int| opt.on('-d', '--dryrun', 'Print commands that would be run without this flag, and quit') do |int|
@dryrun = true @dryrun = true
end end
......
...@@ -5,8 +5,9 @@ require 'rspec-parameterized' ...@@ -5,8 +5,9 @@ require 'rspec-parameterized'
describe Gitlab::SidekiqCluster::CLI do describe Gitlab::SidekiqCluster::CLI do
let(:cli) { described_class.new('/dev/null') } let(:cli) { described_class.new('/dev/null') }
let(:timeout) { described_class::DEFAULT_SOFT_TIMEOUT_SECONDS }
let(:default_options) do let(:default_options) do
{ env: 'test', directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, dryrun: false } { env: 'test', directory: Dir.pwd, max_concurrency: 50, min_concurrency: 0, dryrun: false, timeout: timeout }
end end
before do before do
...@@ -80,6 +81,22 @@ describe Gitlab::SidekiqCluster::CLI do ...@@ -80,6 +81,22 @@ describe Gitlab::SidekiqCluster::CLI do
end end
end end
context '-timeout flag' do
it 'when given', 'starts Sidekiq workers with given timeout' do
expect(Gitlab::SidekiqCluster).to receive(:start)
.with([['foo']], default_options.merge(timeout: 10))
cli.run(%w(foo --timeout 10))
end
it 'when not given', 'starts Sidekiq workers with default timeout' do
expect(Gitlab::SidekiqCluster).to receive(:start)
.with([['foo']], default_options.merge(timeout: described_class::DEFAULT_SOFT_TIMEOUT_SECONDS))
cli.run(%w(foo))
end
end
context 'queue namespace expansion' do context 'queue namespace expansion' do
it 'starts Sidekiq workers for all queues in all_queues.yml with a namespace in argv' do it 'starts Sidekiq workers for all queues in all_queues.yml with a namespace in argv' do
expect(Gitlab::SidekiqConfig::CliMethods).to receive(:worker_queues).and_return(['cronjob:foo', 'cronjob:bar']) expect(Gitlab::SidekiqConfig::CliMethods).to receive(:worker_queues).and_return(['cronjob:foo', 'cronjob:bar'])
...@@ -222,7 +239,8 @@ describe Gitlab::SidekiqCluster::CLI do ...@@ -222,7 +239,8 @@ describe Gitlab::SidekiqCluster::CLI do
.with([], :KILL) .with([], :KILL)
stub_const("Gitlab::SidekiqCluster::CLI::CHECK_TERMINATE_INTERVAL_SECONDS", 0.1) stub_const("Gitlab::SidekiqCluster::CLI::CHECK_TERMINATE_INTERVAL_SECONDS", 0.1)
stub_const("Gitlab::SidekiqCluster::CLI::TERMINATE_TIMEOUT_SECONDS", 1) allow(cli).to receive(:terminate_timeout_seconds) { 1 }
cli.wait_for_termination cli.wait_for_termination
end end
...@@ -251,7 +269,8 @@ describe Gitlab::SidekiqCluster::CLI do ...@@ -251,7 +269,8 @@ describe Gitlab::SidekiqCluster::CLI do
cli.run(%w(foo)) cli.run(%w(foo))
stub_const("Gitlab::SidekiqCluster::CLI::CHECK_TERMINATE_INTERVAL_SECONDS", 0.1) stub_const("Gitlab::SidekiqCluster::CLI::CHECK_TERMINATE_INTERVAL_SECONDS", 0.1)
stub_const("Gitlab::SidekiqCluster::CLI::TERMINATE_TIMEOUT_SECONDS", 1) allow(cli).to receive(:terminate_timeout_seconds) { 1 }
cli.wait_for_termination cli.wait_for_termination
end end
end end
......
...@@ -58,6 +58,7 @@ describe Gitlab::SidekiqCluster do ...@@ -58,6 +58,7 @@ describe Gitlab::SidekiqCluster do
directory: 'foo/bar', directory: 'foo/bar',
max_concurrency: 20, max_concurrency: 20,
min_concurrency: 10, min_concurrency: 10,
timeout: 25,
dryrun: true dryrun: true
} }
...@@ -74,6 +75,7 @@ describe Gitlab::SidekiqCluster do ...@@ -74,6 +75,7 @@ describe Gitlab::SidekiqCluster do
max_concurrency: 50, max_concurrency: 50,
min_concurrency: 0, min_concurrency: 0,
worker_id: an_instance_of(Integer), worker_id: an_instance_of(Integer),
timeout: 25,
dryrun: false dryrun: false
} }
...@@ -87,10 +89,10 @@ describe Gitlab::SidekiqCluster do ...@@ -87,10 +89,10 @@ describe Gitlab::SidekiqCluster do
describe '.start_sidekiq' do describe '.start_sidekiq' do
let(:first_worker_id) { 0 } let(:first_worker_id) { 0 }
let(:options) do let(:options) do
{ env: :production, directory: 'foo/bar', max_concurrency: 20, min_concurrency: 0, worker_id: first_worker_id, dryrun: false } { env: :production, directory: 'foo/bar', max_concurrency: 20, min_concurrency: 0, worker_id: first_worker_id, timeout: 10, dryrun: false }
end end
let(:env) { { "ENABLE_SIDEKIQ_CLUSTER" => "1", "SIDEKIQ_WORKER_ID" => first_worker_id.to_s } } let(:env) { { "ENABLE_SIDEKIQ_CLUSTER" => "1", "SIDEKIQ_WORKER_ID" => first_worker_id.to_s } }
let(:args) { ['bundle', 'exec', 'sidekiq', anything, '-eproduction', *([anything] * 5)] } let(:args) { ['bundle', 'exec', 'sidekiq', anything, '-eproduction', '-t10', *([anything] * 5)] }
it 'starts a Sidekiq process' do it 'starts a Sidekiq process' do
allow(Process).to receive(:spawn).and_return(1) allow(Process).to receive(:spawn).and_return(1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment