Commit 73f323b0 authored by Stan Hu's avatar Stan Hu

Support graceful shutdown of Workhorse connections

This commit does two things:

1. Uses Go's graceful HTTP shutdown to close the listen port and wait
for all HTTP handlers to finish. This will wait up to `shutdown_timeout`
seconds, which is 0 by default, to preserve existing behavior.

2. Kicks out all long poll queries by closing down the shutdown channel,
which is monitored by the Redis key watcher.

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/325114

Changelog: added
parent 1c61d265
# alt_document_root = '/home/git/public/assets'
# shutdown_timeout = "60s"
[redis]
URL = "unix:/home/git/gitlab/redis/redis.socket"
......
......@@ -16,12 +16,20 @@ import (
"gitlab.com/gitlab-org/gitlab-workhorse/internal/upstream"
)
func TestDefaultConfig(t *testing.T) {
_, cfg, err := buildConfig("test", []string{"-config", "/dev/null"})
require.NoError(t, err, "build config")
require.Equal(t, 0*time.Second, cfg.ShutdownTimeout.Duration)
}
func TestConfigFile(t *testing.T) {
f, err := ioutil.TempFile("", "workhorse-config-test")
require.NoError(t, err)
defer os.Remove(f.Name())
data := `
shutdown_timeout = "60s"
[redis]
password = "redis password"
[object_storage]
......@@ -43,6 +51,7 @@ max_scaler_procs = 123
require.Equal(t, "redis password", cfg.Redis.Password)
require.Equal(t, "test provider", cfg.ObjectStorageCredentials.Provider)
require.Equal(t, uint32(123), cfg.ImageResizerConfig.MaxScalerProcs, "image resizer max_scaler_procs")
require.Equal(t, 60*time.Second, cfg.ShutdownTimeout.Duration)
}
func TestConfigErrorHelp(t *testing.T) {
......
......@@ -28,7 +28,7 @@ type TomlDuration struct {
time.Duration
}
func (d *TomlDuration) UnmarshalTest(text []byte) error {
func (d *TomlDuration) UnmarshalText(text []byte) error {
temp, err := time.ParseDuration(string(text))
d.Duration = temp
return err
......@@ -103,6 +103,7 @@ type Config struct {
PropagateCorrelationID bool `toml:"-"`
ImageResizerConfig ImageResizerConfig `toml:"image_resizer"`
AltDocumentRoot string `toml:"alt_document_root"`
ShutdownTimeout TomlDuration `toml:"shutdown_timeout"`
}
var DefaultImageResizerConfig = ImageResizerConfig{
......
......@@ -17,6 +17,7 @@ import (
var (
keyWatcher = make(map[string][]chan string)
keyWatcherMutex sync.Mutex
shutdown = make(chan struct{})
redisReconnectTimeout = backoff.Backoff{
//These are the defaults
Min: 100 * time.Millisecond,
......@@ -112,6 +113,20 @@ func Process() {
}
}
func Shutdown() {
log.Info("keywatcher: shutting down")
keyWatcherMutex.Lock()
defer keyWatcherMutex.Unlock()
select {
case <-shutdown:
// already closed
default:
close(shutdown)
}
}
func notifyChanWatchers(key, value string) {
keyWatcherMutex.Lock()
defer keyWatcherMutex.Unlock()
......@@ -182,6 +197,9 @@ func WatchKey(key, value string, timeout time.Duration) (WatchKeyStatus, error)
}
select {
case <-shutdown:
log.WithFields(log.Fields{"key": key}).Info("stopping watch due to shutdown")
return WatchKeyStatusNoChange, nil
case currentValue := <-kw.Chan:
if currentValue == "" {
return WatchKeyStatusNoChange, fmt.Errorf("keywatcher: redis GET failed")
......
......@@ -160,3 +160,58 @@ func TestWatchKeyMassivelyParallel(t *testing.T) {
processMessages(runTimes, "somethingelse")
wg.Wait()
}
func TestShutdown(t *testing.T) {
conn, td := setupMockPool()
defer td()
defer func() { shutdown = make(chan struct{}) }()
conn.Command("GET", runnerKey).Expect("something")
wg := &sync.WaitGroup{}
wg.Add(2)
go func() {
val, err := WatchKey(runnerKey, "something", 10*time.Second)
require.NoError(t, err, "Expected no error")
require.Equal(t, WatchKeyStatusNoChange, val, "Expected value not to change")
wg.Done()
}()
go func() {
for countWatchers(runnerKey) == 0 {
time.Sleep(time.Millisecond)
}
require.Equal(t, 1, countWatchers(runnerKey))
Shutdown()
wg.Done()
}()
wg.Wait()
for countWatchers(runnerKey) == 1 {
time.Sleep(time.Millisecond)
}
require.Equal(t, 0, countWatchers(runnerKey))
// Adding a key after the shutdown should result in an immediate response
var val WatchKeyStatus
var err error
done := make(chan struct{})
go func() {
val, err = WatchKey(runnerKey, "something", 10*time.Second)
close(done)
}()
select {
case <-done:
require.NoError(t, err, "Expected no error")
require.Equal(t, WatchKeyStatusNoChange, val, "Expected value not to change")
case <-time.After(100 * time.Millisecond):
t.Fatal("timeout waiting for WatchKey")
}
}
package main
import (
"context"
"flag"
"fmt"
"io/ioutil"
......@@ -8,6 +9,7 @@ import (
"net/http"
_ "net/http/pprof"
"os"
"os/signal"
"syscall"
"time"
......@@ -144,6 +146,7 @@ func buildConfig(arg0 string, args []string) (*bootConfig, *config.Config, error
cfg.ObjectStorageCredentials = cfgFromFile.ObjectStorageCredentials
cfg.ImageResizerConfig = cfgFromFile.ImageResizerConfig
cfg.AltDocumentRoot = cfgFromFile.AltDocumentRoot
cfg.ShutdownTimeout = cfgFromFile.ShutdownTimeout
return boot, cfg, nil
}
......@@ -225,7 +228,22 @@ func run(boot bootConfig, cfg config.Config) error {
up := wrapRaven(upstream.NewUpstream(cfg, accessLogger))
go func() { finalErrors <- http.Serve(listener, up) }()
done := make(chan os.Signal, 1)
signal.Notify(done, syscall.SIGINT, syscall.SIGTERM)
return <-finalErrors
server := http.Server{Handler: up}
go func() { finalErrors <- server.Serve(listener) }()
select {
case err := <-finalErrors:
return err
case sig := <-done:
log.WithFields(log.Fields{"shutdown_timeout_s": cfg.ShutdownTimeout.Duration.Seconds(), "signal": sig.String()}).Infof("shutdown initiated")
ctx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout.Duration) // lint:allow context.Background
defer cancel()
redis.Shutdown()
return server.Shutdown(ctx)
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment