Commit 33445cfc authored by Stan Hu's avatar Stan Hu

Merge branch 'improve-chaos-controller' into 'master'

Add `db_spin` and refactor ChaosController

Closes #64425

See merge request gitlab-org/gitlab-ce!30559
parents b2f61487 56eb9f6c
# frozen_string_literal: true
class ChaosController < ActionController::Base
before_action :validate_request
before_action :validate_chaos_secret, unless: :development?
before_action :request_start_time
def leakmem
memory_mb = (params[:memory_mb]&.to_i || 100)
duration_s = (params[:duration_s]&.to_i || 30).seconds
start = Time.now
retainer = []
# Add `n` 1mb chunks of memory to the retainer array
memory_mb.times { retainer << "x" * 1.megabyte }
duration_taken = (Time.now - start).seconds
Kernel.sleep duration_s - duration_taken if duration_s > duration_taken
Kernel.sleep(duration_left)
render plain: "OK"
end
def cpuspin
duration_s = (params[:duration_s]&.to_i || 30).seconds
end_time = Time.now + duration_s.seconds
rand while Time.now < end_time
def cpu_spin
rand while Time.now < expected_end_time
render plain: "OK"
end
def db_spin
while Time.now < expected_end_time
ActiveRecord::Base.connection.execute("SELECT 1")
end_interval_time = Time.now + [duration_s, interval_s].min
rand while Time.now < end_interval_time
end
end
def sleep
duration_s = (params[:duration_s]&.to_i || 30).seconds
Kernel.sleep duration_s
Kernel.sleep(duration_left)
render plain: "OK"
end
......@@ -40,17 +41,57 @@ class ChaosController < ActionController::Base
private
def validate_request
secret = ENV['GITLAB_CHAOS_SECRET']
# GITLAB_CHAOS_SECRET is required unless you're running in Development mode
if !secret && !Rails.env.development?
render plain: "chaos misconfigured: please configure GITLAB_CHAOS_SECRET when using GITLAB_ENABLE_CHAOS_ENDPOINTS outside of a development environment", status: :internal_server_error
end
def request_start_time
@start_time ||= Time.now
end
def expected_end_time
request_start_time + duration_s
end
return unless secret
def duration_left
# returns 0 if over time
[expected_end_time - Time.now, 0].max
end
def validate_chaos_secret
unless chaos_secret_configured
render plain: "chaos misconfigured: please configure GITLAB_CHAOS_SECRET",
status: :internal_server_error
return
end
unless request.headers["HTTP_X_CHAOS_SECRET"] == secret
render plain: "To experience chaos, please set X-Chaos-Secret header", status: :unauthorized
unless Devise.secure_compare(chaos_secret_configured, chaos_secret_request)
render plain: "To experience chaos, please set a valid `X-Chaos-Secret` header or `token` param",
status: :unauthorized
return
end
end
def chaos_secret_configured
ENV['GITLAB_CHAOS_SECRET']
end
def chaos_secret_request
request.headers["HTTP_X_CHAOS_SECRET"] || params[:token]
end
def interval_s
interval_s = params[:interval_s] || 1
interval_s.to_f.seconds
end
def duration_s
duration_s = params[:duration_s] || 30
duration_s.to_i.seconds
end
def memory_mb
memory_mb = params[:memory_mb] || 100
memory_mb.to_i
end
def development?
Rails.env.development?
end
end
......@@ -110,11 +110,14 @@ Rails.application.routes.draw do
draw :jira_connect
end
if ENV['GITLAB_ENABLE_CHAOS_ENDPOINTS']
get '/chaos/leakmem' => 'chaos#leakmem'
get '/chaos/cpuspin' => 'chaos#cpuspin'
get '/chaos/sleep' => 'chaos#sleep'
get '/chaos/kill' => 'chaos#kill'
if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development?
resource :chaos, only: [] do
get :leakmem
get :cpu_spin
get :db_spin
get :sleep
get :kill
end
end
end
......
......@@ -15,23 +15,19 @@ Currently, there are four endpoints for simulating the following conditions:
## Enabling chaos endpoints
For obvious reasons, these endpoints are not enabled by default. They can be enabled by setting the `GITLAB_ENABLE_CHAOS_ENDPOINTS` environment variable to `1`.
For example, if you're using the [GDK](https://gitlab.com/gitlab-org/gitlab-development-kit) this can be done with the following command:
```bash
GITLAB_ENABLE_CHAOS_ENDPOINTS=1 gdk run
```
## Securing the chaos endpoints
For obvious reasons, these endpoints are not enabled by default on `production`.
They are enabled by default on **development** environments.
DANGER: **Danger:**
It is highly recommended that you secure access to the chaos endpoints using a secret token. This is recommended when enabling these endpoints locally and essential when running in a staging or other shared environment. You should not enable them in production unless you absolutely know what you're doing.
It is required that you secure access to the chaos endpoints using a secret token.
You should not enable them in production unless you absolutely know what you're doing.
A secret token can be set through the `GITLAB_CHAOS_SECRET` environment variable. For example, when using the [GDK](https://gitlab.com/gitlab-org/gitlab-development-kit) this can be done with the following command:
A secret token can be set through the `GITLAB_CHAOS_SECRET` environment variable.
For example, when using the [GDK](https://gitlab.com/gitlab-org/gitlab-development-kit)
this can be done with the following command:
```bash
GITLAB_ENABLE_CHAOS_ENDPOINTS=1 GITLAB_CHAOS_SECRET=secret gdk run
GITLAB_CHAOS_SECRET=secret gdk run
```
Replace `secret` with your own secret token.
......@@ -56,10 +52,11 @@ GET /-/chaos/leakmem?memory_mb=1024&duration_s=50
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------------------- |
| `memory_mb` | integer | no | How much memory, in MB, should be leaked. Defaults to 100MB. |
| `duration_s` | integer | no | Minimum duration, in seconds, that the memory should be retained. Defaults to 30s. |
| `duration_s` | integer | no | Minimum duration_s, in seconds, that the memory should be retained. Defaults to 30s. |
```bash
curl http://localhost:3000/-/chaos/leakmem?memory_mb=1024&duration_s=10 --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/leakmem?memory_mb=1024&duration_s=10&token=secret
```
## CPU spin
......@@ -70,23 +67,47 @@ Depending on your rack server setup, your request may timeout after a predermine
If you're using Unicorn, this is done by killing the worker process.
```
GET /-/chaos/cpuspin
GET /-/chaos/cpuspin?duration_s=50
GET /-/chaos/cpu_spin
GET /-/chaos/cpu_spin?duration_s=50
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | --------------------------------------------------------------------- |
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
```bash
curl http://localhost:3000/-/chaos/cpu_spin?duration_s=60 --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/cpu_spin?duration_s=60&token=secret
```
## DB spin
This endpoint attempts to fully utilise a single core, and interleave it with DB request, for the given period.
This endpoint can be used to model yielding execution to another threads when running concurrently.
Depending on your rack server setup, your request may timeout after a predermined period (normally 60 seconds).
If you're using Unicorn, this is done by killing the worker process.
```
GET /-/chaos/db_spin
GET /-/chaos/db_spin?duration_s=50
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | --------------------------------------------------------------------- |
| `interval_s` | float | no | Interval, in seconds, for every DB request. Defaults to 1s |
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
```bash
curl http://localhost:3000/-/chaos/cpuspin?duration_s=60 --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/db_spin?interval_s=1&duration_s=60 --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/db_spin?interval_s=1&duration_s=60&token=secret
```
## Sleep
This endpoint is similar to the CPU Spin endpoint but simulates off-processor activity, such as network calls to backend services. It will sleep for a given duration.
This endpoint is similar to the CPU Spin endpoint but simulates off-processor activity, such as network calls to backend services. It will sleep for a given duration_s.
As with the CPU Spin endpoint, this may lead to your request timing out if duration exceeds the configured limit.
As with the CPU Spin endpoint, this may lead to your request timing out if duration_s exceeds the configured limit.
```
GET /-/chaos/sleep
......@@ -99,6 +120,7 @@ GET /-/chaos/sleep?duration_s=50
```bash
curl http://localhost:3000/-/chaos/sleep?duration_s=60 --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/sleep?duration_s=60&token=secret
```
## Kill
......@@ -114,4 +136,5 @@ GET /-/chaos/kill
```bash
curl http://localhost:3000/-/chaos/kill --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/kill?token=secret
```
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment