Commit 2a2579fc authored by Ramya Authappan

Merge branch 'jmd-improve-gitaly-e2e-stop-start-times' into 'master'

Use docker pause instead of stop to minimise test downtime

See merge request gitlab-org/gitlab!79912
parents 83da1103 68ca6605
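
The gist of the change below: `docker pause` freezes a container's processes in place (via the cgroup freezer) and `docker unpause` resumes them, so Gitaly and Praefect do not go through a full service boot the way they do after `docker stop`/`docker start`. A minimal standalone Ruby sketch of that idea follows; the container name and the use of `system`/backticks to call the Docker CLI are illustrative assumptions, not code from this merge request:

# Sketch only: pausing freezes processes, so resuming is near-instant;
# stopping kills them, so restarting means waiting for services to boot again.
container = "gitaly1" # hypothetical container name

system("docker pause #{container}")
state = `docker inspect -f '{{.State.Status}}' #{container}`.strip
puts state # expected: "paused"

system("docker unpause #{container}")
state = `docker inspect -f '{{.State.Status}}' #{container}`.strip
puts state # expected: "running" again, with no service restart inside the container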
......@@ -50,6 +50,7 @@ module QA
def stop_primary_node
stop_node(@primary_node)
wait_until_node_is_removed_from_healthy_storages(@primary_node)
end
def start_primary_node
......@@ -67,6 +68,7 @@ module QA
def stop_secondary_node
stop_node(@secondary_node)
wait_until_node_is_removed_from_healthy_storages(@secondary_node)
end
def start_secondary_node
......@@ -75,6 +77,7 @@ module QA
def stop_tertiary_node
stop_node(@tertiary_node)
wait_until_node_is_removed_from_healthy_storages(@tertiary_node)
end
def start_tertiary_node
......@@ -82,20 +85,39 @@ module QA
end
def start_node(name)
-  shell "docker start #{name}"
-end
+  state = node_state(name)
+  return if state == "running"
+
+  if state == "paused"
+    shell "docker unpause #{name}"
+  end
+
+  if state == "stopped"
+    shell "docker start #{name}"
+  end
+
-def stop_node(name)
-  shell "docker stop #{name}"
  wait_until_shell_command_matches(
    "docker inspect -f {{.State.Running}} #{name}",
-    /false/,
+    /true/,
    sleep_interval: 3,
    max_duration: 180,
    retry_on_exception: true
  )
end

+def stop_node(name)
+  shell "docker pause #{name}"
+end
+
+def node_state(name)
+  state = "stopped"
+  wait_until_shell_command("docker inspect -f {{.State.Status}} #{name}") do |line|
+    QA::Runtime::Logger.debug(line)
+    break state = "running" if line.include?("running")
+    break state = "paused" if line.include?("paused")
+  end
+end
def clear_replication_queue
QA::Runtime::Logger.info("Clearing the replication queue")
shell sql_to_docker_exec_cmd(
......@@ -204,9 +226,8 @@ module QA
def wait_for_praefect
QA::Runtime::Logger.info("Waiting for health check on praefect")
Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
-# praefect runs a grpc server on port 2305, which will return an error 'Connection refused' until such time it is ready
-wait_until_shell_command("docker exec #{@gitaly_cluster} bash -c 'curl #{@praefect}:2305'") do |line|
-  break if line.include?('curl: (1) Received HTTP/0.9 when not allowed')
+wait_until_shell_command("docker exec #{@praefect} gitlab-ctl status praefect") do |line|
+  break true if line.include?('run: praefect: ')
QA::Runtime::Logger.debug(line.chomp)
end
......@@ -269,9 +290,8 @@ module QA
def wait_for_gitaly_health_check(node)
QA::Runtime::Logger.info("Waiting for health check on #{node}")
Support::Waiter.wait_until(max_duration: 120, sleep_interval: 1, raise_on_failure: true) do
-# gitaly runs a grpc server on port 8075, which will return an error 'Connection refused' until such time it is ready
-wait_until_shell_command("docker exec #{@praefect} bash -c 'curl #{node}:8075'") do |line|
-  break if line.include?('curl: (1) Received HTTP/0.9 when not allowed')
+wait_until_shell_command("docker exec #{node} gitlab-ctl status gitaly") do |line|
+  break true if line.include?('run: gitaly: ')
QA::Runtime::Logger.debug(line.chomp)
end
......
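
For context on the new health checks above: `gitlab-ctl status <service>` prints a runit status line, and the check treats the service as healthy only when that line contains `run: <service>: `. A small Ruby sketch of that parsing, using an illustrative (not captured) status line:

# Illustrative runit status line; pid and uptime values are made up.
status_line = 'run: praefect: (pid 342) 7312s; run: log: (pid 341) 7312s'

praefect_running = status_line.include?('run: praefect: ')
puts praefect_running # => true; a stopped service would report "down: praefect: ..." instead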
......@@ -9,37 +9,30 @@ module QA
project = nil
let(:intial_commit_message) { 'Initial commit' }
-let(:first_added_commit_message) { 'pushed to primary gitaly node' }
-let(:second_added_commit_message) { 'commit to failover node' }
+let(:first_added_commit_message) { 'first_added_commit_message to primary gitaly node' }
+let(:second_added_commit_message) { 'second_added_commit_message to failover node' }
before(:context) do
# Reset the cluster in case previous tests left it in a bad state
praefect_manager.start_all_nodes
project = Resource::Project.fabricate! do |project|
project.name = "gitaly_cluster"
project.initialize_with_readme = true
end
end
after do
praefect_manager.start_all_nodes
# We need to ensure that the project is replicated to all nodes before proceeding with this test
praefect_manager.wait_for_replication(project.id)
end
it 'automatically fails over', testcase: 'https://gitlab.com/gitlab-org/gitlab/-/quality/test_cases/347830' do
# Create a new project with a commit and wait for it to replicate
# make sure that our project is published to the 'primary' node
# stop other nodes, so we can control which node the commit is sent to
praefect_manager.stop_secondary_node
praefect_manager.stop_tertiary_node
praefect_manager.wait_for_secondary_node_health_check_failure
praefect_manager.wait_for_tertiary_node_health_check_failure
Resource::Repository::ProjectPush.fabricate! do |push|
push.project = project
push.commit_message = first_added_commit_message
push.new_branch = false
push.file_content = "This should exist on all nodes"
push.file_content = 'This file created on gitaly1 while gitaly2/gitaly3 not running'
end
praefect_manager.start_all_nodes
......@@ -56,7 +49,7 @@ module QA
commit.add_files([
{
file_path: "file-#{SecureRandom.hex(8)}",
-content: 'This should exist on one node before reconciliation'
+content: 'This is created on gitaly2/gitaly3 while gitaly1 is unavailable'
}
])
end
......
......@@ -4,7 +4,7 @@ require 'parallel'
module QA
RSpec.describe 'Create' do
-context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env, quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/346453', type: :flaky } do
+context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env do
let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do
Resource::Project.fabricate! do |project|
......@@ -15,12 +15,10 @@ module QA
before do
praefect_manager.start_all_nodes
praefect_manager.start_praefect
end
after do
praefect_manager.start_all_nodes
praefect_manager.start_praefect
praefect_manager.clear_replication_queue
end
......
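
The `after` hook above also calls `clear_replication_queue`, which (per the earlier hunk in praefect_manager.rb) sends SQL to the Praefect database through `sql_to_docker_exec_cmd`. A hedged sketch of the kind of cleanup statements involved; the table names are not shown in this diff and are assumptions based on Praefect's standard replication queue schema:

# Assumed cleanup SQL; the real helper's exact statements and ordering may differ.
cleanup_sql = <<~SQL
  DELETE FROM replication_queue_job_lock;
  DELETE FROM replication_queue_lock;
  DELETE FROM replication_queue;
SQL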