Commit 35b58684 authored by Ramya Authappan

Merge branch 'ml-rename-gitaly-ha-to-cluster' into 'master'

Rename Gitaly HA to Gitaly Cluster in E2E tests

Closes gitlab-org/quality/team-tasks#573

See merge request gitlab-org/gitlab!38251
parents e0184e88 e794bf68
......@@ -17,6 +17,7 @@ gem 'knapsack', '~> 1.17'
gem 'parallel_tests', '~> 2.29'
gem 'rotp', '~> 3.1.0'
gem 'timecop', '~> 0.9.1'
gem "parallel", "~> 1.19"
group :development do
gem 'pry-byebug', '~> 3.5.1', platform: :mri
......
......@@ -126,6 +126,7 @@ DEPENDENCIES
gitlab-qa
knapsack (~> 1.17)
nokogiri (~> 1.10.9)
+parallel (~> 1.19)
parallel_tests (~> 2.29)
pry-byebug (~> 3.5.1)
rake (~> 12.3.3)
......
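The new `parallel` runtime dependency backs the concurrency used by the Gitaly Cluster specs further down (one of the spec hunks below carries `require 'parallel'` in its context line). A minimal sketch of the gem's fan-out API; the worker count and block body here are illustrative, not taken from the specs:

require 'parallel'

# Fan 100 read-like operations out across 4 worker processes.
served_by = Parallel.map(1..100, in_processes: 4) do |i|
  "gitaly#{(i % 3) + 1}" # stand-in for asking the cluster which node served a read
end

counts = Hash.new(0)
served_by.each { |node| counts[node] += 1 }
puts counts # e.g. {"gitaly1"=>34, "gitaly2"=>33, "gitaly3"=>33}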
......@@ -151,7 +151,6 @@ module QA
autoload :Mattermost, 'qa/scenario/test/integration/mattermost'
autoload :ObjectStorage, 'qa/scenario/test/integration/object_storage'
autoload :SMTP, 'qa/scenario/test/integration/smtp'
-autoload :GitalyHA, 'qa/scenario/test/integration/gitaly_ha'
end
module Sanity
......
# frozen_string_literal: true
module QA
module Scenario
module Test
module Integration
class GitalyHA < Test::Instance::All
tags :gitaly_ha
end
end
end
end
end
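The excerpt above shows only the old GitalyHA scenario class and its autoload being removed; the renamed replacement is not visible in this diff view. Going by the naming convention of the sibling scenarios, its assumed shape would be:

# Assumed content of qa/scenario/test/integration/gitaly_cluster.rb (not shown in this excerpt)
# frozen_string_literal: true

module QA
  module Scenario
    module Test
      module Integration
        class GitalyCluster < Test::Instance::All
          tags :gitaly_cluster
        end
      end
    end
  end
end

A matching autoload line, autoload :GitalyCluster, 'qa/scenario/test/integration/gitaly_cluster', would take the removed entry's place.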
......@@ -10,7 +10,7 @@ module QA
PrometheusQueryError = Class.new(StandardError)
def initialize
-@gitlab = 'gitlab-gitaly-ha'
+@gitlab = 'gitlab-gitaly-cluster'
@praefect = 'praefect'
@postgres = 'postgres'
@primary_node = 'gitaly1'
......@@ -28,7 +28,7 @@ module QA
def replicated?(project_id)
Support::Retrier.retry_until(raise_on_failure: false) do
-replicas = wait_until_shell_command(%(docker exec gitlab-gitaly-ha bash -c 'gitlab-rake "gitlab:praefect:replicas[#{project_id}]"')) do |line|
+replicas = wait_until_shell_command(%(docker exec #{@gitlab} bash -c 'gitlab-rake "gitlab:praefect:replicas[#{project_id}]"')) do |line|
QA::Runtime::Logger.debug(line.chomp)
# The output of the rake task looks something like this:
#
......@@ -77,6 +77,7 @@ module QA
def trigger_failover_by_stopping_primary_node
QA::Runtime::Logger.info("Stopping node #{@primary_node} to trigger failover")
stop_node(@primary_node)
+wait_for_new_primary
end
def clear_replication_queue
......@@ -121,7 +122,7 @@ module QA
end
def query_read_distribution
output = shell "docker exec gitlab-gitaly-ha bash -c 'curl -s http://localhost:9090/api/v1/query?query=gitaly_praefect_read_distribution'" do |line|
output = shell "docker exec #{@gitlab} bash -c 'curl -s http://localhost:9090/api/v1/query?query=gitaly_praefect_read_distribution'" do |line|
QA::Runtime::Logger.debug(line)
break line
end
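query_read_distribution shells out to the Prometheus instant-query API inside the container and then has to pick the per-node counts out of the returned vector. For orientation, a sketch of that parsing; the response shape follows the standard Prometheus /api/v1/query format, but the storage label name on the gitaly_praefect_read_distribution metric is an assumption, not something visible in this diff:

require 'json'

# Illustrative Prometheus vector response (values invented).
raw = '{"status":"success","data":{"result":[' \
      '{"metric":{"storage":"gitaly1"},"value":[1596000000,"42"]},' \
      '{"metric":{"storage":"gitaly2"},"value":[1596000000,"40"]}]}}'

result = JSON.parse(raw, symbolize_names: true).dig(:data, :result)
reads = result.map { |r| { node: r[:metric][:storage], value: r[:value][1].to_i } }
# => [{:node=>"gitaly1", :value=>42}, {:node=>"gitaly2", :value=>40}]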
......@@ -179,15 +180,6 @@ module QA
wait_for_reliable_connection
end
-def reset_cluster
-QA::Runtime::Logger.info('Reset Gitaly Cluster by starting all nodes and enabling writes')
-start_node(@praefect)
-start_node(@primary_node)
-start_node(@secondary_node)
-start_node(@tertiary_node)
-wait_for_health_check_all_nodes
-end
def verify_storage_move(source_storage, destination_storage)
return if QA::Runtime::Env.dot_com?
......@@ -346,7 +338,7 @@ module QA
end
def value_for_node(data, node)
-data.find(-> {0}) { |item| item[:node] == node }[:value]
+data.find(-> {{ value: 0 }}) { |item| item[:node] == node }[:value]
end
def wait_for_reliable_connection
......
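The value_for_node change above is a genuine bug fix riding along with the rename: Array#find calls its ifnone argument when no element matches, and the old fallback -> {0} returned an Integer, so the trailing [:value] lookup raised TypeError (Integer#[] expects a bit index, not a Symbol). Returning a hash makes a missing node read as zero. A quick demonstration:

data = [{ node: 'gitaly1', value: 7 }]

# Old fallback: 0[:value] raises TypeError when the node is absent.
# data.find(-> { 0 }) { |item| item[:node] == 'gitaly9' }[:value]

# New fallback: an absent node now yields 0.
data.find(-> { { value: 0 } }) { |item| item[:node] == 'gitaly9' }[:value] # => 0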
# frozen_string_literal: true
module QA
RSpec.describe 'Create' do
context 'Gitaly automatic failover and manual recovery', :orchestrated, :gitaly_cluster do
# Variables shared between contexts. They're reused across contexts,
# so they can't be `let` variables (see the sketch after this spec).
praefect_manager = Service::PraefectManager.new
project = nil
let(:initial_commit_message) { 'Initial commit' }
let(:first_added_commit_message) { 'pushed to primary gitaly node' }
let(:second_added_commit_message) { 'commit to failover node' }
before(:context) do
# Reset the cluster in case previous tests left it in a bad state
praefect_manager.reset_primary_to_original
project = Resource::Project.fabricate! do |project|
project.name = "gitaly_cluster"
project.initialize_with_readme = true
end
end
after(:context) do
# Leave the cluster in a suitable state for subsequent tests,
# in case there was a problem during the tests here
praefect_manager.reset_primary_to_original
end
it 'automatically fails over' do
# Create a new project with a commit and wait for it to replicate
Resource::Repository::ProjectPush.fabricate! do |push|
push.project = project
push.commit_message = first_added_commit_message
push.new_branch = false
push.file_content = "This should exist on both nodes"
end
praefect_manager.wait_for_replication(project.id)
# Stop the primary node to trigger failover, and then wait
# for Gitaly to be ready for writes again
praefect_manager.trigger_failover_by_stopping_primary_node
praefect_manager.wait_for_new_primary
praefect_manager.wait_for_health_check_current_primary_node
praefect_manager.wait_for_gitaly_check
Resource::Repository::Commit.fabricate_via_api! do |commit|
commit.project = project
commit.commit_message = second_added_commit_message
commit.add_files([
{
file_path: "file-#{SecureRandom.hex(8)}",
content: 'This should exist on one node before reconciliation'
}
])
end
# Confirm that we have access to the repo after failover,
# including the commit we just added
expect(project.commits.map { |commit| commit[:message].chomp })
.to include(initial_commit_message)
.and include(first_added_commit_message)
.and include(second_added_commit_message)
end
context 'when recovering from data loss after failover' do
it 'allows reconciliation' do
# Start the old primary node again
praefect_manager.start_primary_node
praefect_manager.wait_for_health_check_current_primary_node
# Confirm data loss (i.e., inconsistent nodes)
expect(praefect_manager.replicated?(project.id)).to be false
# Reconcile nodes to recover from data loss
praefect_manager.reconcile_nodes
praefect_manager.wait_for_replication(project.id)
# Confirm that all commits are available after reconciliation
expect(project.commits.map { |commit| commit[:message].chomp })
.to include(initial_commit_message)
.and include(first_added_commit_message)
.and include(second_added_commit_message)
# Restore the original primary node
praefect_manager.reset_primary_to_original
# Check that all commits are still available even though the primary
# node was offline when one was made
expect(project.commits.map { |commit| commit[:message].chomp })
.to include(initial_commit_message)
.and include(first_added_commit_message)
.and include(second_added_commit_message)
end
end
end
end
end
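The shared-variable comment at the top of this spec leans on an RSpec scoping rule worth spelling out: let bindings are memoized per example and are not available inside before(:context) hooks, so state that must survive across examples has to live in an ordinary variable. A toy illustration, with all names invented:

require 'securerandom'

RSpec.describe 'context-scoped state vs let' do
  shared_project = nil                    # plain variable: survives across examples
  let(:per_example) { SecureRandom.hex }  # memoized per example, reset each time

  before(:context) { shared_project = 'created once' }

  it('is visible in the first example') { expect(shared_project).to eq('created once') }
  it('is still visible in the second') { expect(shared_project).to eq('created once') }
end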
......@@ -3,7 +3,7 @@
module QA
RSpec.describe 'Create' do
context 'Gitaly' do
-describe 'Backend node recovery', :orchestrated, :gitaly_ha, :skip_live_env do
+describe 'Backend node recovery', :orchestrated, :gitaly_cluster, :skip_live_env do
let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do
Resource::Project.fabricate! do |project|
......
......@@ -6,7 +6,7 @@ module QA
RSpec.describe 'Create' do
context 'Gitaly' do
# Issue to track removal of feature flag: https://gitlab.com/gitlab-org/quality/team-tasks/-/issues/602
-describe 'Distributed reads', :orchestrated, :gitaly_ha, :skip_live_env, :requires_admin do
+describe 'Distributed reads', :orchestrated, :gitaly_cluster, :skip_live_env, :requires_admin do
let(:number_of_reads) { 100 }
let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do
......
......@@ -4,7 +4,7 @@ require 'parallel'
module QA
RSpec.describe 'Create' do
-context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_ha, :skip_live_env, quarantine: { issue: 'https://gitlab.com/gitlab-org/quality/pipeline-triage/-/issues/39#note_388590227', type: :stale } do
+context 'Gitaly Cluster replication queue', :orchestrated, :gitaly_cluster, :skip_live_env do
let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do
Resource::Project.fabricate! do |project|
......@@ -14,7 +14,8 @@ module QA
end
after do
-praefect_manager.reset_cluster
+praefect_manager.start_praefect
+praefect_manager.wait_for_reliable_connection
praefect_manager.clear_replication_queue
end
......
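With reset_cluster deleted from PraefectManager (see the removal in the hunk above), this after hook is rebuilt from narrower pieces, presumably because this spec only stops Praefect itself and never the Gitaly nodes. A hypothetical helper equivalent to the new hook body; the three method names come straight from the diff, the helper wrapper is illustrative only:

# Sketch: what the new after-hook does, expressed as one helper.
def recover_praefect(praefect_manager)
  praefect_manager.start_praefect               # restart only the Praefect service
  praefect_manager.wait_for_reliable_connection # wait until Praefect responds again
  praefect_manager.clear_replication_queue      # drop stale replication jobs
end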
# frozen_string_literal: true
module QA
RSpec.describe 'Create' do
context 'Gitaly' do
describe 'High Availability', :orchestrated, :gitaly_ha, quarantine: { issue: 'https://gitlab.com/gitlab-org/quality/pipeline-triage/-/issues/39#note_388590227', type: :stale } do
let(:project) do
Resource::Project.fabricate! do |project|
project.name = 'gitaly_high_availability'
end
end
let(:initial_file) { 'pushed_to_primary.txt' }
let(:final_file) { 'committed_to_primary.txt' }
let(:praefect_manager) { Service::PraefectManager.new }
before do
Flow::Login.sign_in
end
after do
praefect_manager.reset_cluster
end
it 'makes sure that automatic failover is happening' do
Resource::Repository::ProjectPush.fabricate! do |push|
push.project = project
push.commit_message = 'pushed to primary gitaly node'
push.new_branch = true
push.file_name = initial_file
push.file_content = "This should exist on both nodes"
end
praefect_manager.trigger_failover_by_stopping_primary_node
project.visit!
Page::Project::Show.perform do |show|
show.wait_until do
show.has_name?(project.name)
end
expect(show).to have_file(initial_file)
end
Resource::Repository::Commit.fabricate_via_api! do |commit|
commit.project = project
commit.add_files([
{
file_path: final_file,
content: 'This should exist on both nodes too'
}
])
end
project.visit!
Page::Project::Show.perform do |show|
expect(show).to have_file(final_file)
end
end
end
end
end
end