Commit cab085e8 authored by Mark Lapierre

Update stale E2E test - automatic replication

parent 9c8f090c
......@@ -353,15 +353,47 @@ module QA
Support::Waiter.wait_until(sleep_interval: 1) { replication_queue_incomplete_count == 0 && replicated?(project_id) }
end
# Reports whether the replication_queue table holds at least one 'ready'
# job of type 'update' whose source is the current primary node and whose
# target is @primary_node.
#
# @return [Boolean] true when the query output contains at least one job row
def replication_pending?
  pending_jobs_query = <<~SQL
    select job from replication_queue
    where state = 'ready'
    and job ->> 'change' = 'update'
    and job ->> 'source_node_storage' = '#{current_primary_node}'
    and job ->> 'target_node_storage' = '#{@primary_node}';
  SQL

  # Explicit parentheses keep the block attached to `shell` rather than to
  # `sql_to_docker_exec_cmd`.
  output_lines = []
  shell(sql_to_docker_exec_cmd(pending_jobs_query)) { |line| output_lines << line }

  # Sample output when a single job is queued:
  #
  # job
  # -----------
  # {"change": "update", "params": null, "relative_path": "@hashed/4b/22/4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328cb08b5531fcacdabf8a.git", "virtual_storage": "default", "source_node_storage": "gitaly3", "target_node_storage": "gitaly1"}
  # (1 row)
  # <blank row>
  #
  # Header, separator, row-count footer and trailing blank make up 4 layout
  # lines, so 5 or more lines means at least one job row is present.
  output_lines.size >= 5
end
private
# Determines which node is currently the primary.
#
# NOTE(review): this span comes from a diff rendered without +/- markers, so two
# bodies appear interleaved: one scans the dataloss command output for a
# "Primary: <name>" line, the other queries shard_primaries for @virtual_storage.
# Presumably only the SQL-based body is the current one — confirm against the
# real file before relying on this text.
def current_primary_node
shell dataloss_command do |line|
QA::Runtime::Logger.debug(line.chomp)
# Break out of the block with the captured name as soon as a line names the primary.
match = line.match(/Primary: (.*)/)
break match[1] if match
result = []
shell sql_to_docker_exec_cmd("select node_name from shard_primaries where shard_name = '#{@virtual_storage}';") do |line|
result << line
end
# The result looks like:
# node_name
# -----------
# gitaly1
# (1 row)
#
# Index 2 is the first data row (after the header and separator lines); strip
# removes the surrounding whitespace.
result[2].strip
end
def dataloss_command
......
......@@ -3,7 +3,7 @@
module QA
RSpec.describe 'Create' do
context 'Gitaly' do
describe 'Backend node recovery', :orchestrated, :gitaly_cluster, :skip_live_env, quarantine: { issue: 'https://gitlab.com/gitlab-org/gitlab/-/issues/238186', type: :investigating } do
describe 'Backend node recovery', :orchestrated, :gitaly_cluster, :skip_live_env do
let(:praefect_manager) { Service::PraefectManager.new }
let(:project) do
Resource::Project.fabricate! do |project|
......@@ -50,18 +50,17 @@ module QA
push.file_content = 'new file'
end
# Confirm that the commit is waiting to be replicated
expect(praefect_manager).to be_replication_pending
# Start the old primary node again
praefect_manager.start_primary_node
praefect_manager.wait_for_health_check_current_primary_node
# Confirm dataloss (i.e., inconsistent nodes)
expect(praefect_manager.replicated?(project.id)).to be false
praefect_manager.wait_for_health_check_all_nodes
# Reconcile nodes to recover from dataloss
praefect_manager.reconcile_nodes
# Wait for automatic replication
praefect_manager.wait_for_replication(project.id)
# Confirm that both commits are available after reconciliation
# Confirm that both commits are available
expect(project.commits.map { |commit| commit[:message].chomp })
.to include("Initial commit").and include("pushed after failover")
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment