Commit 07858743 authored by Mikołaj Wawrzyniak

Merge branch 'mk/log-skip-sync-due-to-unhealthy-shard' into 'master'

Geo: Add logging to help diagnose why syncing is not working

See merge request gitlab-org/gitlab!58526
parents 92c9b69c b74dfc05
......@@ -24,7 +24,13 @@ module EachShardWorker
end
# Returns the ready shard checks that report success.
#
# Splits +ready_shards+ by each check's +success+ value. When any check
# failed, the payloads of the failing checks are sent to
# Gitlab::AppLogger.error together with the calling class name, so that
# excluded shards can be diagnosed.
#
# @return [Array] the checks from +ready_shards+ whose +success+ is truthy
def healthy_ready_shards
  # A single partition call evaluates ready_shards once and yields both groups.
  success_checks, failed_checks = ready_shards.partition(&:success)

  if failed_checks.any?
    ::Gitlab::AppLogger.error(message: 'Excluding unhealthy shards', failed_checks: failed_checks.map(&:payload), class: self.class.name)
  end

  success_checks
end
def ready_shards
......
......@@ -10,7 +10,10 @@ module Geo
# Runs the parent worker for the given shard unless the shard is unhealthy.
#
# Stores the shard name in @shard_name and asks Gitlab::ShardHealthCache
# whether the shard is healthy. For an unhealthy shard an error is logged
# with the shard name and the method returns without calling the parent
# implementation; otherwise the parent #perform is invoked with no arguments.
#
# @param shard_name the shard identifier passed to
#   Gitlab::ShardHealthCache.healthy_shard?
# @return [nil, Object] nil when the shard is unhealthy, otherwise whatever
#   the parent #perform returns
def perform(shard_name)
  @shard_name = shard_name

  unless Gitlab::ShardHealthCache.healthy_shard?(shard_name)
    # Log before bailing out so a skipped run is visible; a bare early
    # return ahead of this branch would make the log call unreachable.
    log_error("Skipped scheduling syncs due to unhealthy shard", nil, { shard_name: shard_name })
    return
  end

  super()
end
......
......@@ -27,6 +27,8 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :clean_gitlab_redis_c
it 'does not perform Geo::DesignRepositorySyncWorker when shard becomes unhealthy' do
Gitlab::ShardHealthCache.update([])
log_data = { message: "Skipped scheduling syncs due to unhealthy shard", shard_name: shard_name }
expect(Gitlab::Geo::Logger).to receive(:error).with(a_hash_including(log_data))
expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async)
subject.perform(shard_name)
......
......@@ -24,6 +24,8 @@ RSpec.describe Geo::RepositoryShardSyncWorker, :geo, :clean_gitlab_redis_cache,
it 'does not perform Geo::ProjectSyncWorker when shard becomes unhealthy' do
Gitlab::ShardHealthCache.update([])
log_data = { message: "Skipped scheduling syncs due to unhealthy shard", shard_name: shard_name }
expect(Gitlab::Geo::Logger).to receive(:error).with(a_hash_including(log_data))
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async)
subject.perform(shard_name)
......
......@@ -56,6 +56,13 @@ RSpec.describe Geo::Scheduler::PerShardSchedulerWorker do
# The worker must report exactly the shards the surrounding spec defines as healthy.
it "returns an array of healthy shard names" do
  reported_shards = per_shard_scheduler_worker.healthy_ready_shards

  expect(reported_shards).to eq(healthy_ready_shards)
end
# Asking for healthy shards must emit an error log entry describing the failed check.
it "logs unhealthy shards" do
  failed_check = { labels: { shard: unhealthy_shard_name }, message: '14:Connect Failed', status: 'failed' }
  expected_entry = {
    message: "Excluding unhealthy shards",
    failed_checks: [failed_check],
    class: described_class.name
  }

  expect(Gitlab::AppLogger).to receive(:error).with(a_hash_including(expected_entry))

  per_shard_scheduler_worker.healthy_ready_shards
end
end
describe '#healthy_shard_names' do
......
......@@ -42,5 +42,12 @@ RSpec.describe RepositoryCheck::DispatchWorker do
subject.perform
end
# Performing the worker must emit an error log entry describing the failed check.
it 'logs unhealthy shards' do
  unhealthy_check = { labels: { shard: unhealthy_shard_name }, message: '14:Connect Failed', status: 'failed' }
  expected_payload = {
    message: "Excluding unhealthy shards",
    failed_checks: [unhealthy_check],
    class: described_class.name
  }

  expect(Gitlab::AppLogger).to receive(:error).with(a_hash_including(expected_payload))

  subject.perform
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment