Commit 07858743 authored by Mikołaj Wawrzyniak

Merge branch 'mk/log-skip-sync-due-to-unhealthy-shard' into 'master'

Geo: Add logging to help diagnose why syncing is not working

See merge request gitlab-org/gitlab!58526
parents 92c9b69c b74dfc05
...@@ -24,7 +24,13 @@ module EachShardWorker ...@@ -24,7 +24,13 @@ module EachShardWorker
end end
def healthy_ready_shards def healthy_ready_shards
ready_shards.select(&:success) success_checks, failed_checks = ready_shards.partition(&:success)
if failed_checks.any?
::Gitlab::AppLogger.error(message: 'Excluding unhealthy shards', failed_checks: failed_checks.map(&:payload), class: self.class.name)
end
success_checks
end end
def ready_shards def ready_shards
......
...@@ -10,7 +10,10 @@ module Geo ...@@ -10,7 +10,10 @@ module Geo
def perform(shard_name) def perform(shard_name)
@shard_name = shard_name @shard_name = shard_name
return unless Gitlab::ShardHealthCache.healthy_shard?(shard_name) unless Gitlab::ShardHealthCache.healthy_shard?(shard_name)
log_error("Skipped scheduling syncs due to unhealthy shard", nil, { shard_name: shard_name })
return
end
super() super()
end end
......
...@@ -27,6 +27,8 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :clean_gitlab_redis_c ...@@ -27,6 +27,8 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :clean_gitlab_redis_c
it 'does not perform Geo::DesignRepositorySyncWorker when shard becomes unhealthy' do it 'does not perform Geo::DesignRepositorySyncWorker when shard becomes unhealthy' do
Gitlab::ShardHealthCache.update([]) Gitlab::ShardHealthCache.update([])
log_data = { message: "Skipped scheduling syncs due to unhealthy shard", shard_name: shard_name }
expect(Gitlab::Geo::Logger).to receive(:error).with(a_hash_including(log_data))
expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async) expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async)
subject.perform(shard_name) subject.perform(shard_name)
......
...@@ -24,6 +24,8 @@ RSpec.describe Geo::RepositoryShardSyncWorker, :geo, :clean_gitlab_redis_cache, ...@@ -24,6 +24,8 @@ RSpec.describe Geo::RepositoryShardSyncWorker, :geo, :clean_gitlab_redis_cache,
it 'does not perform Geo::ProjectSyncWorker when shard becomes unhealthy' do it 'does not perform Geo::ProjectSyncWorker when shard becomes unhealthy' do
Gitlab::ShardHealthCache.update([]) Gitlab::ShardHealthCache.update([])
log_data = { message: "Skipped scheduling syncs due to unhealthy shard", shard_name: shard_name }
expect(Gitlab::Geo::Logger).to receive(:error).with(a_hash_including(log_data))
expect(Geo::ProjectSyncWorker).not_to receive(:perform_async) expect(Geo::ProjectSyncWorker).not_to receive(:perform_async)
subject.perform(shard_name) subject.perform(shard_name)
......
...@@ -56,6 +56,13 @@ RSpec.describe Geo::Scheduler::PerShardSchedulerWorker do ...@@ -56,6 +56,13 @@ RSpec.describe Geo::Scheduler::PerShardSchedulerWorker do
it "returns an array of healthy shard names" do it "returns an array of healthy shard names" do
expect(per_shard_scheduler_worker.healthy_ready_shards).to eq(healthy_ready_shards) expect(per_shard_scheduler_worker.healthy_ready_shards).to eq(healthy_ready_shards)
end end
it "logs unhealthy shards" do
log_data = { message: "Excluding unhealthy shards", failed_checks: [{ labels: { shard: unhealthy_shard_name }, message: '14:Connect Failed', status: 'failed' }], class: described_class.name }
expect(Gitlab::AppLogger).to receive(:error).with(a_hash_including(log_data))
per_shard_scheduler_worker.healthy_ready_shards
end
end end
describe '#healthy_shard_names' do describe '#healthy_shard_names' do
......
...@@ -42,5 +42,12 @@ RSpec.describe RepositoryCheck::DispatchWorker do ...@@ -42,5 +42,12 @@ RSpec.describe RepositoryCheck::DispatchWorker do
subject.perform subject.perform
end end
it 'logs unhealthy shards' do
log_data = { message: "Excluding unhealthy shards", failed_checks: [{ labels: { shard: unhealthy_shard_name }, message: '14:Connect Failed', status: 'failed' }], class: described_class.name }
expect(Gitlab::AppLogger).to receive(:error).with(a_hash_including(log_data))
subject.perform
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment