Add query to return projects verified before minimum interval

To constantly re-verify repositories we should find projects
where the last verification attempt happened before the minimum
re-verification interval. This interval says after how many
days (at least) a repository should be re-verified.
parent 56a67316
...@@ -48,11 +48,18 @@ module Geo ...@@ -48,11 +48,18 @@ module Geo
.where(repository_never_verified) .where(repository_never_verified)
.limit(batch_size) .limit(batch_size)
relation = apply_shard_restriction(relation) if shard_name.present? apply_shard_restriction(relation)
relation
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
# Returns the result of build_query_to_find_reverifiable_projects for the
# :repository type, forwarding interval and batch_size unchanged.
def find_reverifiable_repositories(interval:, batch_size:)
  options = { type: :repository, interval: interval, batch_size: batch_size }
  build_query_to_find_reverifiable_projects(**options)
end
# Returns the result of build_query_to_find_reverifiable_projects for the
# :wiki type, forwarding interval and batch_size unchanged.
def find_reverifiable_wikis(interval:, batch_size:)
  options = { type: :wiki, interval: interval, batch_size: batch_size }
  build_query_to_find_reverifiable_projects(**options)
end
def count_verified_repositories def count_verified_repositories
Project.verified_repos.count Project.verified_repos.count
end end
...@@ -84,8 +91,7 @@ module Geo ...@@ -84,8 +91,7 @@ module Geo
.and(repository_state_table["last_#{type}_verification_failure"].not_eq(nil)) .and(repository_state_table["last_#{type}_verification_failure"].not_eq(nil))
).take(batch_size) ).take(batch_size)
query = apply_shard_restriction(query) if shard_name.present? apply_shard_restriction(query)
query
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -98,8 +104,29 @@ module Geo ...@@ -98,8 +104,29 @@ module Geo
.where(repository_outdated.or(wiki_outdated)) .where(repository_outdated.or(wiki_outdated))
.take(batch_size) .take(batch_size)
query = apply_shard_restriction(query) if shard_name.present? apply_shard_restriction(query)
query end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
# Builds the query that selects projects eligible for re-verification.
#
# type       - :repository or :wiki; interpolated into the repository state
#              column names used below.
# interval   - compared with `lteq` against last_<type>_verification_ran_at,
#              so despite its name it acts as a timestamp cutoff, not a
#              duration.
# batch_size - maximum number of projects, applied through `limit`.
#
# Returns the query after it has been passed through apply_shard_restriction.
def build_query_to_find_reverifiable_projects(type:, interval:, batch_size:)
# A successful verification has a stored checksum and no recorded failure.
verification_succeded =
repository_state_table["#{type}_verification_checksum"].not_eq(nil)
.and(repository_state_table["last_#{type}_verification_failure"].eq(nil))
# Matches rows whose last run timestamp is missing or not later than the cutoff.
verified_before_interval =
repository_state_table["last_#{type}_verification_ran_at"].eq(nil).or(
repository_state_table["last_#{type}_verification_ran_at"].lteq(interval))
# We should prioritize less active projects first because highly active
# projects have their repositories verified more frequently.
# NOTE(review): last_repository_updated_at_asc is defined outside this
# block; presumably an ascending order on projects.last_repository_updated_at.
query =
Project.joins(:repository_state)
.where(verification_succeded.and(verified_before_interval))
.order(last_repository_updated_at_asc)
.limit(batch_size)
apply_shard_restriction(query)
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -141,8 +168,10 @@ module Geo ...@@ -141,8 +168,10 @@ module Geo
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
# Narrows the given query to projects whose repository_storage equals
# shard_name. In the updated (right-hand) version the query is returned
# untouched when shard_name is not present, so callers no longer need their
# own `if shard_name.present?` guard.
def apply_shard_restriction(relation) def apply_shard_restriction(query)
relation.where(projects_table[:repository_storage].eq(shard_name)) return query unless shard_name.present?
query.where(projects_table[:repository_storage].eq(shard_name))
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
end end
......
...@@ -4,6 +4,7 @@ FactoryBot.define do ...@@ -4,6 +4,7 @@ FactoryBot.define do
trait :repository_failed do trait :repository_failed do
repository_verification_checksum nil repository_verification_checksum nil
last_repository_verification_ran_at { Time.now }
last_repository_verification_failure 'Could not calculate the checksum' last_repository_verification_failure 'Could not calculate the checksum'
repository_retry_count 1 repository_retry_count 1
repository_retry_at { 5.minutes.ago } repository_retry_at { 5.minutes.ago }
...@@ -11,11 +12,13 @@ FactoryBot.define do ...@@ -11,11 +12,13 @@ FactoryBot.define do
trait :repository_outdated do trait :repository_outdated do
repository_verification_checksum nil repository_verification_checksum nil
last_repository_verification_ran_at { 1.day.ago }
last_repository_verification_failure nil last_repository_verification_failure nil
end end
trait :repository_verified do trait :repository_verified do
repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee' repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee'
last_repository_verification_ran_at { 1.day.ago }
last_repository_verification_failure nil last_repository_verification_failure nil
repository_retry_count nil repository_retry_count nil
repository_retry_at nil repository_retry_at nil
...@@ -23,6 +26,7 @@ FactoryBot.define do ...@@ -23,6 +26,7 @@ FactoryBot.define do
trait :wiki_failed do trait :wiki_failed do
wiki_verification_checksum nil wiki_verification_checksum nil
last_wiki_verification_ran_at { Time.now }
last_wiki_verification_failure 'Could not calculate the checksum' last_wiki_verification_failure 'Could not calculate the checksum'
wiki_retry_count 1 wiki_retry_count 1
wiki_retry_at { 5.minutes.ago } wiki_retry_at { 5.minutes.ago }
...@@ -30,11 +34,13 @@ FactoryBot.define do ...@@ -30,11 +34,13 @@ FactoryBot.define do
trait :wiki_outdated do trait :wiki_outdated do
wiki_verification_checksum nil wiki_verification_checksum nil
last_wiki_verification_ran_at { 1.day.ago }
last_wiki_verification_failure nil last_wiki_verification_failure nil
end end
trait :wiki_verified do trait :wiki_verified do
wiki_verification_checksum 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef' wiki_verification_checksum 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef'
last_wiki_verification_ran_at { 1.day.ago }
last_wiki_verification_failure nil last_wiki_verification_failure nil
wiki_retry_count nil wiki_retry_count nil
wiki_retry_at nil wiki_retry_at nil
......
...@@ -188,4 +188,104 @@ describe Geo::RepositoryVerificationFinder, :postgresql do ...@@ -188,4 +188,104 @@ describe Geo::RepositoryVerificationFinder, :postgresql do
end end
end end
end end
# Specs for the repository flavour of the re-verification finder.
# `project` and the default `subject` are defined outside this block (not
# visible here). Every example passes `1.day.ago` as the cutoff.
describe '#find_reverifiable_repositories' do
it 'returns projects where repository was verified before the minimum re-verification interval' do
project_recently_verified = create(:project)
# Last run two days ago: older than the one-day cutoff, so it is expected back.
create(:repository_state, :repository_verified, project: project, last_repository_verification_ran_at: 2.days.ago)
# Last run just now: newer than the cutoff, so it must be left out.
create(:repository_state, :repository_verified, project: project_recently_verified, last_repository_verification_ran_at: Time.now)
expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
.to match_array(project)
end
it 'does not return projects where repository verification is outdated' do
# The :repository_outdated trait has no checksum, so the state does not
# count as successfully verified even though the last run is old enough.
create(:repository_state, :repository_outdated, project: project, last_repository_verification_ran_at: 2.days.ago)
expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
.to be_empty
end
it 'does not return projects where repository verification failed' do
# The :repository_failed trait records a failure message and no checksum.
create(:repository_state, :repository_failed, project: project, last_repository_verification_ran_at: 2.days.ago)
expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
.to be_empty
end
it 'returns less active projects first' do
less_active_project = create(:project)
create(:repository_state, :repository_verified, project: project, last_repository_verification_ran_at: 2.days.ago)
create(:repository_state, :repository_verified, project: less_active_project, last_repository_verification_ran_at: 2.days.ago)
# Both states qualify; only last_repository_updated_at differs, and `eq`
# below checks the order of the result, not just its contents.
project.update_column(:last_repository_updated_at, 30.minutes.ago)
less_active_project.update_column(:last_repository_updated_at, 2.days.ago)
expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
.to eq [less_active_project, project]
end
context 'with shard restriction' do
# Finder limited to the shard that `project` is stored on.
subject { described_class.new(shard_name: project.repository_storage) }
it 'does not return projects on other shards' do
project_other_shard = create(:project)
# Move the second project to a different storage name.
project_other_shard.update_column(:repository_storage, 'other')
create(:repository_state, :repository_verified, project: project, last_repository_verification_ran_at: 2.days.ago)
create(:repository_state, :repository_verified, project: project_other_shard, last_repository_verification_ran_at: 2.days.ago)
expect(subject.find_reverifiable_repositories(interval: 1.day.ago, batch_size: 10))
.to match_array(project)
end
end
end
# Specs for the wiki flavour of the re-verification finder.
# `project` and the default `subject` are defined outside this block (not
# visible here). Every example passes `1.day.ago` as the cutoff.
describe '#find_reverifiable_wikis' do
it 'returns projects where wiki was verified before the minimum re-verification interval' do
project_recently_verified = create(:project)
# Last run two days ago: older than the one-day cutoff, so it is expected back.
create(:repository_state, :wiki_verified, project: project, last_wiki_verification_ran_at: 2.days.ago)
# Last run just now: newer than the cutoff, so it must be left out.
create(:repository_state, :wiki_verified, project: project_recently_verified, last_wiki_verification_ran_at: Time.now)
expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
.to match_array(project)
end
it 'does not return projects where wiki verification is outdated' do
# The :wiki_outdated trait has no checksum, so the state does not count
# as successfully verified even though the last run is old enough.
create(:repository_state, :wiki_outdated, project: project, last_wiki_verification_ran_at: 2.days.ago)
expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
.to be_empty
end
it 'does not return projects where wiki verification failed' do
# The :wiki_failed trait records a failure message and no checksum.
create(:repository_state, :wiki_failed, project: project, last_wiki_verification_ran_at: 2.days.ago)
expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
.to be_empty
end
it 'returns less active projects first' do
less_active_project = create(:project)
create(:repository_state, :wiki_verified, project: project, last_wiki_verification_ran_at: 2.days.ago)
create(:repository_state, :wiki_verified, project: less_active_project, last_wiki_verification_ran_at: 2.days.ago)
# Both states qualify; only last_repository_updated_at differs, and `eq`
# below checks the order of the result, not just its contents.
project.update_column(:last_repository_updated_at, 30.minutes.ago)
less_active_project.update_column(:last_repository_updated_at, 2.days.ago)
expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
.to eq [less_active_project, project]
end
context 'with shard restriction' do
# Finder limited to the shard that `project` is stored on.
subject { described_class.new(shard_name: project.repository_storage) }
it 'does not return projects on other shards' do
project_other_shard = create(:project)
# Move the second project to a different storage name.
project_other_shard.update_column(:repository_storage, 'other')
create(:repository_state, :wiki_verified, project: project, last_wiki_verification_ran_at: 2.days.ago)
create(:repository_state, :wiki_verified, project: project_other_shard, last_wiki_verification_ran_at: 2.days.ago)
expect(subject.find_reverifiable_wikis(interval: 1.day.ago, batch_size: 10))
.to match_array(project)
end
end
end
end end
...@@ -4,10 +4,42 @@ require 'spec_helper' ...@@ -4,10 +4,42 @@ require 'spec_helper'
describe Gitlab::BackgroundMigration::ResetChecksumFromProjectRepositoryStates, :migration, schema: 20180914195058 do describe Gitlab::BackgroundMigration::ResetChecksumFromProjectRepositoryStates, :migration, schema: 20180914195058 do
describe '#perform' do describe '#perform' do
let(:users) { table(:users) }
let(:projects) { table(:projects) }
let(:repository_states) { table(:project_repository_states) }
# Creates a row through `repository_states` using defaults that describe a
# state with both repository and wiki checksums set and no failures or
# retries. Any key given in `params` overrides or extends those defaults.
def create_repository_state(params = {})
  verified_defaults = {
    repository_verification_checksum: 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee',
    last_repository_verification_failure: nil,
    repository_retry_count: nil,
    repository_retry_at: nil,
    wiki_verification_checksum: 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef',
    last_wiki_verification_failure: nil,
    wiki_retry_count: nil,
    wiki_retry_at: nil
  }

  repository_states.create!(verified_defaults.merge(params))
end
it 'processes all repository states in batch' do it 'processes all repository states in batch' do
repository_state_1 = create(:repository_state, :repository_verified, :wiki_verified) users.create!(email: 'test@example.com', projects_limit: 100, username: 'test')
repository_state_2 = create(:repository_state, :repository_failed, :wiki_failed) projects.create!(id: 1, name: 'project-1', path: 'project-1', visibility_level: 0, namespace_id: 1)
repository_state_3 = create(:repository_state, :repository_verified, :wiki_verified) projects.create!(id: 2, name: 'project-2', path: 'project-2', visibility_level: 0, namespace_id: 1)
projects.create!(id: 3, name: 'project-3', path: 'project-3', visibility_level: 0, namespace_id: 1)
repository_state_1 = create_repository_state(project_id: 1)
repository_state_2 = create_repository_state(
project_id: 2,
wiki_verification_checksum: nil,
last_wiki_verification_failure: 'Could not calculate the checksum',
wiki_retry_count: 1,
wiki_retry_at: Time.now + 5.minutes
)
repository_state_3 = create_repository_state(project_id: 3)
subject.perform(repository_state_1.project_id, repository_state_2.project_id) subject.perform(repository_state_1.project_id, repository_state_2.project_id)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment