Commit e26a80d1 authored by Douwe Maan

Merge branch '2467-repository-sync-dirty-projects' into 'master'

Perform Geo::RepositorySyncService for dirty projects

See merge request !2130
parents aa29e639 e1b905fe
......@@ -3,6 +3,11 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
validates :project, presence: true
scope :dirty, -> { where(arel_table[:resync_repository].eq(true).or(arel_table[:resync_wiki].eq(true))) }
scope :failed, -> { where.not(last_repository_synced_at: nil).where(last_repository_successful_sync_at: nil) }
scope :synced, -> { where.not(last_repository_synced_at: nil, last_repository_successful_sync_at: nil) }
# Registries whose repository sync has both been attempted and succeeded, and
# that are not flagged for another repository or wiki sync.
#
# Each NOT NULL check is its own `where.not` so that both columns must be set,
# independent of how the framework combines several keys passed to a single
# `where.not` call (the same chained style the `failed` scope uses).
def self.synced
  where.not(last_repository_synced_at: nil)
    .where.not(last_repository_successful_sync_at: nil)
    .where(resync_repository: false, resync_wiki: false)
end
end
......@@ -1095,6 +1095,10 @@ class Project < ActiveRecord::Base
end
end
# Creates the repository via create_repository, but only when
# repository_exists? reports that it is missing. Returns nil when nothing
# had to be created.
def ensure_repository
  return if repository_exists?

  create_repository
end
# Reports whether the repository exists, coercing whatever
# repository.exists? returns into a strict true/false.
def repository_exists?
  repository.exists? ? true : false
end
......
......@@ -162,6 +162,10 @@ class ProjectWiki
wiki
end
# Creates the wiki repository via create_repo!, but only when
# repository_exists? reports that it is missing. Returns nil when nothing
# had to be created.
def ensure_repository
  return if repository_exists?

  create_repo!
end
def hook_attrs
{
web_url: web_url,
......
......@@ -12,8 +12,8 @@ module Geo
def execute
try_obtain_lease do
log('Started repository sync')
started_at, finished_at = fetch_repositories
update_registry(started_at, finished_at)
sync_project_repository
sync_wiki_repository
log('Finished repository sync')
end
rescue ActiveRecord::RecordNotFound
......@@ -26,14 +26,47 @@ module Geo
@project ||= Project.find(project_id)
end
def fetch_repositories
# Memoized Geo::ProjectRegistry record for project_id; looked up with
# find_or_initialize_by on first use and reused afterwards.
def registry
  return @registry if @registry

  @registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id)
end
# When sync_repository? holds: fetches the project repository, records the
# start/finish timestamps under the :repository type in the registry, then
# expires the repository caches. Otherwise does nothing and returns nil.
def sync_project_repository
  if sync_repository?
    sync_began, sync_ended = fetch_project_repository
    update_registry(:repository, sync_began, sync_ended)
    expire_repository_caches
  end
end
# True when the registry is flagged for a repository resync, or when either
# repository sync timestamp (attempted / successful) is still nil.
def sync_repository?
  needs_resync = registry.resync_repository?
  return needs_resync if needs_resync

  registry.last_repository_successful_sync_at.nil? || registry.last_repository_synced_at.nil?
end
# When sync_wiki? holds: fetches the wiki repository and records the
# start/finish timestamps under the :wiki type in the registry. Otherwise
# does nothing and returns nil.
def sync_wiki_repository
  if sync_wiki?
    sync_began, sync_ended = fetch_wiki_repository
    update_registry(:wiki, sync_began, sync_ended)
  end
end
# True when the registry is flagged for a wiki resync, or when either wiki
# sync timestamp (attempted / successful) is still nil.
def sync_wiki?
  needs_resync = registry.resync_wiki?
  return needs_resync if needs_resync

  registry.last_wiki_successful_sync_at.nil? || registry.last_wiki_synced_at.nil?
end
def fetch_project_repository
return unless sync_repository?
log('Fetching project repository')
started_at = DateTime.now
finished_at = nil
begin
fetch_project_repository
fetch_wiki_repository
expire_repository_caches
project.ensure_repository
project.repository.fetch_geo_mirror(ssh_url_to_repo)
finished_at = DateTime.now
rescue Gitlab::Shell::Error => e
......@@ -47,18 +80,23 @@ module Geo
[started_at, finished_at]
end
def fetch_project_repository
log('Fetching project repository')
project.create_repository unless project.repository_exists?
project.repository.fetch_geo_mirror(ssh_url_to_repo)
end
def fetch_wiki_repository
# Second .wiki call returns a Gollum::Wiki, and it will always create the physical repository when not found
if project.wiki.wiki.exist?
log('Fetching wiki repository')
return unless sync_wiki?
log('Fetching wiki repository')
started_at = DateTime.now
finished_at = nil
begin
project.wiki.ensure_repository
project.wiki.repository.fetch_geo_mirror(ssh_url_to_wiki)
finished_at = DateTime.now
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error, ProjectWiki::CouldNotCreateWikiError => e
Rails.logger.error("#{self.class.name}: Error syncing wiki repository for project #{project.path_with_namespace}: #{e}")
end
[started_at, finished_at]
end
def expire_repository_caches
......@@ -84,11 +122,15 @@ module Geo
Gitlab::ExclusiveLease.cancel(lease_key, repository_lease)
end
def update_registry(started_at, finished_at)
log('Updating repository sync information')
registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id)
registry.last_repository_synced_at = started_at
registry.last_repository_successful_sync_at = finished_at if finished_at
def update_registry(type, started_at, finished_at)
log("Updating #{type} sync information")
registry.public_send("last_#{type}_synced_at=", started_at)
if finished_at
registry.public_send("last_#{type}_successful_sync_at=", finished_at)
registry.public_send("resync_#{type}=", false)
end
registry.save
end
......
......@@ -12,7 +12,7 @@ class GeoRepositorySyncWorker
start_time = Time.now
project_ids_not_synced = find_project_ids_not_synced
project_ids_updated_recently = find_synced_project_ids_updated_recently
project_ids_updated_recently = find_project_ids_updated_recently
project_ids = interleave(project_ids_not_synced, project_ids_updated_recently)
logger.info "Started Geo repository syncing for #{project_ids.length} project(s)"
......@@ -43,30 +43,24 @@ class GeoRepositorySyncWorker
# Ids of at most BATCH_SIZE projects whose id is not among the project ids of
# Geo::ProjectRegistry.synced, ordered by last_repository_updated_at
# descending.
def find_project_ids_not_synced
  synced_project_ids = Geo::ProjectRegistry.synced.pluck(:project_id)
  candidates = Project.where.not(id: synced_project_ids)
  newest_first = candidates.order(last_repository_updated_at: :desc)

  newest_first.limit(BATCH_SIZE).pluck(:id)
end
def find_synced_project_ids_updated_recently
Geo::ProjectRegistry.where(project_id: find_project_ids_updated_recently)
.where('last_repository_synced_at <= ?', LAST_SYNC_INTERVAL.ago)
.order(last_repository_synced_at: :asc)
def find_project_ids_updated_recently
Geo::ProjectRegistry.dirty
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at, :desc))
.limit(BATCH_SIZE)
.pluck(:project_id)
end
def find_project_ids_updated_recently
Project.where(id: Geo::ProjectRegistry.synced.pluck(:project_id))
.where('last_repository_updated_at >= ?', LAST_SYNC_INTERVAL.ago)
.pluck(:id)
end
def interleave(first, second)
if first.length >= second.length
first.zip(second)
else
second.zip(first).map(&:reverse)
end.flatten(1).compact.take(BATCH_SIZE)
end.flatten(1).uniq.compact.take(BATCH_SIZE)
end
def over_time?(start_time)
......
# Adds two datetime columns to project_registry: last_wiki_synced_at and
# last_wiki_successful_sync_at.
class AddLastWikiSyncedAtToProjectRegistry < ActiveRecord::Migration
  DOWNTIME = false

  def change
    %i[last_wiki_synced_at last_wiki_successful_sync_at].each do |column|
      add_column :project_registry, column, :datetime
    end
  end
end
# Adds single-column indexes on the project_registry columns listed in
# INDEXED_COLUMNS, using the concurrent index helpers (hence the disabled DDL
# transaction).
class AddIndexToProjectRegistry < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Columns indexed by #up, in creation order.
  INDEXED_COLUMNS = %i[
    last_repository_synced_at
    last_repository_successful_sync_at
    resync_repository
    resync_wiki
  ].freeze

  disable_ddl_transaction!

  def up
    INDEXED_COLUMNS.each do |column|
      add_concurrent_index :project_registry, column
    end
  end

  # Drops each index only if it is present.
  def down
    INDEXED_COLUMNS.each do |column|
      next unless index_exists?(:project_registry, column)

      remove_concurrent_index :project_registry, column
    end
  end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20170606155045) do
ActiveRecord::Schema.define(version: 20170627195211) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -37,8 +37,14 @@ ActiveRecord::Schema.define(version: 20170606155045) do
t.datetime "created_at", null: false
t.boolean "resync_repository", default: true, null: false
t.boolean "resync_wiki", default: true, null: false
t.datetime "last_wiki_synced_at"
t.datetime "last_wiki_successful_sync_at"
end
add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree
add_index "project_registry", ["last_repository_synced_at"], name: "index_project_registry_on_last_repository_synced_at", using: :btree
add_index "project_registry", ["project_id"], name: "index_project_registry_on_project_id", using: :btree
add_index "project_registry", ["resync_repository"], name: "index_project_registry_on_resync_repository", using: :btree
add_index "project_registry", ["resync_wiki"], name: "index_project_registry_on_resync_wiki", using: :btree
end
# Factory for Geo::ProjectRegistry records.
#
# Without traits a registry has no sync timestamps and both resync flags set,
# i.e. the same flag values as the :dirty trait.
FactoryGirl.define do
factory :geo_project_registry, class: Geo::ProjectRegistry do
project factory: :empty_project
last_repository_synced_at nil
last_repository_successful_sync_at nil
last_wiki_synced_at nil
last_wiki_successful_sync_at nil
resync_repository true
resync_wiki true
# Both the repository and the wiki are flagged for resync.
trait :dirty do
resync_repository true
resync_wiki true
end
# Only the repository is flagged for resync.
trait :repository_dirty do
resync_repository true
resync_wiki false
end
# Only the wiki is flagged for resync.
trait :wiki_dirty do
resync_repository false
resync_wiki true
end
# All four sync timestamps set (5 days ago) and no resync flag set.
trait :synced do
last_repository_synced_at { 5.days.ago }
last_repository_successful_sync_at { 5.days.ago }
last_wiki_synced_at { 5.days.ago }
last_wiki_successful_sync_at { 5.days.ago }
resync_repository false
resync_wiki false
end
# Sync attempted (5 days ago) but never successful; both resync flags set.
trait :sync_failed do
last_repository_synced_at { 5.days.ago }
last_repository_successful_sync_at nil
last_wiki_synced_at { 5.days.ago }
last_wiki_successful_sync_at nil
resync_repository true
resync_wiki true
end
end
end
......@@ -8,4 +8,27 @@ describe Geo::ProjectRegistry, models: true do
describe 'validations' do
it { is_expected.to validate_presence_of(:project) }
end
# .synced must only return registries with a successful repository sync and
# neither resync flag set. A later factory trait overrides an earlier one, so
# `:synced, :dirty` yields synced timestamps with both resync flags set.
describe '.synced' do
  let(:project) { create(:empty_project) }

  it 'does not return dirty projects' do
    create(:geo_project_registry, :synced, :dirty, project: project)

    expect(described_class.synced).to be_empty
  end

  it 'does not return projects where only the repository is dirty' do
    create(:geo_project_registry, :synced, :repository_dirty, project: project)

    expect(described_class.synced).to be_empty
  end

  it 'does not return projects where only the wiki is dirty' do
    create(:geo_project_registry, :synced, :wiki_dirty, project: project)

    expect(described_class.synced).to be_empty
  end

  it 'does not return projects where last attempt to sync failed' do
    create(:geo_project_registry, :sync_failed, project: project)

    expect(described_class.synced).to be_empty
  end

  it 'returns synced projects' do
    registry = create(:geo_project_registry, :synced, project: project)

    expect(described_class.synced).to match_array([registry])
  end
end
end
......@@ -1569,6 +1569,37 @@ describe Project, models: true do
end
end
# Covers Project#ensure_repository: create_repository must be invoked only
# when repository_exists? is false.
describe '#ensure_repository' do
let(:project) { create(:project, :repository) }
let(:shell) { Gitlab::Shell.new }
before do
# Route the project's gitlab_shell to the instance stubbed in the examples.
allow(project).to receive(:gitlab_shell).and_return(shell)
end
it 'creates the repository if it not exist' do
# Force the "repository is missing" branch.
allow(project).to receive(:repository_exists?)
.and_return(false)
# NOTE(review): create_repository itself is mocked below, so this shell stub
# is presumably never reached — confirm before relying on or removing it.
allow(shell).to receive(:add_repository)
.with(project.repository_storage_path, project.path_with_namespace)
.and_return(true)
expect(project).to receive(:create_repository)
project.ensure_repository
end
it 'does not create the repository if it exists' do
# Force the "repository already exists" branch.
allow(project).to receive(:repository_exists?)
.and_return(true)
expect(project).not_to receive(:create_repository)
project.ensure_repository
end
end
describe 'handling import URL' do
context 'when project is a mirror' do
it 'returns the full URL' do
......
......@@ -286,6 +286,24 @@ describe ProjectWiki, models: true do
end
end
# Covers ProjectWiki#ensure_repository: create_repo! must be invoked only when
# repository_exists? is false.
describe '#ensure_repository' do
it 'creates the repository if it not exist' do
# Force the "repository is missing" branch.
allow(subject).to receive(:repository_exists?).and_return(false)
expect(subject).to receive(:create_repo!)
subject.ensure_repository
end
it 'does not create the repository if it exists' do
# Force the "repository already exists" branch.
allow(subject).to receive(:repository_exists?).and_return(true)
expect(subject).not_to receive(:create_repo!)
subject.ensure_repository
end
end
describe '#hook_attrs' do
it 'returns a hash with values' do
expect(subject.hook_attrs).to be_a Hash
......
......@@ -26,7 +26,15 @@ describe GeoRepositorySyncWorker do
last_repository_successful_sync_at: nil
)
expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy)
Geo::ProjectRegistry.create(
project: project_2,
last_repository_synced_at: DateTime.now,
last_repository_successful_sync_at: DateTime.now,
resync_repository: false,
resync_wiki: false
)
expect(Geo::RepositorySyncService).to receive(:new).once.and_return(spy)
subject.perform
end
......@@ -35,19 +43,28 @@ describe GeoRepositorySyncWorker do
Geo::ProjectRegistry.create(
project: project_1,
last_repository_synced_at: 2.days.ago,
last_repository_successful_sync_at: 2.days.ago
last_repository_successful_sync_at: 2.days.ago,
resync_repository: true,
resync_wiki: false
)
Geo::ProjectRegistry.create(
project: project_2,
last_repository_synced_at: 2.days.ago,
last_repository_successful_sync_at: 2.days.ago
last_repository_synced_at: 10.minutes.ago,
last_repository_successful_sync_at: 10.minutes.ago,
resync_repository: false,
resync_wiki: false
)
project_1.update_attribute(:last_repository_updated_at, 2.days.ago)
project_2.update_attribute(:last_repository_updated_at, 10.minutes.ago)
Geo::ProjectRegistry.create(
project: create(:empty_project),
last_repository_synced_at: 5.minutes.ago,
last_repository_successful_sync_at: 5.minutes.ago,
resync_repository: false,
resync_wiki: true
)
expect(Geo::RepositorySyncService).to receive(:new).once.and_return(spy)
expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy)
subject.perform
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment