Commit 94d5c97a authored by Mike Kozono's avatar Mike Kozono

Refactor: Extract blob verification specific logic

From `Gitlab::Geo::ReplicableModel` into `Geo::BlobReplicatorStrategy`.
parent 7ad5be2d
......@@ -56,6 +56,26 @@ module Geo
).execute
end
# Returns a checksum of the file
#
# @return [String] SHA256 hash of the carrierwave file
def calculate_checksum
raise 'File is not checksummable' unless checksummable?
model.hexdigest(carrierwave_uploader.path)
end
# Returns whether the file exists on disk or in remote storage
#
# Does a hard check because we are doing these checks for replication or
# verification purposes, so we should not just trust the data in the DB if
# we don't absolutely have to.
#
# @return [Boolean] whether the file exists on disk or in remote storage
def file_exist?
carrierwave_uploader.file.exists?
end
private
def download
......@@ -65,5 +85,12 @@ module Geo
def deleted_params
{ model_record_id: model_record.id, blob_path: blob_path }
end
# Return whether it's capable of generating a checksum of itself
#
# @return [Boolean] whether it can generate a checksum
def checksummable?
carrierwave_uploader.file_storage? && file_exist?
end
end
end
......@@ -138,7 +138,7 @@ module Geo
# state.
def verify
verification_state_tracker.track_checksum_attempt! do
model_record.calculate_checksum
calculate_checksum
end
end
......@@ -165,12 +165,24 @@ module Geo
Gitlab::Geo.secondary? ? registry : model_record
end
# @abstract
# @return [String] a checksum representing the data
def calculate_checksum
raise NotImplementedError, "#{self.class} does not implement #{__method__}"
end
private
def should_primary_verify?
self.class.verification_enabled? &&
primary_checksum.nil? && # Some models may populate this as part of creating the record
model_record.checksummable?
checksummable?
end
# @abstract
# @return [Boolean] whether the replicable is capable of checksumming itself
def checksummable?
raise NotImplementedError, "#{self.class} does not implement #{__method__}"
end
end
end
......@@ -59,10 +59,6 @@ module EE
super || build_merge_request_diff_detail
end
def checksummable?
stored_externally? && super
end
def log_geo_deleted_event
# Keep empty for now. Should be addressed in future
# by https://gitlab.com/gitlab-org/gitlab/issues/33817
......
......@@ -3,6 +3,7 @@
module Geo
class MergeRequestDiffReplicator < Gitlab::Geo::Replicator
include ::Geo::BlobReplicatorStrategy
extend ::Gitlab::Utils::Override
def self.model
::MergeRequestDiff
......@@ -15,5 +16,13 @@ module Geo
def carrierwave_uploader
model_record.external_diff
end
private
# Only external diffs can be checksummed
override :checksummable?
def checksummable?
model_record.stored_externally? && super
end
end
end
......@@ -48,33 +48,6 @@ module Gitlab
raise NotImplementedError, 'There is no Replicator defined for this model'
end
# Returns a checksum of the file (assumed to be a "blob" type)
#
# @return [String] SHA256 hash of the carrierwave file
def calculate_checksum
raise 'File is not checksummable' unless checksummable?
self.class.hexdigest(replicator.carrierwave_uploader.path)
end
# Return whether its capable of generating a checksum of itself
#
# @return [Boolean] whether it can generate a checksum
def checksummable?
replicator.carrierwave_uploader.file_storage? && file_exist?
end
# Returns whether the file exists on disk or in remote storage
#
# Does a hard check because we are doing these checks for replication or
# verification purposes, so we should not just trust the data in the DB if
# we don't absolutely have to.
#
# @return [Boolean] whether the file exists on disk or in remote storage
def file_exist?
replicator.carrierwave_uploader.file.exists?
end
def in_replicables_for_current_secondary?
self.class.replicables_for_current_secondary(self).exists?
end
......
......@@ -19,7 +19,7 @@ module Gitlab
def execute
return error("#{replicator.replicable_name} not found") unless recorded_file
return file_not_found(recorded_file) unless recorded_file.file_exist?
return file_not_found(recorded_file) unless replicator.file_exist?
return error('Checksum mismatch') unless matches_checksum?
success(replicator.carrierwave_uploader)
......
......@@ -4,22 +4,6 @@ require 'spec_helper'
RSpec.describe Packages::PackageFile, type: :model do
include ::EE::GeoHelpers
describe '#calculate_checksum' do
let(:package_file) { create(:conan_package_file, :conan_recipe_file) }
it 'returns SHA256 sum of the file' do
expected = Digest::SHA256.file(package_file.file.path).hexdigest
expect(package_file.calculate_checksum).to eq(expected)
end
it 'returns nil for a non-existent file' do
allow(package_file).to receive(:file_exist?).and_return(false)
expect(package_file.calculate_checksum).to eq(nil)
end
end
context 'new file' do
it 'calls checksum worker' do
stub_primary_node
......
......@@ -23,7 +23,7 @@ RSpec.describe Geo::EventService do
it 'executes the consume part of the replication' do
subject.execute
expect(model_record.file_exist?).to be_truthy
expect(model_record.file.exists?).to be_truthy
end
end
end
......@@ -169,4 +169,32 @@ RSpec.shared_examples 'a blob replicator' do
end
end
end
describe '#calculate_checksum' do
context 'when the file is locally stored' do
context 'when the file exists' do
it 'returns hexdigest of the file' do
expected = described_class.model.hexdigest(subject.carrierwave_uploader.path)
expect(subject.calculate_checksum).to eq(expected)
end
end
context 'when the file does not exist' do
it 'raises an error' do
allow(subject).to receive(:file_exist?).and_return(false)
expect { subject.calculate_checksum }.to raise_error('File is not checksummable')
end
end
end
context 'when the file is remotely stored' do
it 'raises an error' do
allow(subject.carrierwave_uploader).to receive(:file_storage?).and_return(false)
expect { subject.calculate_checksum }.to raise_error('File is not checksummable')
end
end
end
end
......@@ -301,7 +301,7 @@ RSpec.shared_examples 'a verifiable replicator' do
it 'calls verify_async if needed' do
allow(described_class).to receive(:verification_enabled?).and_return(true)
allow(replicator).to receive(:primary_checksum).and_return(nil)
allow(model_record).to receive(:checksummable?).and_return(true)
allow(replicator).to receive(:checksummable?).and_return(true)
expect(replicator).to receive(:verify_async)
......@@ -332,8 +332,8 @@ RSpec.shared_examples 'a verifiable replicator' do
it 'wraps the checksum calculation in track_checksum_attempt!' do
tracker = double('tracker')
allow(replicator).to receive(:verification_state_tracker).and_return(tracker)
allow(replicator).to receive(:calculate_checksum).and_return('abc123')
expect(model_record).to receive(:calculate_checksum)
expect(tracker).to receive(:track_checksum_attempt!).and_yield
replicator.verify
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment