Commit 36b1c082 authored by Igor Drozdov's avatar Igor Drozdov

Verify only 1mb of existing LFS object on download

If LfsObject already exists, we check whether the first 1mb of the
existing file is the same as 1mb of the remote file

We assume that first 1mb is not predictable and rely on this in
terms of security concerns:

https://gitlab.com/gitlab-org/gitlab/-/issues/214211
parent 89d0135c
......@@ -6,6 +6,9 @@ module Projects
class LfsDownloadService < BaseService
SizeError = Class.new(StandardError)
OidError = Class.new(StandardError)
ResponseError = Class.new(StandardError)
LARGE_FILE_SIZE = 1.megabytes
attr_reader :lfs_download_object
delegate :oid, :size, :credentials, :sanitized_url, to: :lfs_download_object, prefix: :lfs
......@@ -19,6 +22,7 @@ module Projects
def execute
return unless project&.lfs_enabled? && lfs_download_object
return error("LFS file with oid #{lfs_oid} has invalid attributes") unless lfs_download_object.valid?
return link_existing_lfs_object! if Feature.enabled?(:lfs_link_existing_object, project, default_enabled: true) && lfs_size > LARGE_FILE_SIZE && lfs_object
wrap_download_errors do
download_lfs_file!
......@@ -29,7 +33,7 @@ module Projects
def wrap_download_errors(&block)
yield
rescue SizeError, OidError, StandardError => e
rescue SizeError, OidError, ResponseError, StandardError => e
error("LFS file with oid #{lfs_oid} could't be downloaded from #{lfs_sanitized_url}: #{e.message}")
end
......@@ -56,15 +60,13 @@ module Projects
def download_and_save_file!(file)
digester = Digest::SHA256.new
response = Gitlab::HTTP.get(lfs_sanitized_url, download_headers) do |fragment|
fetch_file do |fragment|
digester << fragment
file.write(fragment)
raise_size_error! if file.size > lfs_size
end
raise StandardError, "Received error code #{response.code}" unless response.success?
raise_size_error! if file.size != lfs_size
raise_oid_error! if digester.hexdigest != lfs_oid
end
......@@ -78,6 +80,12 @@ module Projects
end
end
def fetch_file(&block)
response = Gitlab::HTTP.get(lfs_sanitized_url, download_headers, &block)
raise ResponseError, "Received error code #{response.code}" unless response.success?
end
def with_tmp_file
create_tmp_storage_dir
......@@ -123,6 +131,29 @@ module Projects
super
end
def lfs_object
@lfs_object ||= LfsObject.find_by_oid(lfs_oid)
end
def link_existing_lfs_object!
existing_file = lfs_object.file.open
buffer_size = 0
result = fetch_file do |fragment|
unless fragment == existing_file.read(fragment.size)
break error("LFS file with oid #{lfs_oid} cannot be linked with an existing LFS object")
end
buffer_size += fragment.size
break success if buffer_size > LARGE_FILE_SIZE
end
project.lfs_objects << lfs_object
result
ensure
existing_file&.close
end
end
end
end
---
title: Verify only 1mb of existing LFS object to improve LfsDownloadService performance
merge_request: 41770
author:
type: performance
---
name: lfs_link_existing_object
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/41770
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/249246
group: group::source code
type: development
default_enabled: true
......@@ -4,7 +4,8 @@ require 'spec_helper'
RSpec.describe Projects::LfsPointers::LfsDownloadService do
include StubRequests
let(:project) { create(:project) }
let_it_be(:project) { create(:project) }
let(:lfs_content) { SecureRandom.random_bytes(10) }
let(:oid) { Digest::SHA256.hexdigest(lfs_content) }
let(:download_link) { "http://gitlab.com/#{oid}" }
......@@ -14,9 +15,11 @@ RSpec.describe Projects::LfsPointers::LfsDownloadService do
subject { described_class.new(project, lfs_object) }
before do
before_all do
ApplicationSetting.create_from_defaults
end
before do
stub_application_setting(allow_local_requests_from_web_hooks_and_services: local_request_setting)
allow(project).to receive(:lfs_enabled?).and_return(true)
end
......@@ -226,5 +229,55 @@ RSpec.describe Projects::LfsPointers::LfsDownloadService do
subject.execute
end
end
context 'when a large lfs object with the same oid already exists' do
let!(:existing_lfs_object) { create(:lfs_object, :with_file, :correct_oid) }
before do
stub_const("#{described_class}::LARGE_FILE_SIZE", 500)
stub_full_request(download_link).to_return(body: lfs_content)
end
context 'and first fragments are the same' do
let(:lfs_content) { existing_lfs_object.file.read }
context 'when lfs_link_existing_object feature flag disabled' do
before do
stub_feature_flags(lfs_link_existing_object: false)
end
it 'does not call link_existing_lfs_object!' do
expect(subject).not_to receive(:link_existing_lfs_object!)
subject.execute
end
end
it 'returns success' do
expect(subject.execute).to eq({ status: :success })
end
it 'links existing lfs object to the project' do
expect { subject.execute }
.to change { project.lfs_objects.include?(existing_lfs_object) }.from(false).to(true)
end
end
context 'and first fragments diverges' do
let(:lfs_content) { SecureRandom.random_bytes(1000) }
let(:oid) { existing_lfs_object.oid }
it 'raises oid mismatch error' do
expect(subject.execute).to eq({
status: :error,
message: "LFS file with oid #{oid} cannot be linked with an existing LFS object"
})
end
it 'does not change lfs objects' do
expect { subject.execute }.not_to change { project.lfs_objects }
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment