Commit 949d1b37 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'ac/lfs-direct-upload-ee-to-ce' into 'master'

LFS direct upload port to CE

Closes #44548

See merge request gitlab-org/gitlab-ce!17752
parents 092445a4 04c5e637
...@@ -118,9 +118,9 @@ gem 'carrierwave', '~> 1.2' ...@@ -118,9 +118,9 @@ gem 'carrierwave', '~> 1.2'
gem 'dropzonejs-rails', '~> 0.7.1' gem 'dropzonejs-rails', '~> 0.7.1'
# for backups # for backups
gem 'fog-aws', '~> 2.0' gem 'fog-aws', '~> 2.0.1'
gem 'fog-core', '~> 1.44' gem 'fog-core', '~> 1.44'
gem 'fog-google', '~> 0.5' gem 'fog-google', '~> 1.3.3'
gem 'fog-local', '~> 0.3' gem 'fog-local', '~> 0.3'
gem 'fog-openstack', '~> 0.1' gem 'fog-openstack', '~> 0.1'
gem 'fog-rackspace', '~> 0.1.1' gem 'fog-rackspace', '~> 0.1.1'
......
...@@ -244,10 +244,11 @@ GEM ...@@ -244,10 +244,11 @@ GEM
builder builder
excon (~> 0.58) excon (~> 0.58)
formatador (~> 0.2) formatador (~> 0.2)
fog-google (0.5.3) fog-google (1.3.3)
fog-core fog-core
fog-json fog-json
fog-xml fog-xml
google-api-client (~> 0.19.1)
fog-json (1.0.2) fog-json (1.0.2)
fog-core (~> 1.0) fog-core (~> 1.0)
multi_json (~> 1.10) multi_json (~> 1.10)
...@@ -1047,9 +1048,9 @@ DEPENDENCIES ...@@ -1047,9 +1048,9 @@ DEPENDENCIES
flipper-active_record (~> 0.13.0) flipper-active_record (~> 0.13.0)
flipper-active_support_cache_store (~> 0.13.0) flipper-active_support_cache_store (~> 0.13.0)
fog-aliyun (~> 0.2.0) fog-aliyun (~> 0.2.0)
fog-aws (~> 2.0) fog-aws (~> 2.0.1)
fog-core (~> 1.44) fog-core (~> 1.44)
fog-google (~> 0.5) fog-google (~> 1.3.3)
fog-local (~> 0.3) fog-local (~> 0.3)
fog-openstack (~> 0.1) fog-openstack (~> 0.1)
fog-rackspace (~> 0.1.1) fog-rackspace (~> 0.1.1)
......
...@@ -17,20 +17,23 @@ class Projects::LfsStorageController < Projects::GitHttpClientController ...@@ -17,20 +17,23 @@ class Projects::LfsStorageController < Projects::GitHttpClientController
def upload_authorize def upload_authorize
set_workhorse_internal_api_content_type set_workhorse_internal_api_content_type
render json: Gitlab::Workhorse.lfs_upload_ok(oid, size)
authorized = LfsObjectUploader.workhorse_authorize
authorized.merge!(LfsOid: oid, LfsSize: size)
render json: authorized
end end
def upload_finalize def upload_finalize
unless tmp_filename if store_file!(oid, size)
render_lfs_forbidden
return
end
if store_file(oid, size, tmp_filename)
head 200 head 200
else else
render plain: 'Unprocessable entity', status: 422 render plain: 'Unprocessable entity', status: 422
end end
rescue ActiveRecord::RecordInvalid
render_400
rescue ObjectStorage::RemoteStoreError
render_lfs_forbidden
end end
private private
...@@ -51,35 +54,28 @@ class Projects::LfsStorageController < Projects::GitHttpClientController ...@@ -51,35 +54,28 @@ class Projects::LfsStorageController < Projects::GitHttpClientController
params[:size].to_i params[:size].to_i
end end
def tmp_filename def store_file!(oid, size)
name = request.headers['X-Gitlab-Lfs-Tmp'] object = LfsObject.find_by(oid: oid, size: size)
return if name.include?('/') unless object&.file&.exists?
return unless oid.present? && name.start_with?(oid) object = create_file!(oid, size)
end
name
end
def store_file(oid, size, tmp_file) return unless object
# Define tmp_file_path early because we use it in "ensure"
tmp_file_path = File.join(LfsObjectUploader.workhorse_upload_path, tmp_file)
object = LfsObject.find_or_create_by(oid: oid, size: size) link_to_project!(object)
file_exists = object.file.exists? || move_tmp_file_to_storage(object, tmp_file_path)
file_exists && link_to_project(object)
ensure
FileUtils.rm_f(tmp_file_path)
end end
def move_tmp_file_to_storage(object, path) def create_file!(oid, size)
object.file = File.open(path) LfsObject.new(oid: oid, size: size).tap do |object|
object.file.store! object.file.store_workhorse_file!(params, :file)
object.save object.save!
end
end end
def link_to_project(object) def link_to_project!(object)
if object && !object.projects.exists?(storage_project.id) if object && !object.projects.exists?(storage_project.id)
object.projects << storage_project object.projects << storage_project
object.save object.save!
end end
end end
end end
...@@ -11,6 +11,12 @@ class LfsObject < ActiveRecord::Base ...@@ -11,6 +11,12 @@ class LfsObject < ActiveRecord::Base
mount_uploader :file, LfsObjectUploader mount_uploader :file, LfsObjectUploader
before_save :update_file_store
def update_file_store
self.file_store = file.object_store
end
def project_allowed_access?(project) def project_allowed_access?(project)
projects.exists?(project.lfs_storage_project.id) projects.exists?(project.lfs_storage_project.id)
end end
......
...@@ -10,6 +10,9 @@ module ObjectStorage ...@@ -10,6 +10,9 @@ module ObjectStorage
UnknownStoreError = Class.new(StandardError) UnknownStoreError = Class.new(StandardError)
ObjectStorageUnavailable = Class.new(StandardError) ObjectStorageUnavailable = Class.new(StandardError)
DIRECT_UPLOAD_TIMEOUT = 4.hours
TMP_UPLOAD_PATH = 'tmp/upload'.freeze
module Store module Store
LOCAL = 1 LOCAL = 1
REMOTE = 2 REMOTE = 2
...@@ -124,6 +127,10 @@ module ObjectStorage ...@@ -124,6 +127,10 @@ module ObjectStorage
object_store_options.enabled object_store_options.enabled
end end
def direct_upload_enabled?
object_store_options.direct_upload
end
def background_upload_enabled? def background_upload_enabled?
object_store_options.background_upload object_store_options.background_upload
end end
...@@ -147,6 +154,45 @@ module ObjectStorage ...@@ -147,6 +154,45 @@ module ObjectStorage
def serialization_column(model_class, mount_point) def serialization_column(model_class, mount_point)
model_class.uploader_options.dig(mount_point, :mount_on) || mount_point model_class.uploader_options.dig(mount_point, :mount_on) || mount_point
end end
def workhorse_authorize
if options = workhorse_remote_upload_options
{ RemoteObject: options }
else
{ TempPath: workhorse_local_upload_path }
end
end
def workhorse_local_upload_path
File.join(self.root, TMP_UPLOAD_PATH)
end
def workhorse_remote_upload_options
return unless self.object_store_enabled?
return unless self.direct_upload_enabled?
id = [CarrierWave.generate_cache_id, SecureRandom.hex].join('-')
upload_path = File.join(TMP_UPLOAD_PATH, id)
connection = ::Fog::Storage.new(self.object_store_credentials)
expire_at = Time.now + DIRECT_UPLOAD_TIMEOUT
options = { 'Content-Type' => 'application/octet-stream' }
{
ID: id,
GetURL: connection.get_object_url(remote_store_path, upload_path, expire_at),
DeleteURL: connection.delete_object_url(remote_store_path, upload_path, expire_at),
StoreURL: connection.put_object_url(remote_store_path, upload_path, expire_at, options)
}
end
end
# allow to configure and overwrite the filename
def filename
@filename || super || file&.filename # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def filename=(filename)
@filename = filename # rubocop:disable Gitlab/ModuleWithInstanceVariables
end end
def file_storage? def file_storage?
...@@ -195,10 +241,6 @@ module ObjectStorage ...@@ -195,10 +241,6 @@ module ObjectStorage
end end
end end
def filename
super || file&.filename
end
# #
# Move the file to another store # Move the file to another store
# #
...@@ -253,6 +295,18 @@ module ObjectStorage ...@@ -253,6 +295,18 @@ module ObjectStorage
} }
end end
def store_workhorse_file!(params, identifier)
filename = params["#{identifier}.name"]
if remote_object_id = params["#{identifier}.remote_id"]
store_remote_file!(remote_object_id, filename)
elsif local_path = params["#{identifier}.path"]
store_local_file!(local_path, filename)
else
raise RemoteStoreError, 'Bad file'
end
end
private private
def schedule_background_upload? def schedule_background_upload?
...@@ -261,6 +315,38 @@ module ObjectStorage ...@@ -261,6 +315,38 @@ module ObjectStorage
self.file_storage? self.file_storage?
end end
def store_remote_file!(remote_object_id, filename)
raise RemoteStoreError, 'Missing filename' unless filename
file_path = File.join(TMP_UPLOAD_PATH, remote_object_id)
file_path = Pathname.new(file_path).cleanpath.to_s
raise RemoteStoreError, 'Bad file path' unless file_path.start_with?(TMP_UPLOAD_PATH + '/')
self.object_store = Store::REMOTE
# TODO:
# This should be changed to make use of `tmp/cache` mechanism
# instead of using custom upload directory,
# using tmp/cache makes this implementation way easier than it is today
CarrierWave::Storage::Fog::File.new(self, storage, file_path).tap do |file|
raise RemoteStoreError, 'Missing file' unless file.exists?
self.filename = filename
self.file = storage.store!(file)
end
end
def store_local_file!(local_path, filename)
raise RemoteStoreError, 'Missing filename' unless filename
root_path = File.realpath(self.class.workhorse_local_upload_path)
file_path = File.realpath(local_path)
raise RemoteStoreError, 'Bad file path' unless file_path.start_with?(root_path)
self.object_store = Store::LOCAL
self.store!(UploadedFile.new(file_path, filename))
end
# this is a hack around CarrierWave. The #migrate method needs to be # this is a hack around CarrierWave. The #migrate method needs to be
# able to force the current file to the migrated file upon success. # able to force the current file to the migrated file upon success.
def file=(file) def file=(file)
......
---
title: Port direct upload of LFS artifacts from EE
merge_request: 17752
author:
type: added
...@@ -711,7 +711,7 @@ test: ...@@ -711,7 +711,7 @@ test:
provider: AWS # Only AWS supported at the moment provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1 region: us-east-1
artifacts: artifacts:
path: tmp/tests/artifacts path: tmp/tests/artifacts
enabled: true enabled: true
...@@ -725,7 +725,7 @@ test: ...@@ -725,7 +725,7 @@ test:
provider: AWS # Only AWS supported at the moment provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1 region: us-east-1
uploads: uploads:
storage_path: tmp/tests/public storage_path: tmp/tests/public
object_store: object_store:
...@@ -734,7 +734,7 @@ test: ...@@ -734,7 +734,7 @@ test:
provider: AWS # Only AWS supported at the moment provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1 region: us-east-1
gitlab: gitlab:
host: localhost host: localhost
port: 80 port: 80
......
...@@ -350,6 +350,7 @@ Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] || ...@@ -350,6 +350,7 @@ Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] ||
Settings.lfs['object_store'] ||= Settingslogic.new({}) Settings.lfs['object_store'] ||= Settingslogic.new({})
Settings.lfs['object_store']['enabled'] = false if Settings.lfs['object_store']['enabled'].nil? Settings.lfs['object_store']['enabled'] = false if Settings.lfs['object_store']['enabled'].nil?
Settings.lfs['object_store']['remote_directory'] ||= nil Settings.lfs['object_store']['remote_directory'] ||= nil
Settings.lfs['object_store']['direct_upload'] = false if Settings.lfs['object_store']['direct_upload'].nil?
Settings.lfs['object_store']['background_upload'] = true if Settings.lfs['object_store']['background_upload'].nil? Settings.lfs['object_store']['background_upload'] = true if Settings.lfs['object_store']['background_upload'].nil?
Settings.lfs['object_store']['proxy_download'] = false if Settings.lfs['object_store']['proxy_download'].nil? Settings.lfs['object_store']['proxy_download'] = false if Settings.lfs['object_store']['proxy_download'].nil?
# Convert upload connection settings to use string keys, to make Fog happy # Convert upload connection settings to use string keys, to make Fog happy
......
...@@ -28,16 +28,4 @@ if File.exist?(aws_file) ...@@ -28,16 +28,4 @@ if File.exist?(aws_file)
# when fog_public is false and provider is AWS or Google, defaults to 600 # when fog_public is false and provider is AWS or Google, defaults to 600
config.fog_authenticated_url_expiration = 1 << 29 config.fog_authenticated_url_expiration = 1 << 29
end end
# Mocking Fog requests, based on: https://github.com/carrierwaveuploader/carrierwave/wiki/How-to%3A-Test-Fog-based-uploaders
if Rails.env.test?
Fog.mock!
connection = ::Fog::Storage.new(
aws_access_key_id: AWS_CONFIG['access_key_id'],
aws_secret_access_key: AWS_CONFIG['secret_access_key'],
provider: 'AWS',
region: AWS_CONFIG['region']
)
connection.directories.create(key: AWS_CONFIG['bucket'])
end
end end
...@@ -63,6 +63,7 @@ For source installations the following settings are nested under `lfs:` and then ...@@ -63,6 +63,7 @@ For source installations the following settings are nested under `lfs:` and then
|---------|-------------|---------| |---------|-------------|---------|
| `enabled` | Enable/disable object storage | `false` | | `enabled` | Enable/disable object storage | `false` |
| `remote_directory` | The bucket name where LFS objects will be stored| | | `remote_directory` | The bucket name where LFS objects will be stored| |
| `direct_upload` | Set to true to enable direct upload of LFS without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. | `false` |
| `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` | | `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` |
| `proxy_download` | Set to true to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` | | `proxy_download` | Set to true to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` |
| `connection` | Various connection options described below | | | `connection` | Various connection options described below | |
......
...@@ -39,14 +39,6 @@ module Gitlab ...@@ -39,14 +39,6 @@ module Gitlab
params params
end end
def lfs_upload_ok(oid, size)
{
StoreLFSPath: LfsObjectUploader.workhorse_upload_path,
LfsOid: oid,
LfsSize: size
}
end
def artifact_upload_ok def artifact_upload_ok
{ TempPath: JobArtifactUploader.workhorse_upload_path } { TempPath: JobArtifactUploader.workhorse_upload_path }
end end
......
...@@ -278,6 +278,10 @@ describe Backup::Manager do ...@@ -278,6 +278,10 @@ describe Backup::Manager do
connection.directories.create(key: Gitlab.config.backup.upload.remote_directory) connection.directories.create(key: Gitlab.config.backup.upload.remote_directory)
end end
after do
Fog.unmock!
end
context 'target path' do context 'target path' do
it 'uses the tar filename by default' do it 'uses the tar filename by default' do
expect_any_instance_of(Fog::Collection).to receive(:create) expect_any_instance_of(Fog::Collection).to receive(:create)
......
This diff is collapsed.
module StubConfiguration module StubConfiguration
def stub_object_storage_uploader( def stub_object_storage_uploader(
config:, uploader:, remote_directory:, config:,
uploader:,
remote_directory:,
enabled: true, enabled: true,
proxy_download: false, proxy_download: false,
background_upload: false) background_upload: false,
Fog.mock! direct_upload: false
)
allow(config).to receive(:enabled) { enabled } allow(config).to receive(:enabled) { enabled }
allow(config).to receive(:proxy_download) { proxy_download } allow(config).to receive(:proxy_download) { proxy_download }
allow(config).to receive(:background_upload) { background_upload } allow(config).to receive(:background_upload) { background_upload }
allow(config).to receive(:direct_upload) { direct_upload }
return unless enabled return unless enabled
Fog.mock!
::Fog::Storage.new(uploader.object_store_credentials).tap do |connection| ::Fog::Storage.new(uploader.object_store_credentials).tap do |connection|
begin begin
connection.directories.create(key: remote_directory) connection.directories.create(key: remote_directory)
......
...@@ -27,7 +27,7 @@ describe GitlabUploader do ...@@ -27,7 +27,7 @@ describe GitlabUploader do
describe '#file_cache_storage?' do describe '#file_cache_storage?' do
context 'when file storage is used' do context 'when file storage is used' do
before do before do
uploader_class.cache_storage(:file) expect(uploader_class).to receive(:cache_storage) { CarrierWave::Storage::File }
end end
it { is_expected.to be_file_cache_storage } it { is_expected.to be_file_cache_storage }
...@@ -35,7 +35,7 @@ describe GitlabUploader do ...@@ -35,7 +35,7 @@ describe GitlabUploader do
context 'when is remote storage' do context 'when is remote storage' do
before do before do
uploader_class.cache_storage(:fog) expect(uploader_class).to receive(:cache_storage) { CarrierWave::Storage::Fog }
end end
it { is_expected.not_to be_file_cache_storage } it { is_expected.not_to be_file_cache_storage }
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment