Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
7a0da750
Commit
7a0da750
authored
Apr 05, 2017
by
Douglas Barbosa Alexandre
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add support on GitLab Geo download scheduler for recorded files
parent
f6249945
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
24 deletions
+51
-24
app/workers/geo_file_download_dispatch_worker.rb
app/workers/geo_file_download_dispatch_worker.rb
+41
-18
spec/workers/geo_file_download_dispatch_worker_spec.rb
spec/workers/geo_file_download_dispatch_worker_spec.rb
+10
-6
No files found.
app/workers/geo_file_download_dispatch_worker.rb
View file @
7a0da750
...
...
@@ -9,8 +9,8 @@ class GeoFileDownloadDispatchWorker
MAX_CONCURRENT_DOWNLOADS
=
10
.
freeze
def
initialize
@pending_
lfs_
downloads
=
[]
@scheduled_
lfs_
jobs
=
[]
@pending_downloads
=
[]
@scheduled_jobs
=
[]
end
# The scheduling works as the following:
...
...
@@ -34,6 +34,7 @@ class GeoFileDownloadDispatchWorker
update_jobs_in_progress
load_pending_downloads
if
reload_queue?
# If we are still under the limit after refreshing our DB, we can end
# after scheduling the remaining transfers.
last_batch
=
reload_queue?
...
...
@@ -41,7 +42,7 @@ class GeoFileDownloadDispatchWorker
break
if
over_time?
break
unless
downloads_remain?
schedule_
lfs_
downloads
schedule_downloads
break
if
last_batch
...
...
@@ -53,7 +54,7 @@ class GeoFileDownloadDispatchWorker
private
def
reload_queue?
@pending_
lfs_
downloads
.
size
<
MAX_CONCURRENT_DOWNLOADS
@pending_downloads
.
size
<
MAX_CONCURRENT_DOWNLOADS
end
def
over_time?
...
...
@@ -61,32 +62,54 @@ class GeoFileDownloadDispatchWorker
end
def
load_pending_downloads
@pending_lfs_downloads
=
find_lfs_object_ids
(
DB_RETRIEVE_BATCH
)
lfs_object_ids
=
find_lfs_object_ids
(
DB_RETRIEVE_BATCH
)
objects_ids
=
find_object_ids
(
DB_RETRIEVE_BATCH
-
lfs_object_ids
.
size
)
@pending_downloads
=
lfs_object_ids
+
objects_ids
end
def
downloads_remain?
@pending_
lfs_
downloads
.
size
@pending_downloads
.
size
end
def
schedule_
lfs_
downloads
num_to_schedule
=
[
MAX_CONCURRENT_DOWNLOADS
-
job_ids
.
size
,
@pending_
lfs_
downloads
.
size
].
min
def
schedule_downloads
num_to_schedule
=
[
MAX_CONCURRENT_DOWNLOADS
-
job_ids
.
size
,
@pending_downloads
.
size
].
min
return
unless
downloads_remain?
num_to_schedule
.
times
do
lfs_id
=
@pending_lfs
_downloads
.
shift
job_id
=
GeoFileDownloadWorker
.
perform_async
(
:lfs
,
lfs
_id
)
object_id
,
object_type
=
@pending
_downloads
.
shift
job_id
=
GeoFileDownloadWorker
.
perform_async
(
object_type
,
object
_id
)
if
job_id
@scheduled_
lfs_jobs
<<
{
job_id:
job_id
,
id:
lfs
_id
}
@scheduled_
jobs
<<
{
id:
object_id
,
type:
object_type
,
job_id:
job
_id
}
end
end
end
def
find_object_ids
(
limit
)
downloaded_ids
=
find_downloaded_ids
([
:attachment
,
:avatar
,
:file
])
Upload
.
where
.
not
(
id:
downloaded_ids
)
.
order
(
created_at: :desc
)
.
limit
(
limit
)
.
pluck
(
:id
,
:uploader
)
.
map
{
|
id
,
uploader
|
[
id
,
uploader
.
gsub
(
'Uploader'
,
''
).
downcase
]
}
end
def
find_lfs_object_ids
(
limit
)
downloaded_ids
=
Geo
::
FileRegistry
.
where
(
file_type:
'lfs'
).
pluck
(
:file_id
)
downloaded_ids
=
(
downloaded_ids
+
scheduled_lfs_ids
).
uniq
LfsObject
.
where
.
not
(
id:
downloaded_ids
).
order
(
created_at: :desc
).
limit
(
limit
).
pluck
(
:id
)
downloaded_ids
=
find_downloaded_ids
([
:lfs
])
LfsObject
.
where
.
not
(
id:
downloaded_ids
)
.
order
(
created_at: :desc
)
.
limit
(
limit
)
.
pluck
(
:id
)
.
map
{
|
id
|
[
id
,
:lfs
]
}
end
def
find_downloaded_ids
(
file_types
)
downloaded_lfs_ids
=
Geo
::
FileRegistry
.
where
(
file_type:
file_types
).
pluck
(
:file_id
)
downloaded_lfs_ids
=
(
downloaded_lfs_ids
+
scheduled_ids
(
file_types
)).
uniq
end
def
update_jobs_in_progress
...
...
@@ -95,15 +118,15 @@ class GeoFileDownloadDispatchWorker
# SidekiqStatus returns an array of booleans: true if the job has completed, false otherwise.
# For each entry, first use `zip` to make { job_id: 123, id: 10 } -> [ { job_id: 123, id: 10 }, bool ]
# Next, filter out the jobs that have completed.
@scheduled_
lfs_jobs
=
@scheduled_lfs
_jobs
.
zip
(
status
).
map
{
|
(
job
,
completed
)
|
job
if
completed
}.
compact
@scheduled_
jobs
=
@scheduled
_jobs
.
zip
(
status
).
map
{
|
(
job
,
completed
)
|
job
if
completed
}.
compact
end
def
job_ids
@scheduled_
lfs_
jobs
.
map
{
|
data
|
data
[
:job_id
]
}
@scheduled_jobs
.
map
{
|
data
|
data
[
:job_id
]
}
end
def
scheduled_
lfs_ids
@scheduled_
lfs_jobs
.
map
{
|
data
|
data
[
:id
]
}
def
scheduled_
ids
(
types
)
@scheduled_
jobs
.
select
{
|
data
|
types
.
include?
(
data
[
:type
])
}
.
map
{
|
data
|
data
[
:id
]
}
end
def
try_obtain_lease
...
...
spec/workers/geo_file_download_dispatch_worker_spec.rb
View file @
7a0da750
...
...
@@ -45,21 +45,25 @@ describe GeoFileDownloadDispatchWorker do
# Test the case where we have:
#
# 1. A total of 6 files in the queue, and we can load a maximum of 5 and send 2 at a time.
# 1. A total of 8 files in the queue, and we can load a maximum of 5 and send 2 at a time.
# 2. We send 2, wait for 1 to finish, and then send again.
it
'attempts to load a new batch without pending downloads'
do
stub_const
(
'GeoFileDownloadDispatchWorker::DB_RETRIEVE_BATCH'
,
5
)
stub_const
(
'GeoFileDownloadDispatchWorker::MAX_CONCURRENT_DOWNLOADS'
,
2
)
create_list
(
:lfs_object
,
6
,
:with_file
)
avatar
=
fixture_file_upload
(
Rails
.
root
.
join
(
'spec/fixtures/dk.png'
))
create_list
(
:lfs_object
,
2
,
:with_file
)
create_list
(
:user
,
2
,
avatar:
avatar
)
create_list
(
:note
,
2
,
:with_attachment
)
create
(
:appearance
,
logo:
avatar
,
header_logo:
avatar
)
allow_any_instance_of
(
described_class
).
to
receive
(
:over_time?
).
and_return
(
false
)
expect
(
GeoFileDownloadWorker
).
to
receive
(
:perform_async
).
exactly
(
6
).
times
.
and_call_original
# For
6
downloads, we expect three database reloads:
expect
(
GeoFileDownloadWorker
).
to
receive
(
:perform_async
).
exactly
(
8
).
times
.
and_call_original
# For
8
downloads, we expect three database reloads:
# 1. Load the first batch of 5.
# 2. 4 get sent out, 1 remains. This triggers another reload, which loads in the remaining
2
.
# 3. Since the second reload filled the pipe with
2
, we need to do a final reload to ensure
# 2. 4 get sent out, 1 remains. This triggers another reload, which loads in the remaining
4
.
# 3. Since the second reload filled the pipe with
4
, we need to do a final reload to ensure
# zero are left.
expect
(
subject
).
to
receive
(
:load_pending_downloads
).
exactly
(
3
).
times
.
and_call_original
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment