Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
c6573bd1
Commit
c6573bd1
authored
Jul 07, 2020
by
Alishan Ladhani
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement batch counter for summing a column
parent
bd874572
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
126 additions
and
37 deletions
+126
-37
lib/gitlab/database/batch_count.rb
lib/gitlab/database/batch_count.rb
+19
-4
spec/lib/gitlab/database/batch_count_spec.rb
spec/lib/gitlab/database/batch_count_spec.rb
+107
-33
No files found.
lib/gitlab/database/batch_count.rb
View file @
c6573bd1
...
...
@@ -16,6 +16,7 @@
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
# batch_sum(User, :sign_in_count)
module
Gitlab
module
Database
module
BatchCount
...
...
@@ -27,6 +28,10 @@ module Gitlab
BatchCounter
.
new
(
relation
,
column:
column
).
count
(
mode: :distinct
,
batch_size:
batch_size
,
start:
start
,
finish:
finish
)
end
def
batch_sum
(
relation
,
column
,
batch_size:
nil
,
start:
nil
,
finish:
nil
)
BatchCounter
.
new
(
relation
,
column:
nil
,
operation: :sum
,
operation_args:
[
column
]).
count
(
batch_size:
batch_size
,
start:
start
,
finish:
finish
)
end
class
<<
self
include
BatchCount
end
...
...
@@ -35,6 +40,7 @@ module Gitlab
class
BatchCounter
FALLBACK
=
-
1
MIN_REQUIRED_BATCH_SIZE
=
1_250
DEFAULT_SUM_BATCH_SIZE
=
1_000
MAX_ALLOWED_LOOPS
=
10_000
SLEEP_TIME_IN_SECONDS
=
0.01
# 10 msec sleep
ALLOWED_MODES
=
[
:itself
,
:distinct
].
freeze
...
...
@@ -43,13 +49,16 @@ module Gitlab
DEFAULT_DISTINCT_BATCH_SIZE
=
10_000
DEFAULT_BATCH_SIZE
=
100_000
def
initialize
(
relation
,
column:
nil
)
def
initialize
(
relation
,
column:
nil
,
operation: :count
,
operation_args:
nil
)
@relation
=
relation
@column
=
column
||
relation
.
primary_key
@operation
=
operation
@operation_args
=
operation_args
end
def
unwanted_configuration?
(
finish
,
batch_size
,
start
)
batch_size
<=
MIN_REQUIRED_BATCH_SIZE
||
(
@operation
==
:count
&&
batch_size
<=
MIN_REQUIRED_BATCH_SIZE
)
||
(
@operation
==
:sum
&&
batch_size
<
DEFAULT_SUM_BATCH_SIZE
)
||
(
finish
-
start
)
/
batch_size
>=
MAX_ALLOWED_LOOPS
||
start
>
finish
end
...
...
@@ -60,7 +69,7 @@ module Gitlab
check_mode!
(
mode
)
# non-distinct have better performance
batch_size
||=
mode
==
:distinct
?
DEFAULT_DISTINCT_BATCH_SIZE
:
DEFAULT_BATCH_SIZE
batch_size
||=
batch_size_for_mode_and_operation
(
mode
,
@operation
)
start
=
actual_start
(
start
)
finish
=
actual_finish
(
finish
)
...
...
@@ -91,11 +100,17 @@ module Gitlab
def
batch_fetch
(
start
,
finish
,
mode
)
# rubocop:disable GitlabSecurity/PublicSend
@relation
.
select
(
@column
).
public_send
(
mode
).
where
(
between_condition
(
start
,
finish
)).
count
@relation
.
select
(
@column
).
public_send
(
mode
).
where
(
between_condition
(
start
,
finish
)).
send
(
@operation
,
*
@operation_args
)
end
private
def
batch_size_for_mode_and_operation
(
mode
,
operation
)
return
DEFAULT_SUM_BATCH_SIZE
if
operation
==
:sum
mode
==
:distinct
?
DEFAULT_DISTINCT_BATCH_SIZE
:
DEFAULT_BATCH_SIZE
end
def
between_condition
(
start
,
finish
)
return
@column
.
between
(
start
..
(
finish
-
1
))
if
@column
.
is_a?
(
Arel
::
Attributes
::
Attribute
)
...
...
spec/lib/gitlab/database/batch_count_spec.rb
View file @
c6573bd1
...
...
@@ -13,11 +13,34 @@ RSpec.describe Gitlab::Database::BatchCount do
let
(
:another_user
)
{
create
(
:user
)
}
before
do
create_list
(
:issue
,
3
,
author:
user
)
create_list
(
:issue
,
2
,
author:
another_user
)
create_list
(
:issue
,
3
,
author:
user
)
create_list
(
:issue
,
2
,
author:
another_user
)
allow
(
ActiveRecord
::
Base
.
connection
).
to
receive
(
:transaction_open?
).
and_return
(
in_transaction
)
end
shared_examples
'disallowed configurations'
do
|
method
|
it
'returns fallback if start is bigger than finish'
do
expect
(
described_class
.
public_send
(
method
,
*
args
,
start:
1
,
finish:
0
)).
to
eq
(
fallback
)
end
it
'returns fallback if loops more than allowed'
do
large_finish
=
Gitlab
::
Database
::
BatchCounter
::
MAX_ALLOWED_LOOPS
*
default_batch_size
+
1
expect
(
described_class
.
public_send
(
method
,
*
args
,
start:
1
,
finish:
large_finish
)).
to
eq
(
fallback
)
end
it
'returns fallback if batch size is less than min required'
do
expect
(
described_class
.
public_send
(
method
,
*
args
,
batch_size:
small_batch_size
)).
to
eq
(
fallback
)
end
end
shared_examples
'when a transaction is open'
do
let
(
:in_transaction
)
{
true
}
it
'raises an error'
do
expect
{
subject
}.
to
raise_error
(
'BatchCount can not be run inside a transaction'
)
end
end
describe
'#batch_count'
do
it
'counts table'
do
expect
(
described_class
.
batch_count
(
model
)).
to
eq
(
5
)
...
...
@@ -53,38 +76,32 @@ RSpec.describe Gitlab::Database::BatchCount do
[
1
,
2
,
4
,
5
,
6
].
each
{
|
i
|
expect
(
described_class
.
batch_count
(
model
,
batch_size:
i
)).
to
eq
(
5
)
}
end
it
'will raise an error if distinct count is requested'
do
expect
do
described_class
.
batch_count
(
model
.
distinct
(
column
))
end
.
to
raise_error
'Use distinct count for optimized distinct counting'
it
'counts with a start and finish'
do
expect
(
described_class
.
batch_count
(
model
,
start:
model
.
minimum
(
:id
),
finish:
model
.
maximum
(
:id
))).
to
eq
(
5
)
end
context
'in a transaction'
do
let
(
:in_transaction
)
{
true
}
it
"defaults the batch size to
#{
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_BATCH_SIZE
}
"
do
min_id
=
model
.
minimum
(
:id
)
it
'cannot count'
do
expect
do
described_class
.
batch_count
(
model
)
end
.
to
raise_error
'BatchCount can not be run inside a transaction'
expect_next_instance_of
(
Gitlab
::
Database
::
BatchCounter
)
do
|
batch_counter
|
expect
(
batch_counter
).
to
receive
(
:batch_fetch
).
with
(
min_id
,
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_BATCH_SIZE
+
min_id
,
:itself
).
once
.
and_call_original
end
end
it
'counts with a start and finish'
do
expect
(
described_class
.
batch_count
(
model
,
start:
model
.
minimum
(
:id
),
finish:
model
.
maximum
(
:id
))).
to
eq
(
5
)
described_class
.
batch_count
(
model
)
end
context
'disallowed configurations'
do
it
'returns fallback if start is bigger than finish'
do
expect
(
described_class
.
batch_count
(
model
,
start:
1
,
finish:
0
)).
to
eq
(
fallback
)
end
it_behaves_like
'when a transaction is open'
do
subject
{
described_class
.
batch_count
(
model
)
}
end
it
'returns fallback if loops more than allowed'
do
large_finish
=
Gitlab
::
Database
::
BatchCounter
::
MAX_ALLOWED_LOOPS
*
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_BATCH_SIZE
+
1
expect
(
described_class
.
batch_count
(
model
,
start:
1
,
finish:
large_finish
)).
to
eq
(
fallback
)
context
'disallowed_configurations'
do
include_examples
'disallowed configurations'
,
:batch_count
do
let
(
:args
)
{
[
Issue
]
}
let
(
:default_batch_size
)
{
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_BATCH_SIZE
}
end
it
'r
eturns fallback if batch size is less than min requir
ed'
do
expect
(
described_class
.
batch_count
(
model
,
batch_size:
small_batch_size
)).
to
eq
(
fallback
)
it
'r
aises an error if distinct count is request
ed'
do
expect
{
described_class
.
batch_count
(
model
.
distinct
(
column
))
}.
to
raise_error
'Use distinct count for optimized distinct counting'
end
end
end
...
...
@@ -128,18 +145,24 @@ RSpec.describe Gitlab::Database::BatchCount do
expect
(
described_class
.
batch_distinct_count
(
model
,
column
,
start:
User
.
minimum
(
:id
),
finish:
User
.
maximum
(
:id
))).
to
eq
(
2
)
end
context
'disallowed configurations'
do
it
'returns fallback if start is bigger than finish'
do
expect
(
described_class
.
batch_distinct_count
(
model
,
column
,
start:
1
,
finish:
0
)).
to
eq
(
fallback
)
end
it
"defaults the batch size to
#{
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_DISTINCT_BATCH_SIZE
}
"
do
min_id
=
model
.
minimum
(
:id
)
it
'returns fallback if loops more than allowed'
do
large_finish
=
Gitlab
::
Database
::
BatchCounter
::
MAX_ALLOWED_LOOPS
*
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_DISTINCT_BATCH_SIZE
+
1
expect
(
described_class
.
batch_distinct_count
(
model
,
column
,
start:
1
,
finish:
large_finish
)).
to
eq
(
fallback
)
expect_next_instance_of
(
Gitlab
::
Database
::
BatchCounter
)
do
|
batch_counter
|
expect
(
batch_counter
).
to
receive
(
:batch_fetch
).
with
(
min_id
,
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_DISTINCT_BATCH_SIZE
+
min_id
,
:distinct
).
once
.
and_call_original
end
it
'returns fallback if batch size is less than min required'
do
expect
(
described_class
.
batch_distinct_count
(
model
,
column
,
batch_size:
small_batch_size
)).
to
eq
(
fallback
)
described_class
.
batch_distinct_count
(
model
)
end
it_behaves_like
'when a transaction is open'
do
subject
{
described_class
.
batch_distinct_count
(
model
,
column
)
}
end
context
'disallowed configurations'
do
include_examples
'disallowed configurations'
,
:batch_distinct_count
do
let
(
:args
)
{
[
model
,
column
]
}
let
(
:default_batch_size
)
{
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_DISTINCT_BATCH_SIZE
}
end
it
'will raise an error if distinct count with the :id column is requested'
do
...
...
@@ -149,4 +172,55 @@ RSpec.describe Gitlab::Database::BatchCount do
end
end
end
describe
'#batch_sum'
do
let
(
:column
)
{
:weight
}
before
do
Issue
.
first
.
update_attribute
(
column
,
3
)
Issue
.
last
.
update_attribute
(
column
,
4
)
end
it
'returns the sum of values in the given column'
do
expect
(
described_class
.
batch_sum
(
model
,
column
)).
to
eq
(
7
)
end
it
'works when given an Arel column'
do
expect
(
described_class
.
batch_sum
(
model
,
model
.
arel_table
[
column
])).
to
eq
(
7
)
end
it
'works with a batch size of 50K'
do
expect
(
described_class
.
batch_sum
(
model
,
column
,
batch_size:
50_000
)).
to
eq
(
7
)
end
it
'works with start and finish provided'
do
expect
(
described_class
.
batch_sum
(
model
,
column
,
start:
model
.
minimum
(
:id
),
finish:
model
.
maximum
(
:id
))).
to
eq
(
7
)
end
it
'returns the same result regardless of batch size'
do
stub_const
(
'Gitlab::Database::BatchCounter::DEFAULT_SUM_BATCH_SIZE'
,
0
)
(
1
..
(
model
.
count
+
1
)).
each
{
|
i
|
expect
(
described_class
.
batch_sum
(
model
,
column
,
batch_size:
i
)).
to
eq
(
7
)
}
end
it
"defaults the batch size to
#{
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_SUM_BATCH_SIZE
}
"
do
min_id
=
model
.
minimum
(
:id
)
expect_next_instance_of
(
Gitlab
::
Database
::
BatchCounter
)
do
|
batch_counter
|
expect
(
batch_counter
).
to
receive
(
:batch_fetch
).
with
(
min_id
,
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_SUM_BATCH_SIZE
+
min_id
,
:itself
).
once
.
and_call_original
end
described_class
.
batch_sum
(
model
,
column
)
end
it_behaves_like
'when a transaction is open'
do
subject
{
described_class
.
batch_sum
(
model
,
column
)
}
end
it_behaves_like
'disallowed configurations'
,
:batch_sum
do
let
(
:args
)
{
[
model
,
column
]
}
let
(
:default_batch_size
)
{
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_SUM_BATCH_SIZE
}
let
(
:small_batch_size
)
{
Gitlab
::
Database
::
BatchCounter
::
DEFAULT_SUM_BATCH_SIZE
-
1
}
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment