Fix edge cases while iterating over model and registry table

There are some edge cases to take into acocunt:

1. When there are unused registries, but there no
   replicable records next_range! returns nil;
2. When the unused registry foreign key ids are greater
   than the last replicable record id;
3. When the unused registry foreign key ids are lower
   than the first replicable record id;
parent 82e8ed7b
...@@ -2,8 +2,9 @@ ...@@ -2,8 +2,9 @@
module Gitlab module Gitlab
module Geo module Geo
# Returns an ID range within a table so it can be iterated over. Repeats from # Returns an ID range to allow iteration over a registry table and its
# the beginning after it reaches the end. # source replicable table. Repeats from the beginning after it reaches
# the end.
# #
# Used by Geo in particular to iterate over a replicable and its registry # Used by Geo in particular to iterate over a replicable and its registry
# table. # table.
...@@ -93,11 +94,18 @@ module Gitlab ...@@ -93,11 +94,18 @@ module Gitlab
# @private # @private
# #
# This method is used to remove orphaned registries where the foreign key # Get the last ID of the of the batch (not the table) for the registry
# IDs are greather than last replicable ID. The difference here is that # and check if there are more rows in the table.
# we need to check against the foreign key IDS not the registry ID and #
# for the existence of more rows to check we need to check against the # This query differs from the replicable query by:
# the first ID of the batch. #
# - We check against the foreign key IDs not the registry IDs;
# - In the where clause of the more_rows part, we use greater
# than or equal. This allows the batcher to switch to the
# registry table while getting the last ID of the batch
# when the previous batch included the end of the replicable
# table but there are orphaned registries where the foreign key
# ids are higher than the last replicable id;
# #
# @param [Integer] batch_first_id the first ID of the batch # @param [Integer] batch_first_id the first ID of the batch
# @return [Integer, Boolean] A tuple with the the last ID of the batch (not the table), # @return [Integer, Boolean] A tuple with the the last ID of the batch (not the table),
...@@ -108,12 +116,12 @@ module Gitlab ...@@ -108,12 +116,12 @@ module Gitlab
EXISTS ( EXISTS (
SELECT #{model_foreign_key} SELECT #{model_foreign_key}
FROM #{registry_class.table_name} FROM #{registry_class.table_name}
WHERE #{model_foreign_key} > #{batch_first_id} WHERE #{model_foreign_key} >= MAX(batch.#{model_foreign_key})
) AS more_rows ) AS more_rows
FROM ( FROM (
SELECT #{model_foreign_key} SELECT #{model_foreign_key}
FROM #{registry_class.table_name} FROM #{registry_class.table_name}
WHERE #{model_foreign_key} > #{batch_first_id} WHERE #{model_foreign_key} >= #{batch_first_id}
ORDER BY #{model_foreign_key} ORDER BY #{model_foreign_key}
LIMIT #{batch_size}) AS batch; LIMIT #{batch_size}) AS batch;
SQL SQL
......
...@@ -41,13 +41,29 @@ describe Gitlab::Geo::RegistryBatcher, :geo, :use_clean_rails_memory_store_cachi ...@@ -41,13 +41,29 @@ describe Gitlab::Geo::RegistryBatcher, :geo, :use_clean_rails_memory_store_cachi
end end
context 'when it was called before' do context 'when it was called before' do
before do context 'when the previous batch included the end of the table' do
described_class.new(registry_class, key: key, batch_size: batch_size).next_range! before do
described_class.new(registry_class, key: key, batch_size: registry_class.count).next_range!
end
it { is_expected.to be_nil }
end end
it { is_expected.to be_nil } context 'when the previous batch did not include the end of the table' do
before do
described_class.new(registry_class, key: key, batch_size: registry_class.count - 1).next_range!
end
it 'starts after the previous batch' do
expect(subject).to eq(registries.last.public_send(model_foreign_key)..registries.last.public_send(model_foreign_key))
end
end
context 'if cache is cleared' do context 'if cache is cleared' do
before do
described_class.new(registry_class, key: key, batch_size: batch_size).next_range!
end
it 'starts from the beginning' do it 'starts from the beginning' do
Rails.cache.clear Rails.cache.clear
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment