Commit d936bda7 authored by Matthias Kaeppler's avatar Matthias Kaeppler

Only de-dup hashes when they have an `id`

Since hashes largely represent entities in the import/export,
we need to be careful not to de-dup entities that are equal up to their
respective IDs, since those might get stripped out in the future.
parent 6b426001
......@@ -50,7 +50,7 @@ module Gitlab
# - a node has not been cached yet but should be; descend into its children
# - a node is neither cached nor qualifies for caching; this is a no-op
def dedup_tree(node, nodes_seen = {})
if nodes_seen.key?(node)
if nodes_seen.key?(node) && distinguishable?(node)
yield nodes_seen[node]
elsif should_dedup?(node)
nodes_seen[node] = node
......@@ -69,6 +69,8 @@ module Gitlab
end
end
end
else
node
end
end
......@@ -76,6 +78,19 @@ module Gitlab
def should_dedup?(node)
node && !(node.is_a?(Numeric) || node.is_a?(TrueClass) || node.is_a?(FalseClass))
end
# We can only safely de-dup values that are distinguishable. True value objects
# are always distinguishable by nature. Hashes however can represent entities,
# which are identified by ID, not value. We therefore disallow de-duping hashes
# that do not have an `id` field, since we might risk dropping entities that
# have equal attributes yet different identities.
def distinguishable?(node)
if node.is_a?(Hash)
node.key?('id')
else
true
end
end
end
end
end
......@@ -6,18 +6,20 @@ describe Gitlab::ImportExport::ProjectTreeProcessor do
let(:tree) do
{
simple: 42,
k1: { "v1": 1 },
k2: ["v2"],
duped_hash_with_id: { "id": 0, "v1": 1 },
duped_hash_no_id: { "v1": 1 },
duped_array: ["v2"],
array: [
{ k1: { "v1": 1 } },
{ k2: ["v2"] }
{ duped_hash_with_id: { "id": 0, "v1": 1 } },
{ duped_array: ["v2"] },
{ duped_hash_no_id: { "v1": 1 } }
],
nested: {
k1: { "v1": 1 },
k2: ["v2"],
k3: ["don't touch"]
}
duped_hash_with_id: { "id": 0, "v1": 1 },
duped_array: ["v2"],
array: ["don't touch"]
}
}.with_indifferent_access
end
let(:processed_tree) { subject.process(tree) }
......@@ -27,15 +29,20 @@ describe Gitlab::ImportExport::ProjectTreeProcessor do
end
it 'de-duplicates equal values' do
expect(processed_tree[:k1]).to be(processed_tree[:array][0][:k1])
expect(processed_tree[:k1]).to be(processed_tree[:nested][:k1])
expect(processed_tree[:k2]).to be(processed_tree[:array][1][:k2])
expect(processed_tree[:k2]).to be(processed_tree[:nested][:k2])
expect(processed_tree[:duped_hash_with_id]).to be(processed_tree[:array][0][:duped_hash_with_id])
expect(processed_tree[:duped_hash_with_id]).to be(processed_tree[:nested][:duped_hash_with_id])
expect(processed_tree[:duped_array]).to be(processed_tree[:array][1][:duped_array])
expect(processed_tree[:duped_array]).to be(processed_tree[:nested][:duped_array])
end
it 'does not de-duplicate hashes without IDs' do
expect(processed_tree[:duped_hash_no_id]).to eq(processed_tree[:array][2][:duped_hash_no_id])
expect(processed_tree[:duped_hash_no_id]).not_to be(processed_tree[:array][2][:duped_hash_no_id])
end
it 'keeps unique entries intact' do
it 'keeps single entries intact' do
expect(processed_tree[:simple]).to eq(42)
expect(processed_tree[:nested][:k3]).to eq(["don't touch"])
expect(processed_tree[:nested][:array]).to eq(["don't touch"])
end
it 'maintains object equality' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment