Commit 6ca0ce41 authored by Kamil Trzciński, committed by Aleksei Lipniagov

Refactor ProjectTreeRestorer logic

This rewrites the majority of the logic
of the tree restorer to make it much cleaner
in terms of relations between objects,
and to make it much easier to persist objects
at very explicit points.
parent 94bc7284
......@@ -1962,27 +1962,6 @@ class Project < ApplicationRecord
(auto_devops || build_auto_devops)&.predefined_variables
end
# Stores +value+ under the association or attribute called +name+.
#
# When +name+ is a has_many association of Project, every element of +value+
# is updated so that its foreign key holds this record's id, the association
# is reset, and +true+ is returned. Otherwise the call is forwarded to
# +update_attribute+ and its result is returned.
#
# Raises ActiveRecord::RecordInvalid, with the message prefixed by
# "Failed to set <name>: ", when such an error is raised while updating.
def append_or_update_attribute(name, value)
  reflection = Project.reflect_on_association(name)

  # anything that is not a 1-to-N relation is written directly on this object
  return update_attribute(name, value) unless reflection.try(:macro) == :has_many

  # 1-to-N relation: re-parent each item onto this object
  value.each { |item| item.update!(reflection.foreign_key => id) }

  # drop the relation cache
  public_send(name).reset # rubocop:disable GitlabSecurity/PublicSend

  true
rescue ActiveRecord::RecordInvalid => e
  raise e, "Failed to set #{name}: #{e.message}"
end
# Tries to set repository as read_only, checking for existing Git transfers in progress beforehand
#
# @return [Boolean] true when set to read_only or false when an existing git transfer is in progress
......
......@@ -15,7 +15,6 @@ module Gitlab
@user = user
@shared = shared
@project = project
@saved = true
end
def restore
......@@ -33,7 +32,8 @@ module Gitlab
ActiveRecord::Base.uncached do
ActiveRecord::Base.no_touching do
update_project_params!
create_relations
create_project_relations!
post_import!
end
end
......@@ -69,68 +69,64 @@ module Gitlab
# in the DB. The structure and relationships between models are guessed from
# the configuration yaml file too.
# Finally, it updates each attribute in the newly imported project.
def create_relations
project_relations.each do |relation_key, relation_definition|
relation_key_s = relation_key.to_s
if relation_definition.present?
create_sub_relations(relation_key_s, relation_definition, @tree_hash)
elsif @tree_hash[relation_key_s].present?
save_relation_hash(relation_key_s, @tree_hash[relation_key_s])
end
# Walks every entry of +project_relations+ and hands each key/definition
# pair to +process_project_relation!+.
#
# An explicit block is used instead of
# `each(&method(:process_project_relation!))`: since Ruby 3.0, Hash#each
# always yields a single [key, value] array, and a Method-backed proc
# enforces its arity, so passing a two-argument method raises ArgumentError.
# Block parameters destructure the pair on every Ruby version.
def create_project_relations!
  project_relations.each do |relation_key, relation_definition|
    process_project_relation!(relation_key, relation_definition)
  end
end
# Final step of the restore sequence, called after +create_project_relations!+.
# Calls +set_latest_merge_request_diff_ids!+ on the project's merge requests
# and returns the current value of +@saved+.
def post_import!
@project.merge_requests.set_latest_merge_request_diff_ids!
@saved
end
def save_relation_hash(relation_key, relation_hash_batch)
relation_hash = create_relation(relation_key, relation_hash_batch)
def process_project_relation!(relation_key, relation_definition)
data_hashes = @tree_hash.delete(relation_key)
return unless data_hashes
remove_group_models(relation_hash) if relation_hash.is_a?(Array)
# we do not care if we process array or hash
data_hashes = [data_hashes] unless data_hashes.is_a?(Array)
@saved = false unless @project.append_or_update_attribute(relation_key, relation_hash)
# consume and remove objects from memory
while data_hash = data_hashes.shift
process_project_relation_item!(relation_key, relation_definition, data_hash)
end
end
save_id_mappings(relation_key, relation_hash_batch, relation_hash)
# Builds one relation object from +data_hash+, attaches it to the project
# and saves it.
#
# Nothing is saved when +build_relation+ yields no object, or when the object
# is a group model (see +is_group_model?+). After a successful save the
# object is passed to +save_id_mapping+, whose result is returned.
def process_project_relation_item!(relation_key, relation_definition, data_hash)
  record = build_relation(relation_key, relation_definition, data_hash)

  importable = record && !is_group_model?(record)
  return unless importable

  # reset the project before attaching it to the new record
  @project.reset
  record.project = @project
  record.save!

  save_id_mapping(relation_key, data_hash, record)
end
# Older, serialized CI pipeline exports may only have a
# merge_request_id and not the full hash of the merge request. To
# import these pipelines, we need to preserve the mapping between
# the old and the new merge request IDs.
def save_id_mappings(relation_key, relation_hash_batch, relation_hash)
def save_id_mapping(relation_key, data_hash, relation_object)
return unless relation_key == 'merge_requests'
relation_hash = Array(relation_hash)
Array(relation_hash_batch).each_with_index do |raw_data, index|
merge_requests_mapping[raw_data['id']] = relation_hash[index]['id']
end
end
# Remove project models that became group models as we found them at group level.
# This means they no longer need to be saved at the root project level.
# For example, in the case of an existing group label that matched the title.
def remove_group_models(relation_hash)
relation_hash.reject! do |value|
GROUP_MODELS.include?(value.class) && value.group_id
end
merge_requests_mapping[data_hash['id']] = relation_object.id
end
def project_relations
@project_relations ||= reader.attributes_finder.find_relations_tree(:project)
@project_relations ||=
reader
.attributes_finder
.find_relations_tree(:project)
.deep_stringify_keys
end
def update_project_params!
Gitlab::Timeless.timeless(@project) do
project_params = @tree_hash.reject do |key, value|
project_relations.include?(key.to_sym)
project_relations.include?(key)
end
project_params = project_params.merge(present_project_override_params)
project_params = project_params.merge(
present_project_override_params)
# Cleaning all imported and overridden params
project_params = Gitlab::ImportExport::AttributeCleaner.clean(
......@@ -140,6 +136,8 @@ module Gitlab
@project.assign_attributes(project_params)
@project.drop_visibility_level!
Gitlab::Timeless.timeless(@project) do
@project.save!
end
end
......@@ -156,73 +154,61 @@ module Gitlab
@project_override_params ||= @project.import_data&.data&.fetch('override_params', nil) || {}
end
# Given a relation hash containing one or more models and its relationships,
# loops through each model and each object from a model type
# and assigns its corresponding attributes hash from +tree_hash+.
# Example:
# +relation_key+ issues, loops through the list of *issues* and for each individual
# issue, finds any subrelations such as notes, creates them and assigns them back to the hash
#
# Recursively calls this method if the sub-relation is a hash containing more sub-relations
def create_sub_relations(relation_key, relation_definition, tree_hash, save: true)
return if tree_hash[relation_key].blank?
tree_array = [tree_hash[relation_key]].flatten
# Avoid keeping a possibly heavy object in memory once we are done with it
while relation_item = tree_array.shift
# The transaction at this level is less speedy than one single transaction
# But we can't have it in the upper level or GC won't get rid of the AR objects
# after we save the batch.
Project.transaction do
process_sub_relation(relation_key, relation_definition, relation_item)
# For every subrelation that hangs from Project, save the associated records altogether
# This effectively batches all records per subrelation item, only keeping those in memory
# We have to keep in mind that more batch granularity << Memory, but >> Slowness
if save
save_relation_hash(relation_key, [relation_item])
tree_hash[relation_key].delete(relation_item)
end
end
# Runs +build_relation+ over every entry of +data_hashes+ and returns the
# results as an array, leaving out entries for which +build_relation+
# returned nil.
def build_relations(relation_key, relation_definition, data_hashes)
  data_hashes.each_with_object([]) do |data_hash, relation_objects|
    relation_object = build_relation(relation_key, relation_definition, data_hash)
    relation_objects << relation_object unless relation_object.nil?
  end
end
tree_hash.delete(relation_key) if save
end
def build_relation(relation_key, relation_definition, data_hash)
# TODO: This is a hack to avoid creating a relation for the author.
# Rather, make `RelationFactory#set_note_author` take care of that.
return data_hash if relation_key == 'author'
def process_sub_relation(relation_key, relation_definition, relation_item)
# create relation objects recursively for all sub-objects
relation_definition.each do |sub_relation_key, sub_relation_definition|
# We just use author to get the user ID, do not attempt to create an instance.
next if sub_relation_key == :author
sub_relation_key_s = sub_relation_key.to_s
# create dependent relations if present
if sub_relation_definition.present?
create_sub_relations(sub_relation_key_s, sub_relation_definition, relation_item, save: false)
end
# transform relation hash to actual object
sub_relation_hash = relation_item[sub_relation_key_s]
if sub_relation_hash.present?
relation_item[sub_relation_key_s] = create_relation(sub_relation_key, sub_relation_hash)
end
end
transform_sub_relations!(data_hash, sub_relation_key, sub_relation_definition)
end
def create_relation(relation_key, relation_hash_list)
relation_array = [relation_hash_list].flatten.map do |relation_hash|
Gitlab::ImportExport::RelationFactory.create(
relation_sym: relation_key.to_sym,
relation_hash: relation_hash,
relation_hash: data_hash,
members_mapper: members_mapper,
merge_requests_mapping: merge_requests_mapping,
user: @user,
project: @project,
excluded_keys: excluded_keys_for_relation(relation_key))
end.compact
end
# Replaces the raw data stored under +sub_relation_key+ in +data_hash+ with
# the object(s) built from it, mutating +data_hash+ in place.
#
# An Array value is treated as 1-to-many and goes through +build_relations+;
# any other value is treated as 1-to-1 and goes through +build_relation+.
# When nothing could be built, the key is removed from +data_hash+.
# Does nothing when the key is absent or its value is falsy.
def transform_sub_relations!(data_hash, sub_relation_key, sub_relation_definition)
  raw_data = data_hash[sub_relation_key]
  return unless raw_data

  built =
    case raw_data
    when Array
      # 1-to-many: an empty result counts as "nothing built"
      build_relations(sub_relation_key, sub_relation_definition, raw_data).presence
    else
      # 1-to-1: a single object
      build_relation(sub_relation_key, sub_relation_definition, raw_data)
    end

  # nothing built: drop the key instead of keeping the raw data
  return data_hash.delete(sub_relation_key) unless built

  data_hash[sub_relation_key] = built
end
relation_hash_list.is_a?(Array) ? relation_array : relation_array.first
# Truthy when +relation_object+ is an instance of one of the GROUP_MODELS
# classes and has a +group_id+ set. Returns false for any other class,
# otherwise the object's +group_id+.
def is_group_model?(relation_object)
  return false unless GROUP_MODELS.include?(relation_object.class)

  relation_object.group_id
end
def reader
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment