Commit 31245e13 authored by George Koltsov's avatar George Koltsov

Preserve user authorship during bulk imports

 - Cache source_user_id => destination_user_id during
   MembersPipeline in redis in order to use it later on
   in other pipelines that reference users
 - Replace MembersMapper with UsersMapper in NdjsonPipeline
   as it has the same public API which implements user
   authorship in ndjson piplines that reference users
   (e.g. epics)

Changelog: added
parent feecc08a
...@@ -22,6 +22,7 @@ module BulkImports ...@@ -22,6 +22,7 @@ module BulkImports
integer_value: integerValue integer_value: integerValue
} }
user { user {
user_gid: id
public_email: publicEmail public_email: publicEmail
} }
} }
......
...@@ -15,6 +15,9 @@ module BulkImports ...@@ -15,6 +15,9 @@ module BulkImports
def load(context, data) def load(context, data)
return unless data return unless data
# Current user is already a member
return if data['user_id'].to_i == context.current_user.id
context.group.members.create!(data) context.group.members.create!(data)
end end
end end
......
...@@ -6,18 +6,20 @@ module BulkImports ...@@ -6,18 +6,20 @@ module BulkImports
class MemberAttributesTransformer class MemberAttributesTransformer
def transform(context, data) def transform(context, data)
data data
.then { |data| add_user(data) } .then { |data| add_user(data, context) }
.then { |data| add_access_level(data) } .then { |data| add_access_level(data) }
.then { |data| add_author(data, context) } .then { |data| add_author(data, context) }
end end
private private
def add_user(data) def add_user(data, context)
user = find_user(data&.dig('user', 'public_email')) user = find_user(data&.dig('user', 'public_email'))
return unless user return unless user
cache_source_user_id(data, user, context)
data data
.except('user') .except('user')
.merge('user_id' => user.id) .merge('user_id' => user.id)
...@@ -48,6 +50,16 @@ module BulkImports ...@@ -48,6 +50,16 @@ module BulkImports
data.merge('created_by_id' => context.current_user.id) data.merge('created_by_id' => context.current_user.id)
end end
def cache_source_user_id(data, user, context)
gid = data&.dig('user', 'user_gid')
return unless gid
source_user_id = GlobalID.parse(gid).model_id
::BulkImports::UsersMapper.new(context: context).cache_source_user_id(source_user_id, user.id)
end
end end
end end
end end
......
...@@ -88,11 +88,7 @@ module BulkImports ...@@ -88,11 +88,7 @@ module BulkImports
end end
def members_mapper def members_mapper
@members_mapper ||= Gitlab::ImportExport::MembersMapper.new( @members_mapper ||= BulkImports::UsersMapper.new(context: context)
exported_members: [],
user: current_user,
importable: portable
)
end end
end end
end end
......
# frozen_string_literal: true
module BulkImports
class UsersMapper
include Gitlab::Utils::StrongMemoize
SOURCE_USER_IDS_CACHE_KEY = 'bulk_imports/%{bulk_import}/%{entity}/source_user_ids'
def initialize(context:)
@context = context
@cache_key = SOURCE_USER_IDS_CACHE_KEY % {
bulk_import: @context.bulk_import.id,
entity: @context.entity.id
}
end
def map
strong_memoize(:map) do
map = hash_with_default
cached_source_user_ids.each_pair do |source_id, destination_id|
map[source_id.to_i] = destination_id.to_i
end
map
end
end
def include?(source_user_id)
map.has_key?(source_user_id)
end
def default_user_id
@context.current_user.id
end
def cache_source_user_id(source_id, destination_id)
::Gitlab::Cache::Import::Caching.hash_add(@cache_key, source_id, destination_id)
end
private
def hash_with_default
Hash.new { default_user_id }
end
def cached_source_user_ids
::Gitlab::Cache::Import::Caching.values_from_hash(@cache_key)
end
end
end
...@@ -173,6 +173,34 @@ module Gitlab ...@@ -173,6 +173,34 @@ module Gitlab
val ? true : false val ? true : false
end end
# Adds a value to a hash.
#
# raw_key - The key of the hash to add to.
# field - The field to add to the hash.
# value - The field value to add to the hash.
# timeout - The new timeout of the key.
def self.hash_add(raw_key, field, value, timeout: TIMEOUT)
key = cache_key_for(raw_key)
Redis::Cache.with do |redis|
redis.multi do |m|
m.hset(key, field, value)
m.expire(key, timeout)
end
end
end
# Returns the values of the given hash.
#
# raw_key - The key of the set to check.
def self.values_from_hash(raw_key)
key = cache_key_for(raw_key)
Redis::Cache.with do |redis|
redis.hgetall(key)
end
end
def self.cache_key_for(raw_key) def self.cache_key_for(raw_key)
"#{Redis::Cache::CACHE_NAMESPACE}:#{raw_key}" "#{Redis::Cache::CACHE_NAMESPACE}:#{raw_key}"
end end
......
...@@ -63,6 +63,14 @@ RSpec.describe BulkImports::Groups::Pipelines::MembersPipeline do ...@@ -63,6 +63,14 @@ RSpec.describe BulkImports::Groups::Pipelines::MembersPipeline do
expect(member.updated_at).to eq('2020-01-01T00:00:00Z') expect(member.updated_at).to eq('2020-01-01T00:00:00Z')
expect(member.expires_at).to eq(nil) expect(member.expires_at).to eq(nil)
end end
context 'when user_id is current user id' do
it 'does not create new member' do
data = { 'user_id' => user.id }
expect { subject.load(context, data) }.not_to change(GroupMember, :count)
end
end
end end
describe 'pipeline parts' do describe 'pipeline parts' do
......
...@@ -84,9 +84,34 @@ RSpec.describe BulkImports::Groups::Transformers::MemberAttributesTransformer do ...@@ -84,9 +84,34 @@ RSpec.describe BulkImports::Groups::Transformers::MemberAttributesTransformer do
expect(subject.transform(context, data)).to be_nil expect(subject.transform(context, data)).to be_nil
end end
end end
context 'source user id caching' do
context 'when user gid is present' do
it 'caches source user id' do
gid = 'gid://gitlab/User/7'
data = member_data(email: user.email, gid: gid)
expect_next_instance_of(BulkImports::UsersMapper) do |mapper|
expect(mapper).to receive(:cache_source_user_id).with('7', user.id)
end
subject.transform(context, data)
end
end
context 'when user gid is missing' do
it 'does not use caching' do
data = member_data(email: user.email)
expect(BulkImports::UsersMapper).not_to receive(:new)
subject.transform(context, data)
end
end
end
end end
def member_data(email: '', access_level: 30) def member_data(email: '', gid: nil, access_level: 30)
{ {
'created_at' => '2020-01-01T00:00:00Z', 'created_at' => '2020-01-01T00:00:00Z',
'updated_at' => '2020-01-01T00:00:00Z', 'updated_at' => '2020-01-01T00:00:00Z',
...@@ -95,6 +120,7 @@ RSpec.describe BulkImports::Groups::Transformers::MemberAttributesTransformer do ...@@ -95,6 +120,7 @@ RSpec.describe BulkImports::Groups::Transformers::MemberAttributesTransformer do
'integer_value' => access_level 'integer_value' => access_level
}, },
'user' => { 'user' => {
'user_gid' => gid,
'public_email' => email 'public_email' => email
} }
} }
......
...@@ -106,8 +106,11 @@ RSpec.describe BulkImports::NdjsonPipeline do ...@@ -106,8 +106,11 @@ RSpec.describe BulkImports::NdjsonPipeline do
data = [hash, 1] data = [hash, 1]
user = double user = double
config = double(relation_excluded_keys: nil, top_relation_tree: []) config = double(relation_excluded_keys: nil, top_relation_tree: [])
context = double(portable: group, current_user: user, import_export_config: config) import_double = instance_double(BulkImport, id: 1)
entity_double = instance_double(BulkImports::Entity, id: 2)
context = double(portable: group, current_user: user, import_export_config: config, bulk_import: import_double, entity: entity_double)
allow(subject).to receive(:import_export_config).and_return(config) allow(subject).to receive(:import_export_config).and_return(config)
allow(subject).to receive(:context).and_return(context)
expect(Gitlab::ImportExport::Group::RelationFactory) expect(Gitlab::ImportExport::Group::RelationFactory)
.to receive(:create) .to receive(:create)
...@@ -116,7 +119,7 @@ RSpec.describe BulkImports::NdjsonPipeline do ...@@ -116,7 +119,7 @@ RSpec.describe BulkImports::NdjsonPipeline do
relation_sym: :test, relation_sym: :test,
relation_hash: hash, relation_hash: hash,
importable: group, importable: group,
members_mapper: instance_of(Gitlab::ImportExport::MembersMapper), members_mapper: instance_of(BulkImports::UsersMapper),
object_builder: Gitlab::ImportExport::Group::ObjectBuilder, object_builder: Gitlab::ImportExport::Group::ObjectBuilder,
user: user, user: user,
excluded_keys: nil excluded_keys: nil
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe BulkImports::UsersMapper do
let_it_be(:user) { create(:user) }
let_it_be(:import) { create(:bulk_import, user: user) }
let_it_be(:entity) { create(:bulk_import_entity, bulk_import: import) }
let(:context) do
instance_double(
BulkImports::Pipeline::Context,
bulk_import: import,
entity: entity,
current_user: user
)
end
subject { described_class.new(context: context) }
describe '#map' do
context 'when value for specified key exists' do
it 'returns a map of source & destination user ids from redis' do
allow(Gitlab::Cache::Import::Caching).to receive(:values_from_hash).and_return({ "1" => "2" })
expect(subject.map).to eq({ 1 => 2 })
end
end
context 'when value for specified key does not exist' do
it 'returns default value' do
expect(subject.map[:non_existent_key]).to eq(user.id)
end
end
end
describe '#default_user_id' do
it 'returns current user id' do
expect(subject.default_user_id).to eq(user.id)
end
end
describe '#include?' do
context 'when source user id is present in the map' do
it 'returns true' do
allow(subject).to receive(:map).and_return({ 1 => 2 })
expect(subject.include?(1)).to eq(true)
end
end
context 'when source user id is missing in the map' do
it 'returns false' do
allow(subject).to receive(:map).and_return({})
expect(subject.include?(1)).to eq(false)
end
end
end
describe '#cache_source_user_id' do
it 'caches provided source & destination user ids in redis' do
expect(Gitlab::Cache::Import::Caching).to receive(:hash_add).with("bulk_imports/#{import.id}/#{entity.id}/source_user_ids", 1, 2)
subject.cache_source_user_id(1, 2)
end
end
end
...@@ -100,6 +100,30 @@ RSpec.describe Gitlab::Cache::Import::Caching, :clean_gitlab_redis_cache do ...@@ -100,6 +100,30 @@ RSpec.describe Gitlab::Cache::Import::Caching, :clean_gitlab_redis_cache do
end end
end end
describe '.hash_add' do
it 'adds a value to a hash' do
described_class.hash_add('foo', 1, 1)
described_class.hash_add('foo', 2, 2)
key = described_class.cache_key_for('foo')
values = Gitlab::Redis::Cache.with { |r| r.hgetall(key) }
expect(values).to eq({ '1' => '1', '2' => '2' })
end
end
describe '.values_from_hash' do
it 'returns empty hash when the hash is empty' do
expect(described_class.values_from_hash('foo')).to eq({})
end
it 'returns the set list of values' do
described_class.hash_add('foo', 1, 1)
expect(described_class.values_from_hash('foo')).to eq({ '1' => '1' })
end
end
describe '.write_multiple' do describe '.write_multiple' do
it 'sets multiple keys when key_prefix not set' do it 'sets multiple keys when key_prefix not set' do
mapping = { 'foo' => 10, 'bar' => 20 } mapping = { 'foo' => 10, 'bar' => 20 }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment