Commit 1ed0b801 authored by Micaël Bergeron

wip: making the query paginated

parent 7d5f50f4
......@@ -4,7 +4,7 @@ require 'csv'
require 'yaml'
module Pseudonymizer
PAGE_SIZE = 1000
PAGE_SIZE = 10000
class Anon
def initialize(fields)
......@@ -73,8 +73,7 @@ module Pseudonymizer
table_to_schema(table)
write_to_csv_file(table, table_page_results(table, whitelist_columns, pseudonymity_columns))
rescue => e
binding.pry
Rails.logger.error(e.message)
Rails.logger.error("Failed to export #{table}: #{e}")
end
# yield every result, paginated and anonymized
......@@ -91,16 +90,16 @@ module Pseudonymizer
# a page of results
results = ActiveRecord::Base.connection.exec_query(sql)
raise StopIteration if results.empty?
anonymizer.anonymize(results).lazy.each do |result|
anonymizer.anonymize(results).each do |result|
has_more = true
yielder << result
end
raise StopIteration unless has_more
page += 1
end
end
end.lazy
end
def table_to_schema(table)
......@@ -141,8 +140,6 @@ module Pseudonymizer
end
end
GC.start
file_path
end
......
......@@ -9,15 +9,11 @@ module Pseudonymizer
end
def output_dir
File.join('/tmp', 'gitlab-pseudonymizer', self.start_at.iso8601)
File.join('/tmp', 'gitlab-pseudonymizer', start_at.iso8601)
end
def upload_dir
File.join(self.start_at.iso8601)
end
def object_store_credentials
config.upload.connection.to_hash.deep_symbolize_keys
File.join(start_at.iso8601)
end
end
end
......@@ -2,12 +2,21 @@ module Pseudonymizer
class Uploader
RemoteStorageUnavailableError = Class.new(StandardError)
# Connection settings for the pseudonymizer upload, read from
# Gitlab.config and converted to a hash with deeply symbolized keys.
def self.object_store_credentials
  connection_settings = Gitlab.config.pseudonymizer.upload.connection
  connection_settings.to_hash.deep_symbolize_keys
end
# Remote directory configured for pseudonymizer uploads, read from
# Gitlab.config.
def self.remote_directory
  upload_settings = Gitlab.config.pseudonymizer.upload
  upload_settings.remote_directory
end
def initialize(options, progress = nil)
@progress = progress || $stdout
@config = options.config
@output_dir = options.output_dir
@upload_dir = options.upload_dir
@connection_params = options.object_store_credentials
@remote_dir = self.class.remote_directory
@connection_params = self.class.object_store_credentials
end
def upload
......@@ -57,15 +66,14 @@ module Pseudonymizer
def connect_to_remote_directory
# our settings use string keys, but Fog expects symbols
connection = ::Fog::Storage.new(@connection_params)
remote_dir = @config.upload.remote_directory
# We only attempt to create the directory for local backups. For AWS
# and other cloud providers, we cannot guarantee the user will have
# permission to create the bucket.
if connection.service == ::Fog::Storage::Local
connection.directories.create(key: remote_dir)
connection.directories.create(key: @remote_dir)
else
connection.directories.get(remote_dir)
connection.directories.get(@remote_dir)
end
end
......
......@@ -93,9 +93,7 @@ namespace :gitlab do
def progress
if ENV['CRON']
# We need an object we can say 'puts' and 'print' to; let's use a
# StringIO.
require 'stringio'
# Do not output progress for Cron
StringIO.new
else
$stdout
......
......@@ -64,7 +64,7 @@ module StubObjectStorage
end
def stub_object_storage_pseudonymizer(options:)
stub_object_storage(connection_params: options.object_store_credentials,
stub_object_storage(connection_params: Pseudonymizer::Uploader.object_store_credentials,
remote_directory: options.config.upload.remote_directory)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment