Commit 8b4b687d authored by Dmitriy Zaporozhets

Merge branch 'google-code-import-performance' into 'master'

Decrease memory use and increase performance of Google Code importer.

Addresses private issue https://dev.gitlab.org/gitlab/gitlabhq/issues/2241.

See merge request !536
parent 047dacc7
...@@ -54,6 +54,11 @@ class Import::GoogleCodeController < Import::BaseController ...@@ -54,6 +54,11 @@ class Import::GoogleCodeController < Import::BaseController
render "new_user_map" and return render "new_user_map" and return
end end
# This is the default, so let's not save it into the database.
user_map.reject! do |key, value|
value == Gitlab::GoogleCodeImport::Client.mask_email(key)
end
session[:google_code_user_map] = user_map session[:google_code_user_map] = user_map
flash[:notice] = "The user map has been saved. Continue by selecting the projects you want to import." flash[:notice] = "The user map has been saved. Continue by selecting the projects you want to import."
......
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
# import_type :string(255) # import_type :string(255)
# import_source :string(255) # import_source :string(255)
# avatar :string(255) # avatar :string(255)
# import_data :text
# #
require 'carrierwave/orm/activerecord' require 'carrierwave/orm/activerecord'
...@@ -51,8 +50,6 @@ class Project < ActiveRecord::Base ...@@ -51,8 +50,6 @@ class Project < ActiveRecord::Base
default_value_for :wall_enabled, false default_value_for :wall_enabled, false
default_value_for :snippets_enabled, gitlab_config_features.snippets default_value_for :snippets_enabled, gitlab_config_features.snippets
serialize :import_data, JSON
# set last_activity_at to the same as created_at # set last_activity_at to the same as created_at
after_create :set_last_activity_at after_create :set_last_activity_at
def set_last_activity_at def set_last_activity_at
...@@ -117,6 +114,8 @@ class Project < ActiveRecord::Base ...@@ -117,6 +114,8 @@ class Project < ActiveRecord::Base
has_many :users_star_projects, dependent: :destroy has_many :users_star_projects, dependent: :destroy
has_many :starrers, through: :users_star_projects, source: :user has_many :starrers, through: :users_star_projects, source: :user
has_one :import_data, dependent: :destroy, class_name: "ProjectImportData"
delegate :name, to: :owner, allow_nil: true, prefix: true delegate :name, to: :owner, allow_nil: true, prefix: true
delegate :members, to: :team, prefix: true delegate :members, to: :team, prefix: true
...@@ -267,8 +266,7 @@ class Project < ActiveRecord::Base ...@@ -267,8 +266,7 @@ class Project < ActiveRecord::Base
end end
def clear_import_data def clear_import_data
self.import_data = nil self.import_data.destroy if self.import_data
self.save
end end
def import? def import?
......
# == Schema Information
#
# Table name: project_import_data
#
# id :integer not null, primary key
# project_id :integer
# data :text
#
require 'carrierwave/orm/activerecord'
require 'file_size_validator'
# Holds the import payload attached to a single project.
# The `data` attribute is serialized to and from JSON.
class ProjectImportData < ActiveRecord::Base
# The project this payload belongs to.
belongs_to :project
# `data` is kept in a text column and (de)serialized with the JSON coder.
serialize :data, JSON
# A record without an associated project is invalid.
validates :project, presence: true
end
# Creates the `project_import_data` table with a `project_id` reference
# column and a `data` text column.
class CreateProjectImportData < ActiveRecord::Migration
  def change
    create_table(:project_import_data) do |t|
      t.belongs_to :project # alias of `t.references`; adds `project_id`
      t.text :data
    end
  end
end
# Drops the `import_data` text column from the `projects` table.
class RemoveImportDataFromProject < ActiveRecord::Migration
  # The column type is passed explicitly so that `change` stays reversible:
  # without it, rolling this migration back raises
  # ActiveRecord::IrreversibleMigration. A rollback re-creates the column
  # empty; values removed by the forward migration are not restored.
  def change
    remove_column :projects, :import_data, :text
  end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20150413192223) do ActiveRecord::Schema.define(version: 20150417122318) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
...@@ -323,6 +323,11 @@ ActiveRecord::Schema.define(version: 20150413192223) do ...@@ -323,6 +323,11 @@ ActiveRecord::Schema.define(version: 20150413192223) do
add_index "oauth_applications", ["owner_id", "owner_type"], name: "index_oauth_applications_on_owner_id_and_owner_type", using: :btree add_index "oauth_applications", ["owner_id", "owner_type"], name: "index_oauth_applications_on_owner_id_and_owner_type", using: :btree
add_index "oauth_applications", ["uid"], name: "index_oauth_applications_on_uid", unique: true, using: :btree add_index "oauth_applications", ["uid"], name: "index_oauth_applications_on_uid", unique: true, using: :btree
create_table "project_import_data", force: true do |t|
t.integer "project_id"
t.text "data"
end
create_table "projects", force: true do |t| create_table "projects", force: true do |t|
t.string "name" t.string "name"
t.string "path" t.string "path"
...@@ -348,7 +353,6 @@ ActiveRecord::Schema.define(version: 20150413192223) do ...@@ -348,7 +353,6 @@ ActiveRecord::Schema.define(version: 20150413192223) do
t.integer "star_count", default: 0, null: false t.integer "star_count", default: 0, null: false
t.string "import_type" t.string "import_type"
t.string "import_source" t.string "import_source"
t.text "import_data"
end end
add_index "projects", ["created_at", "id"], name: "index_projects_on_created_at_and_id", using: :btree add_index "projects", ["created_at", "id"], name: "index_projects_on_created_at_and_id", using: :btree
......
...@@ -5,7 +5,10 @@ module Gitlab ...@@ -5,7 +5,10 @@ module Gitlab
def initialize(project) def initialize(project)
@project = project @project = project
@repo = GoogleCodeImport::Repository.new(project.import_data["repo"])
import_data = project.import_data.try(:data)
repo_data = import_data["repo"] if import_data
@repo = GoogleCodeImport::Repository.new(repo_data)
@closed_statuses = [] @closed_statuses = []
@known_labels = Set.new @known_labels = Set.new
...@@ -27,9 +30,10 @@ module Gitlab ...@@ -27,9 +30,10 @@ module Gitlab
def user_map def user_map
@user_map ||= begin @user_map ||= begin
user_map = Hash.new { |hash, user| hash[user] = Client.mask_email(user) } user_map = Hash.new { |hash, user| Client.mask_email(user) }
stored_user_map = project.import_data["user_map"] import_data = project.import_data.try(:data)
stored_user_map = import_data["user_map"] if import_data
user_map.update(stored_user_map) if stored_user_map user_map.update(stored_user_map) if stored_user_map
user_map user_map
...@@ -58,24 +62,7 @@ module Gitlab ...@@ -58,24 +62,7 @@ module Gitlab
def import_issues def import_issues
return unless repo.issues return unless repo.issues
last_id = 0 while raw_issue = repo.issues.shift
deleted_issues = []
repo.issues.each do |raw_issue|
while raw_issue["id"] > last_id + 1
last_id += 1
issue = project.issues.create!(
title: "Deleted issue",
description: "*This issue has been deleted*",
author_id: project.creator_id,
state: "closed"
)
deleted_issues << issue
end
last_id = raw_issue["id"]
author = user_map[raw_issue["author"]["name"]] author = user_map[raw_issue["author"]["name"]]
date = DateTime.parse(raw_issue["published"]).to_formatted_s(:long) date = DateTime.parse(raw_issue["published"]).to_formatted_s(:long)
...@@ -112,7 +99,8 @@ module Gitlab ...@@ -112,7 +99,8 @@ module Gitlab
end end
end end
issue = project.issues.create!( issue = Issue.create!(
project_id: project.id,
title: raw_issue["title"], title: raw_issue["title"],
description: body, description: body,
author_id: project.creator_id, author_id: project.creator_id,
...@@ -121,39 +109,46 @@ module Gitlab ...@@ -121,39 +109,46 @@ module Gitlab
) )
issue.add_labels_by_names(labels) issue.add_labels_by_names(labels)
if issue.iid != raw_issue["id"]
issue.update_attribute(:iid, raw_issue["id"])
end
import_issue_comments(issue, comments) import_issue_comments(issue, comments)
end end
deleted_issues.each(&:destroy!)
end end
def import_issue_comments(issue, comments) def import_issue_comments(issue, comments)
comments.each do |raw_comment| Note.transaction do
next if raw_comment.has_key?("deletedBy") while raw_comment = comments.shift
next if raw_comment.has_key?("deletedBy")
content = format_content(raw_comment["content"])
updates = format_updates(raw_comment["updates"]) content = format_content(raw_comment["content"])
attachments = format_attachments(issue.iid, raw_comment["id"], raw_comment["attachments"]) updates = format_updates(raw_comment["updates"])
attachments = format_attachments(issue.iid, raw_comment["id"], raw_comment["attachments"])
next if content.blank? && updates.blank? && attachments.blank?
next if content.blank? && updates.blank? && attachments.blank?
author = user_map[raw_comment["author"]["name"]]
date = DateTime.parse(raw_comment["published"]).to_formatted_s(:long) author = user_map[raw_comment["author"]["name"]]
date = DateTime.parse(raw_comment["published"]).to_formatted_s(:long)
body = format_issue_comment_body(
raw_comment["id"], body = format_issue_comment_body(
author, raw_comment["id"],
date, author,
content, date,
updates, content,
attachments updates,
) attachments
)
issue.notes.create!( # Needs to match order of `comment_columns` below.
project_id: project.id, Note.create!(
author_id: project.creator_id, project_id: project.id,
note: body noteable_type: "Issue",
) noteable_id: issue.id,
author_id: project.creator_id,
note: body
)
end
end end
end end
...@@ -232,7 +227,7 @@ module Gitlab ...@@ -232,7 +227,7 @@ module Gitlab
def create_label(name) def create_label(name)
color = nice_label_color(name) color = nice_label_color(name)
project.labels.create!(name: name, color: color) Label.create!(project_id: project.id, name: name, color: color)
end end
def format_content(raw_content) def format_content(raw_content)
......
...@@ -11,12 +11,7 @@ module Gitlab ...@@ -11,12 +11,7 @@ module Gitlab
end end
def execute def execute
import_data = { project = ::Projects::CreateService.new(current_user,
"repo" => repo.raw_data,
"user_map" => user_map
}
@project = Project.new(
name: repo.name, name: repo.name,
path: repo.name, path: repo.name,
description: repo.summary, description: repo.summary,
...@@ -25,21 +20,17 @@ module Gitlab ...@@ -25,21 +20,17 @@ module Gitlab
visibility_level: Gitlab::VisibilityLevel::PUBLIC, visibility_level: Gitlab::VisibilityLevel::PUBLIC,
import_type: "google_code", import_type: "google_code",
import_source: repo.name, import_source: repo.name,
import_url: repo.import_url, import_url: repo.import_url
import_data: import_data ).execute
)
if @project.save! import_data = project.create_import_data(
@project.reload data: {
"repo" => repo.raw_data,
if @project.import_failed? "user_map" => user_map
@project.import_retry }
else )
@project.import_start
end
end
@project project
end end
end end
end end
......
...@@ -12,9 +12,13 @@ describe Gitlab::GoogleCodeImport::Importer do ...@@ -12,9 +12,13 @@ describe Gitlab::GoogleCodeImport::Importer do
} }
} }
} }
let(:project) { create(:project, import_data: import_data) } let(:project) { create(:project) }
subject { described_class.new(project) } subject { described_class.new(project) }
before do
project.create_import_data(data: import_data)
end
describe "#execute" do describe "#execute" do
it "imports status labels" do it "imports status labels" do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment