Commit 8957293d authored by Timothy Andrew's avatar Timothy Andrew

Implement review comments from @yorickpeterse

1. Change multiple updates to a single `update_all`

2. Use cascading deletes

3. Extract an average function for the database median.

4. Move database median to `lib/gitlab/database`

5. Use `delete_all` instead of `destroy_all`

6. Minor refactoring
parent fa890604
...@@ -57,15 +57,13 @@ module Ci ...@@ -57,15 +57,13 @@ module Ci
end end
after_transition [:created, :pending] => :running do |pipeline| after_transition [:created, :pending] => :running do |pipeline|
pipeline.merge_requests.each do |merge_request| MergeRequest::Metrics.where(merge_request_id: pipeline.merge_requests.map(&:id)).
merge_request.metrics.record_latest_build_start_time!(pipeline.started_at) if merge_request.metrics.present? update_all(latest_build_started_at: pipeline.started_at, latest_build_finished_at: nil)
end
end end
after_transition any => [:success] do |pipeline| after_transition any => [:success] do |pipeline|
pipeline.merge_requests.each do |merge_request| MergeRequest::Metrics.where(merge_request_id: pipeline.merge_requests.map(&:id)).
merge_request.metrics.record_latest_build_finish_time!(pipeline.finished_at) if merge_request.metrics.present? update_all(latest_build_finished_at: pipeline.finished_at)
end
end end
before_transition do |pipeline| before_transition do |pipeline|
...@@ -292,8 +290,11 @@ module Ci ...@@ -292,8 +290,11 @@ module Ci
# Merge requests for which the current pipeline is running against # Merge requests for which the current pipeline is running against
# the merge request's latest commit. # the merge request's latest commit.
def merge_requests def merge_requests
project.merge_requests.where(source_branch: self.ref). @merge_requests ||=
select { |merge_request| merge_request.pipeline.try(:id) == self.id } begin
project.merge_requests.where(source_branch: self.ref).
select { |merge_request| merge_request.pipeline.try(:id) == self.id }
end
end end
private private
......
# https://www.periscopedata.com/blog/medians-in-sql.html
module DatabaseMedian
extend ActiveSupport::Concern
def median_datetime(arel_table, query_so_far, column_sym)
case ActiveRecord::Base.connection.adapter_name
when 'PostgreSQL'
pg_median_datetime(arel_table, query_so_far, column_sym)
when 'Mysql2'
mysql_median_datetime(arel_table, query_so_far, column_sym)
else
raise NotImplementedError, "We haven't implemented a database median strategy for your database type."
end
end
def mysql_median_datetime(arel_table, query_so_far, column_sym)
query = arel_table.
from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)).
project(Arel::Nodes::NamedFunction.new("AVG", [arel_table[column_sym]]).as('median')).
where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"),
Arel::Nodes::And.new([Arel.sql('@ct/2.0'),
Arel.sql('@ct/2.0 + 1')]))).
# Disallow negative values
where(arel_table[column_sym].gteq(0))
[Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"),
Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"),
Arel.sql("set @row_id := 0;"),
query,
Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")]
end
def pg_median_datetime(arel_table, query_so_far, column_sym)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
# column, the CTE might look like this:
#
# star_count | row_id | ct
# ------------+--------+----
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
cte = Arel::Nodes::As.new(cte_table,
arel_table.
project(arel_table[column_sym].as(column_sym.to_s),
Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
arel_table.project("COUNT(1)").as('ct')).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
cte_table.project(Arel::Nodes::NamedFunction.new("AVG",
[extract_epoch(cte_table[column_sym])],
"median")).
where(Arel::Nodes::Between.new(cte_table[:row_id],
Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]))).
with(query_so_far, cte)
end
private
def extract_epoch(arel_attribute)
Arel.sql("EXTRACT(EPOCH FROM \"#{arel_attribute.relation.name}\".\"#{arel_attribute.name}\")")
end
# Need to cast '0' to an INTERVAL before we can check if the interval is positive
def zero_interval
Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")])
end
end
class CycleAnalytics class CycleAnalytics
include DatabaseMedian include Gitlab::Database::Median
def initialize(project, from:) def initialize(project, from:)
@project = project @project = project
...@@ -122,7 +122,9 @@ class CycleAnalytics ...@@ -122,7 +122,9 @@ class CycleAnalytics
case ActiveRecord::Base.connection.adapter_name case ActiveRecord::Base.connection.adapter_name
when 'PostgreSQL' when 'PostgreSQL'
result.first['median'].to_f result = result.first.presence
median = result['median'] if result
median.to_f if median
when 'Mysql2' when 'Mysql2'
result.to_a.flatten.first result.to_a.flatten.first
end end
......
class CycleAnalytics
class Queries
def initialize(project)
@project = project
end
def issues(options = {})
@issues_data ||=
begin
issues_query(options).preload(:metrics).map { |issue| { issue: issue } }
end
end
def merge_requests_closing_issues(options = {})
@merge_requests_closing_issues_data ||=
begin
merge_requests_closing_issues = MergeRequestsClosingIssues.where(issue: issues_query(options)).preload(issue: [:metrics], merge_request: [:metrics])
merge_requests_closing_issues.map do |record|
{ issue: record.issue, merge_request: record.merge_request }
end
end
end
def issue_first_associated_with_milestone_at
lambda do |data_point|
issue = data_point[:issue]
issue.metrics.first_associated_with_milestone_at if issue.metrics.present?
end
end
def issue_first_added_to_list_label_at
lambda do |data_point|
issue = data_point[:issue]
issue.metrics.first_added_to_board_at if issue.metrics.present?
end
end
def issue_first_mentioned_in_commit_at
lambda do |data_point|
issue = data_point[:issue]
issue.metrics.first_mentioned_in_commit_at if issue.metrics.present?
end
end
def merge_request_merged_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.merged_at if merge_request.metrics.present?
end
end
def merge_request_build_started_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.latest_build_started_at if merge_request.metrics.present?
end
end
def merge_request_build_finished_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.latest_build_finished_at if merge_request.metrics.present?
end
end
def merge_request_deployed_to_production_at
lambda do |data_point|
merge_request = data_point[:merge_request]
merge_request.metrics.first_deployed_to_production_at if merge_request.metrics.present?
end
end
private
def issues_query(created_after:)
@project.issues.where("created_at >= ?", created_after)
end
end
end
...@@ -42,4 +42,35 @@ class Deployment < ActiveRecord::Base ...@@ -42,4 +42,35 @@ class Deployment < ActiveRecord::Base
project.repository.is_ancestor?(commit.id, sha) project.repository.is_ancestor?(commit.id, sha)
end end
def update_merge_request_metrics
if environment.update_merge_request_metrics?
query = project.merge_requests.
joins(:metrics).
where(target_branch: self.ref, merge_request_metrics: { first_deployed_to_production_at: nil })
merge_requests =
if previous_deployment
query.where("merge_request_metrics.merged_at <= ? AND merge_request_metrics.merged_at >= ?",
self.created_at,
previous_deployment.created_at)
else
query.where("merge_request_metrics.merged_at <= ?", self.created_at)
end
# Need to use `map` instead of `select` because MySQL doesn't allow `SELECT`ing from the same table
# that we're updating.
MergeRequest::Metrics.where(merge_request_id: merge_requests.map(&:id), first_deployed_to_production_at: nil).
update_all(first_deployed_to_production_at: self.created_at)
end
end
def previous_deployment
@previous_deployment ||=
project.deployments.joins(:environment).
where(environments: { name: self.environment.name }, ref: self.ref).
where.not(id: self.id).
take
end
end end
...@@ -43,4 +43,8 @@ class Environment < ActiveRecord::Base ...@@ -43,4 +43,8 @@ class Environment < ActiveRecord::Base
last_deployment.includes_commit?(commit) last_deployment.includes_commit?(commit)
end end
def update_merge_request_metrics?
self.name == "production"
end
end end
...@@ -23,9 +23,9 @@ class Issue < ActiveRecord::Base ...@@ -23,9 +23,9 @@ class Issue < ActiveRecord::Base
has_many :events, as: :target, dependent: :destroy has_many :events, as: :target, dependent: :destroy
has_one :metrics, dependent: :destroy has_one :metrics
has_many :merge_requests_closing_issues, class_name: MergeRequestsClosingIssues has_many :merge_requests_closing_issues, class_name: 'MergeRequestsClosingIssues'
has_many :closed_by_merge_requests, through: :merge_requests_closing_issues, source: :merge_request has_many :closed_by_merge_requests, through: :merge_requests_closing_issues, source: :merge_request
validates :project, presence: true validates :project, presence: true
...@@ -202,7 +202,7 @@ class Issue < ActiveRecord::Base ...@@ -202,7 +202,7 @@ class Issue < ActiveRecord::Base
# From all notes on this issue, we'll select the system notes about linked # From all notes on this issue, we'll select the system notes about linked
# merge requests. Of those, the MRs closing `self` are returned. # merge requests. Of those, the MRs closing `self` are returned.
def closed_by_merge_requests(current_user = nil) def closed_by_merge_requests(current_user = nil)
return [] if !open? return [] unless open?
ext = all_references(current_user) ext = all_references(current_user)
......
...@@ -13,10 +13,6 @@ class Issue::Metrics < ActiveRecord::Base ...@@ -13,10 +13,6 @@ class Issue::Metrics < ActiveRecord::Base
self.save if self.changed? self.save if self.changed?
end end
def record_commit_mention!(commit_time)
self.update(first_mentioned_in_commit_at: commit_time) if self.first_mentioned_in_commit_at.blank?
end
private private
def issue_assigned_to_list_label? def issue_assigned_to_list_label?
......
...@@ -13,11 +13,11 @@ class MergeRequest < ActiveRecord::Base ...@@ -13,11 +13,11 @@ class MergeRequest < ActiveRecord::Base
has_many :merge_request_diffs, dependent: :destroy has_many :merge_request_diffs, dependent: :destroy
has_one :merge_request_diff, has_one :merge_request_diff,
-> { order('merge_request_diffs.id DESC') } -> { order('merge_request_diffs.id DESC') }
has_one :metrics, dependent: :destroy has_one :metrics
has_many :events, as: :target, dependent: :destroy has_many :events, as: :target, dependent: :destroy
has_many :merge_requests_closing_issues, class_name: MergeRequestsClosingIssues has_many :merge_requests_closing_issues, class_name: 'MergeRequestsClosingIssues'
has_many :issues_closed, through: :merge_requests_closing_issues, source: :issue has_many :issues_closed, through: :merge_requests_closing_issues, source: :issue
serialize :merge_params, Hash serialize :merge_params, Hash
...@@ -513,7 +513,7 @@ class MergeRequest < ActiveRecord::Base ...@@ -513,7 +513,7 @@ class MergeRequest < ActiveRecord::Base
# running `ReferenceExtractor` on each of them separately. # running `ReferenceExtractor` on each of them separately.
def cache_merge_request_closes_issues!(current_user = self.author) def cache_merge_request_closes_issues!(current_user = self.author)
transaction do transaction do
self.merge_requests_closing_issues.destroy_all self.merge_requests_closing_issues.delete_all
closes_issues(current_user).each do |issue| closes_issues(current_user).each do |issue|
MergeRequestsClosingIssues.create!(merge_request: self, issue: issue) MergeRequestsClosingIssues.create!(merge_request: self, issue: issue)
end end
......
...@@ -8,16 +8,4 @@ class MergeRequest::Metrics < ActiveRecord::Base ...@@ -8,16 +8,4 @@ class MergeRequest::Metrics < ActiveRecord::Base
self.save if self.changed? self.save if self.changed?
end end
def record_production_deploy!(deploy_time)
self.update(first_deployed_to_production_at: deploy_time) if self.first_deployed_to_production_at.blank?
end
def record_latest_build_start_time!(start_time)
self.update(latest_build_started_at: start_time, latest_build_finished_at: nil)
end
def record_latest_build_finish_time!(finish_time)
self.update(latest_build_finished_at: finish_time)
end
end end
...@@ -13,39 +13,13 @@ class CreateDeploymentService < BaseService ...@@ -13,39 +13,13 @@ class CreateDeploymentService < BaseService
deployable: deployable deployable: deployable
) )
update_merge_request_metrics(deployment, environment) deployment.update_merge_request_metrics
deployment deployment
end end
private private
def update_merge_request_metrics(deployment, environment)
if environment.name == "production"
query = project.merge_requests.joins("LEFT OUTER JOIN merge_request_metrics ON merge_request_metrics.merge_request_id = merge_requests.id").
where(target_branch: params[:ref], "merge_request_metrics.first_deployed_to_production_at" => nil)
previous_deployment = previous_deployment_for_ref(deployment)
merge_requests_deployed_to_production_for_first_time = if previous_deployment
query.where("merge_request_metrics.merged_at < ? AND merge_request_metrics.merged_at > ?", deployment.created_at, previous_deployment.created_at)
else
query.where("merge_request_metrics.merged_at < ?", deployment.created_at)
end
merge_requests_deployed_to_production_for_first_time.each { |merge_request| merge_request.metrics.record_production_deploy!(deployment.created_at) }
end
end
def previous_deployment_for_ref(current_deployment)
@previous_deployment_for_ref ||=
project.deployments.joins(:environment).
where("environments.name": params[:environment], ref: params[:ref]).
where.not(id: current_deployment.id).
first
end
private
def find_or_create_environment def find_or_create_environment
project.environments.find_or_create_by(name: expanded_name) do |environment| project.environments.find_or_create_by(name: expanded_name) do |environment|
environment.external_url = expanded_url environment.external_url = expanded_url
......
...@@ -190,6 +190,8 @@ class GitPushService < BaseService ...@@ -190,6 +190,8 @@ class GitPushService < BaseService
def update_issue_metrics(commit, authors) def update_issue_metrics(commit, authors)
mentioned_issues = commit.all_references(authors[commit]).issues mentioned_issues = commit.all_references(authors[commit]).issues
mentioned_issues.each { |issue| issue.metrics.record_commit_mention!(commit.committed_date) if issue.metrics.present? }
Issue::Metrics.where(issue_id: mentioned_issues.map(&:id), first_mentioned_in_commit_at: nil).
update_all(first_mentioned_in_commit_at: commit.committed_date)
end end
end end
...@@ -4,7 +4,7 @@ require './spec/support/test_env' ...@@ -4,7 +4,7 @@ require './spec/support/test_env'
class Gitlab::Seeder::CycleAnalytics class Gitlab::Seeder::CycleAnalytics
def initialize(project, perf: false) def initialize(project, perf: false)
@project = project @project = project
@user = User.find(1) @user = User.order(:id).last
@issue_count = perf ? 1000 : 5 @issue_count = perf ? 1000 : 5
stub_git_pre_receive! stub_git_pre_receive!
end end
......
...@@ -5,12 +5,12 @@ class AddTableIssueMetrics < ActiveRecord::Migration ...@@ -5,12 +5,12 @@ class AddTableIssueMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime. # Set this constant to true if this migration requires downtime.
DOWNTIME = false DOWNTIME = true
# When a migration requires downtime you **must** uncomment the following # When a migration requires downtime you **must** uncomment the following
# constant and define a short and easy to understand explanation as to why the # constant and define a short and easy to understand explanation as to why the
# migration requires downtime. # migration requires downtime.
# DOWNTIME_REASON = '' DOWNTIME_REASON = 'Adding foreign key'
# When using the methods "add_concurrent_index" or "add_column_with_default" # When using the methods "add_concurrent_index" or "add_column_with_default"
# you must disable the use of transactions as these methods can not run in an # you must disable the use of transactions as these methods can not run in an
...@@ -25,7 +25,7 @@ class AddTableIssueMetrics < ActiveRecord::Migration ...@@ -25,7 +25,7 @@ class AddTableIssueMetrics < ActiveRecord::Migration
def change def change
create_table :issue_metrics do |t| create_table :issue_metrics do |t|
t.references :issue, index: { name: "index_issue_metrics" }, foreign_key: true, null: false t.references :issue, index: { name: "index_issue_metrics" }, foreign_key: true, dependent: :delete, null: false
t.datetime 'first_mentioned_in_commit_at' t.datetime 'first_mentioned_in_commit_at'
t.datetime 'first_associated_with_milestone_at' t.datetime 'first_associated_with_milestone_at'
......
...@@ -5,12 +5,12 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration ...@@ -5,12 +5,12 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime. # Set this constant to true if this migration requires downtime.
DOWNTIME = false DOWNTIME = true
# When a migration requires downtime you **must** uncomment the following # When a migration requires downtime you **must** uncomment the following
# constant and define a short and easy to understand explanation as to why the # constant and define a short and easy to understand explanation as to why the
# migration requires downtime. # migration requires downtime.
# DOWNTIME_REASON = '' DOWNTIME_REASON = 'Adding foreign key'
# When using the methods "add_concurrent_index" or "add_column_with_default" # When using the methods "add_concurrent_index" or "add_column_with_default"
# you must disable the use of transactions as these methods can not run in an # you must disable the use of transactions as these methods can not run in an
...@@ -25,7 +25,7 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration ...@@ -25,7 +25,7 @@ class AddTableMergeRequestMetrics < ActiveRecord::Migration
def change def change
create_table :merge_request_metrics do |t| create_table :merge_request_metrics do |t|
t.references :merge_request, index: { name: "index_merge_request_metrics" }, foreign_key: true, null: false t.references :merge_request, index: { name: "index_merge_request_metrics" }, foreign_key: true, dependent: :delete, null: false
t.datetime 'latest_build_started_at' t.datetime 'latest_build_started_at'
t.datetime 'latest_build_finished_at' t.datetime 'latest_build_finished_at'
......
...@@ -25,8 +25,8 @@ class CreateMergeRequestsClosingIssues < ActiveRecord::Migration ...@@ -25,8 +25,8 @@ class CreateMergeRequestsClosingIssues < ActiveRecord::Migration
def change def change
create_table :merge_requests_closing_issues do |t| create_table :merge_requests_closing_issues do |t|
t.references :merge_request, foreign_key: true, index: true, null: false t.references :merge_request, foreign_key: true, index: true, dependent: :delete, null: false
t.references :issue, foreign_key: true, index: true, null: false t.references :issue, foreign_key: true, index: true, dependent: :delete, null: false
t.timestamps null: false t.timestamps null: false
end end
......
# https://www.periscopedata.com/blog/medians-in-sql.html
module Gitlab
module Database
module Median
def median_datetime(arel_table, query_so_far, column_sym)
case ActiveRecord::Base.connection.adapter_name
when 'PostgreSQL'
pg_median_datetime(arel_table, query_so_far, column_sym)
when 'Mysql2'
mysql_median_datetime(arel_table, query_so_far, column_sym)
else
raise NotImplementedError, "We haven't implemented a database median strategy for your database type."
end
end
def mysql_median_datetime(arel_table, query_so_far, column_sym)
query = arel_table.
from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name)).
project(average([arel_table[column_sym]], 'median')).
where(Arel::Nodes::Between.new(Arel.sql("(select @row_id := @row_id + 1)"),
Arel::Nodes::And.new([Arel.sql('@ct/2.0'),
Arel.sql('@ct/2.0 + 1')]))).
# Disallow negative values
where(arel_table[column_sym].gteq(0))
[Arel.sql("CREATE TEMPORARY TABLE IF NOT EXISTS #{query_so_far.to_sql}"),
Arel.sql("set @ct := (select count(1) from #{arel_table.table_name});"),
Arel.sql("set @row_id := 0;"),
query,
Arel.sql("DROP TEMPORARY TABLE IF EXISTS #{arel_table.table_name};")]
end
def pg_median_datetime(arel_table, query_so_far, column_sym)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
# column, the CTE might look like this:
#
# star_count | row_id | ct
# ------------+--------+----
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
cte = Arel::Nodes::As.new(cte_table,
arel_table.
project(arel_table[column_sym].as(column_sym.to_s),
Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
arel_table.project("COUNT(1)").as('ct')).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
cte_table.project(average([extract_epoch(cte_table[column_sym])], "median")).
where(Arel::Nodes::Between.new(cte_table[:row_id],
Arel::Nodes::And.new([(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]))).
with(query_so_far, cte)
end
private
def average(args, as)
Arel::Nodes::NamedFunction.new("AVG", args, as)
end
def extract_epoch(arel_attribute)
Arel.sql(%Q{EXTRACT(EPOCH FROM "#{arel_attribute.relation.name}"."#{arel_attribute.name}")})
end
# Need to cast '0' to an INTERVAL before we can check if the interval is positive
def zero_interval
Arel::Nodes::NamedFunction.new("CAST", [Arel.sql("'0' AS INTERVAL")])
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment