Commit a312ce0c authored by Mikolaj Wawrzyniak's avatar Mikolaj Wawrzyniak

Include joins into names suggestions

For database sourced product intelligence metrics
we should include joined relations into names
suggestons to make them more descriptive
parent c1a951cf
......@@ -48,48 +48,144 @@ module Gitlab
def name_suggestion(relation:, column: nil, prefix: nil, distinct: nil)
parts = [prefix]
if column
parts << parse_target(column)
arel_column = arelize_column(relation, column)
# nil as column indicates that the counting would use fallback value of primary key.
# Because counting primary key from relation is the conceptual equal to counting all
# records from given relation, in order to keep name suggestion more condensed
# primary key column is skipped.
# eg: SELECT COUNT(id) FROM issues would translate as count_issues and not
# as count_id_from_issues since it does not add more information to the name suggestion
if arel_column != Arel::Table.new(relation.table_name)[relation.primary_key]
parts << arel_column.name
parts << 'from'
end
source = parse_source(relation)
constraints = parse_constraints(relation: relation, column: column, distinct: distinct)
if constraints.include?(source)
arel = arel_query(relation: relation, column: arel_column, distinct: distinct)
constraints = parse_constraints(relation: relation, arel: arel)
# In some cases due to performance reasons metrics are instrumented with joined relations
# where relation listed in FROM statement is not the one that includes counted attribute
# in such situations to make name suggestion more intuitive source should be inferred based
# on the relation that provide counted attribute
# EG: SELECT COUNT(deployments.environment_id) FROM clusters
# JOIN deployments ON deployments.cluster_id = cluster.id
# should be translated into:
# count_environment_id_from_deployments_with_clusters
# instead of
# count_environment_id_from_clusters_with_deployments
actual_source = parse_source(relation, arel_column)
if constraints.include?(actual_source)
parts << "<adjective describing: '#{constraints}'>"
end
parts << source
parts << actual_source
parts += process_joined_relations(actual_source, arel, relation)
parts.compact.join('_')
end
def parse_constraints(relation:, column: nil, distinct: nil)
def parse_constraints(relation:, arel:)
connection = relation.connection
::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Constraints
.new(connection)
.accept(arel(relation: relation, column: column, distinct: distinct), collector(connection))
.accept(arel, collector(connection))
.value
end
def parse_target(column)
if column.is_a?(Arel::Attribute)
"#{column.relation.name}.#{column.name}"
else
# TODO: joins with `USING` keyword
def process_joined_relations(actual_source, arel, relation)
joins = parse_joins(connection: relation.connection, arel: arel)
return [] unless joins.any?
sources = [relation.table_name, *joins.map { |join| join[:source] }]
joins = extract_joins_targets(joins, sources)
relations = if actual_source != relation.table_name
build_relations_tree(joins + [{ source: relation.table_name }], actual_source)
else
# in case where counter attribute comes from joined relations, the relations
# diagram has to be built bottom up, thus source and target are reverted
build_relations_tree(joins + [{ source: relation.table_name }], actual_source, source_key: :target, target_key: :source)
end
collect_join_parts(relations[actual_source])
end
def parse_joins(connection:, arel:)
::Gitlab::Usage::Metrics::NamesSuggestions::RelationParsers::Joins
.new(connection)
.accept(arel)
end
def extract_joins_targets(joins, sources)
joins.map do |join|
source_regex = /(#{join[:source]})\.(\w+_)*id/i
tables_except_src = (sources - [join[:source]]).join('|')
target_regex = /(?<target>#{tables_except_src})\.(\w+_)*id/i
join_cond_regex = /(#{source_regex}\s+=\s+#{target_regex})|(#{target_regex}\s+=\s+#{source_regex})/i
matched = join_cond_regex.match(join[:constraints])
join[:target] = matched[:target] if matched
join
end
end
def build_relations_tree(joins, parent, source_key: :source, target_key: :target)
return [] if joins.blank?
tree = {}
tree[parent] = []
joins.each do |join|
if join[source_key] == parent
tree[parent] << build_relations_tree(joins - [join], join[target_key], source_key: source_key, target_key: target_key)
end
end
tree
end
def collect_join_parts(joined_relations, parts = [], conjunctions = %w[with having including].cycle)
conjunction = conjunctions.next
joined_relations.each do |subtree|
subtree.each do |parent, children|
parts << "<#{conjunction}>"
parts << parent
collect_join_parts(children, parts, conjunctions)
end
end
parts
end
def arelize_column(relation, column)
case column
when Arel::Attribute
column
when NilClass
Arel::Table.new(relation.table_name)[relation.primary_key]
when String
if column.include?('.')
table, col = column.split('.')
Arel::Table.new(table)[col]
else
Arel::Table.new(relation.table_name)[column]
end
when Symbol
arelize_column(relation, column.to_s)
end
end
def parse_source(relation)
relation.table_name
def parse_source(relation, column)
column.relation.name || relation.table_name
end
def collector(connection)
Arel::Collectors::SubstituteBinds.new(connection, Arel::Collectors::SQLString.new)
end
def arel(relation:, column: nil, distinct: nil)
def arel_query(relation:, column: nil, distinct: nil)
column ||= relation.primary_key
if column.is_a?(Arel::Attribute)
......
......@@ -12,7 +12,7 @@ RSpec.describe Gitlab::Usage::Metrics::NamesSuggestions::Generator do
describe '#generate' do
shared_examples 'name suggestion' do
it 'return correct name' do
expect(described_class.generate(key_path)).to eq name_suggestion
expect(described_class.generate(key_path)).to match name_suggestion
end
end
......@@ -20,7 +20,7 @@ RSpec.describe Gitlab::Usage::Metrics::NamesSuggestions::Generator do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with count(Board)
let(:key_path) { 'counts.boards' }
let(:name_suggestion) { 'count_boards' }
let(:name_suggestion) { /count_boards/ }
end
end
......@@ -28,7 +28,32 @@ RSpec.describe Gitlab::Usage::Metrics::NamesSuggestions::Generator do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with distinct_count(ZoomMeeting, :issue_id)
let(:key_path) { 'counts.issues_using_zoom_quick_actions' }
let(:name_suggestion) { 'count_distinct_issue_id_from_zoom_meetings' }
let(:name_suggestion) { /count_distinct_issue_id_from_zoom_meetings/ }
end
end
context 'joined relations' do
context 'counted attribute comes from joined relation' do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with:
# distinct_count(
# ::Clusters::Applications::Ingress.modsecurity_enabled.logging
# .joins(cluster: :deployments)
# .merge(::Clusters::Cluster.enabled)
# .merge(Deployment.success),
# ::Deployment.arel_table[:environment_id]
# )
let(:key_path) { 'counts.ingress_modsecurity_logging' }
let(:name_suggestion) { /count_distinct_environment_id_from_<adjective describing\: '\(clusters_applications_ingress\.modsecurity_enabled = TRUE AND clusters_applications_ingress\.modsecurity_mode = \d+ AND clusters.enabled = TRUE AND deployments.status = \d+\)'>_deployments_<with>_clusters_<having>_clusters_applications_ingress/ }
end
end
context 'counted attribute comes from source relation' do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with count(Issue.with_alert_management_alerts.not_authored_by(::User.alert_bot), start: issue_minimum_id, finish: issue_maximum_id)
let(:key_path) { 'counts.issues_created_manually_from_alerts' }
let(:name_suggestion) { /count_<adjective describing\: '\(issues\.author_id != \d+\)'>_issues_<with>_alert_management_alerts/ }
end
end
end
......@@ -36,7 +61,7 @@ RSpec.describe Gitlab::Usage::Metrics::NamesSuggestions::Generator do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with sum(JiraImportState.finished, :imported_issues_count)
let(:key_path) { 'counts.jira_imports_total_imported_issues_count' }
let(:name_suggestion) { "sum_imported_issues_count_from_<adjective describing: '(jira_imports.status = 4)'>_jira_imports" }
let(:name_suggestion) { /sum_imported_issues_count_from_<adjective describing\: '\(jira_imports\.status = \d+\)'>_jira_imports/ }
end
end
......@@ -44,7 +69,7 @@ RSpec.describe Gitlab::Usage::Metrics::NamesSuggestions::Generator do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with add(data[:personal_snippets], data[:project_snippets])
let(:key_path) { 'counts.snippets' }
let(:name_suggestion) { "add_count_<adjective describing: '(snippets.type = 'PersonalSnippet')'>_snippets_and_count_<adjective describing: '(snippets.type = 'ProjectSnippet')'>_snippets" }
let(:name_suggestion) { /add_count_<adjective describing\: '\(snippets\.type = 'PersonalSnippet'\)'>_snippets_and_count_<adjective describing\: '\(snippets\.type = 'ProjectSnippet'\)'>_snippets/ }
end
end
......@@ -52,7 +77,7 @@ RSpec.describe Gitlab::Usage::Metrics::NamesSuggestions::Generator do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics) }
let(:key_path) { 'analytics_unique_visits.analytics_unique_visits_for_any_target' }
let(:name_suggestion) { '<please fill metric name>' }
let(:name_suggestion) { /<please fill metric name>/ }
end
end
......@@ -60,7 +85,7 @@ RSpec.describe Gitlab::Usage::Metrics::NamesSuggestions::Generator do
it_behaves_like 'name suggestion' do
# corresponding metric is collected with alt_usage_data(fallback: nil) { operating_system }
let(:key_path) { 'settings.operating_system' }
let(:name_suggestion) { '<please fill metric name>' }
let(:name_suggestion) { /<please fill metric name>/ }
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment