Commit f787caa5 authored by Heinrich Lee Yu's avatar Heinrich Lee Yu

Merge branch '217580-improve-performance-of-blob' into 'master'

Improve performance of Banzai::Filter::ReferenceFilter

See merge request gitlab-org/gitlab!34741
parents c6ce6578 e9c0e204
...@@ -146,16 +146,16 @@ module Banzai ...@@ -146,16 +146,16 @@ module Banzai
link_pattern_start = /\A#{link_pattern}/ link_pattern_start = /\A#{link_pattern}/
link_pattern_anchor = /\A#{link_pattern}\z/ link_pattern_anchor = /\A#{link_pattern}\z/
nodes.each do |node| nodes.each_with_index do |node, index|
if text_node?(node) && ref_pattern if text_node?(node) && ref_pattern
replace_text_when_pattern_matches(node, ref_pattern) do |content| replace_text_when_pattern_matches(node, index, ref_pattern) do |content|
object_link_filter(content, ref_pattern) object_link_filter(content, ref_pattern)
end end
elsif element_node?(node) elsif element_node?(node)
yield_valid_link(node) do |link, inner_html| yield_valid_link(node) do |link, inner_html|
if ref_pattern && link =~ ref_pattern_anchor if ref_pattern && link =~ ref_pattern_anchor
replace_link_node_with_href(node, link) do replace_link_node_with_href(node, index, link) do
object_link_filter(link, ref_pattern, link_content: inner_html) object_link_filter(link, ref_pattern, link_content: inner_html)
end end
...@@ -165,7 +165,7 @@ module Banzai ...@@ -165,7 +165,7 @@ module Banzai
next unless link_pattern next unless link_pattern
if link == inner_html && inner_html =~ link_pattern_start if link == inner_html && inner_html =~ link_pattern_start
replace_link_node_with_text(node, link) do replace_link_node_with_text(node, index) do
object_link_filter(inner_html, link_pattern, link_reference: true) object_link_filter(inner_html, link_pattern, link_reference: true)
end end
...@@ -173,7 +173,7 @@ module Banzai ...@@ -173,7 +173,7 @@ module Banzai
end end
if link =~ link_pattern_anchor if link =~ link_pattern_anchor
replace_link_node_with_href(node, link) do replace_link_node_with_href(node, index, link) do
object_link_filter(link, link_pattern, link_content: inner_html, link_reference: true) object_link_filter(link, link_pattern, link_content: inner_html, link_reference: true)
end end
......
...@@ -34,16 +34,16 @@ module Banzai ...@@ -34,16 +34,16 @@ module Banzai
ref_pattern = issue_reference_pattern ref_pattern = issue_reference_pattern
ref_start_pattern = /\A#{ref_pattern}\z/ ref_start_pattern = /\A#{ref_pattern}\z/
each_node do |node| nodes.each_with_index do |node, index|
if text_node?(node) if text_node?(node)
replace_text_when_pattern_matches(node, ref_pattern) do |content| replace_text_when_pattern_matches(node, index, ref_pattern) do |content|
issue_link_filter(content) issue_link_filter(content)
end end
elsif element_node?(node) elsif element_node?(node)
yield_valid_link(node) do |link, inner_html| yield_valid_link(node) do |link, inner_html|
if link =~ ref_start_pattern if link =~ ref_start_pattern
replace_link_node_with_href(node, link) do replace_link_node_with_href(node, index, link) do
issue_link_filter(link, link_content: inner_html) issue_link_filter(link, link_content: inner_html)
end end
end end
......
...@@ -27,15 +27,15 @@ module Banzai ...@@ -27,15 +27,15 @@ module Banzai
ref_pattern = Project.markdown_reference_pattern ref_pattern = Project.markdown_reference_pattern
ref_pattern_start = /\A#{ref_pattern}\z/ ref_pattern_start = /\A#{ref_pattern}\z/
nodes.each do |node| nodes.each_with_index do |node, index|
if text_node?(node) if text_node?(node)
replace_text_when_pattern_matches(node, ref_pattern) do |content| replace_text_when_pattern_matches(node, index, ref_pattern) do |content|
project_link_filter(content) project_link_filter(content)
end end
elsif element_node?(node) elsif element_node?(node)
yield_valid_link(node) do |link, inner_html| yield_valid_link(node) do |link, inner_html|
if link =~ ref_pattern_start if link =~ ref_pattern_start
replace_link_node_with_href(node, link) do replace_link_node_with_href(node, index, link) do
project_link_filter(link, link_content: inner_html) project_link_filter(link, link_content: inner_html)
end end
end end
......
...@@ -16,6 +16,23 @@ module Banzai ...@@ -16,6 +16,23 @@ module Banzai
class << self class << self
attr_accessor :reference_type attr_accessor :reference_type
def call(doc, context = nil, result = nil)
new(doc, context, result).call_and_update_nodes
end
end
def initialize(doc, context = nil, result = nil)
super
if update_nodes_enabled?
@new_nodes = {}
@nodes = self.result[:reference_filter_nodes]
end
end
def call_and_update_nodes
update_nodes_enabled? ? with_update_nodes { call } : call
end end
# Returns a data attribute String to attach to a reference link # Returns a data attribute String to attach to a reference link
...@@ -89,11 +106,6 @@ module Banzai ...@@ -89,11 +106,6 @@ module Banzai
def each_node def each_node
return to_enum(__method__) unless block_given? return to_enum(__method__) unless block_given?
query = %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})]
| descendant-or-self::a[
not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "")
]}
doc.xpath(query).each do |node| doc.xpath(query).each do |node|
yield node yield node
end end
...@@ -114,25 +126,25 @@ module Banzai ...@@ -114,25 +126,25 @@ module Banzai
yield link, inner_html yield link, inner_html
end end
def replace_text_when_pattern_matches(node, pattern) def replace_text_when_pattern_matches(node, index, pattern)
return unless node.text =~ pattern return unless node.text =~ pattern
content = node.to_html content = node.to_html
html = yield content html = yield content
node.replace(html) unless content == html replace_text_with_html(node, index, html) unless html == content
end end
def replace_link_node_with_text(node, link) def replace_link_node_with_text(node, index)
html = yield html = yield
node.replace(html) unless html == node.text replace_text_with_html(node, index, html) unless html == node.text
end end
def replace_link_node_with_href(node, link) def replace_link_node_with_href(node, index, link)
html = yield html = yield
node.replace(html) unless html == link replace_text_with_html(node, index, html) unless html == link
end end
def text_node?(node) def text_node?(node)
...@@ -145,9 +157,62 @@ module Banzai ...@@ -145,9 +157,62 @@ module Banzai
private private
def query
@query ||= %Q{descendant-or-self::text()[not(#{ignore_ancestor_query})]
| descendant-or-self::a[
not(contains(concat(" ", @class, " "), " gfm ")) and not(@href = "")
]}
end
def replace_text_with_html(node, index, html)
if update_nodes_enabled?
replace_and_update_new_nodes(node, index, html)
else
node.replace(html)
end
end
def replace_and_update_new_nodes(node, index, html)
previous_node = node.previous
next_node = node.next
parent_node = node.parent
# Unfortunately node.replace(html) returns re-parented nodes, not the actual replaced nodes in the doc
# We need to find the actual nodes in the doc that were replaced
node.replace(html)
@new_nodes[index] = []
# We replaced node with new nodes, so we find first new node. If previous_node is nil, we take first parent child
new_node = previous_node ? previous_node.next : parent_node&.children&.first
# We iterate from first to last replaced node and store replaced nodes in @new_nodes
while new_node && new_node != next_node
@new_nodes[index] << new_node.xpath(query)
new_node = new_node.next
end
@new_nodes[index].flatten!
end
def only_path? def only_path?
context[:only_path] context[:only_path]
end end
def with_update_nodes
@new_nodes = {}
yield.tap { update_nodes! }
end
# Once Filter completes replacing nodes, we update nodes with @new_nodes
def update_nodes!
@new_nodes.sort_by { |index, _new_nodes| -index }.each do |index, new_nodes|
nodes[index, 1] = new_nodes
end
result[:reference_filter_nodes] = nodes
end
def update_nodes_enabled?
Feature.enabled?(:update_nodes_for_banzai_reference_filter, project)
end
end end
end end
end end
...@@ -31,15 +31,15 @@ module Banzai ...@@ -31,15 +31,15 @@ module Banzai
ref_pattern = User.reference_pattern ref_pattern = User.reference_pattern
ref_pattern_start = /\A#{ref_pattern}\z/ ref_pattern_start = /\A#{ref_pattern}\z/
nodes.each do |node| nodes.each_with_index do |node, index|
if text_node?(node) if text_node?(node)
replace_text_when_pattern_matches(node, ref_pattern) do |content| replace_text_when_pattern_matches(node, index, ref_pattern) do |content|
user_link_filter(content) user_link_filter(content)
end end
elsif element_node?(node) elsif element_node?(node)
yield_valid_link(node) do |link, inner_html| yield_valid_link(node) do |link, inner_html|
if link =~ ref_pattern_start if link =~ ref_pattern_start
replace_link_node_with_href(node, link) do replace_link_node_with_href(node, index, link) do
user_link_filter(link, link_content: inner_html) user_link_filter(link, link_content: inner_html)
end end
end end
......
...@@ -44,4 +44,249 @@ RSpec.describe Banzai::Filter::ReferenceFilter do ...@@ -44,4 +44,249 @@ RSpec.describe Banzai::Filter::ReferenceFilter do
expect(filter.nodes).to eq([document.children[0]]) expect(filter.nodes).to eq([document.children[0]])
end end
end end
RSpec.shared_context 'document nodes' do
let(:document) { Nokogiri::HTML.fragment('<p data-sourcepos="1:1-1:18"></p>') }
let(:nodes) { [] }
let(:filter) { described_class.new(document, project: project) }
let(:ref_pattern) { nil }
let(:href_link) { nil }
before do
nodes.each do |node|
document.children.first.add_child(node)
end
end
end
RSpec.shared_context 'new nodes' do
let(:nodes) { [{ value: "1" }, { value: "2" }, { value: "3" }] }
let(:expected_nodes) { [{ value: "1.1" }, { value: "1.2" }, { value: "1.3" }, { value: "2.1" }, { value: "2.2" }, { value: "2.3" }, { value: "3.1" }, { value: "3.2" }, { value: "3.3" }] }
let(:new_nodes) do
{
0 => [{ value: "1.1" }, { value: "1.2" }, { value: "1.3" }],
2 => [{ value: "3.1" }, { value: "3.2" }, { value: "3.3" }],
1 => [{ value: "2.1" }, { value: "2.2" }, { value: "2.3" }]
}
end
end
RSpec.shared_examples 'replaces text' do |method_name, index|
let(:args) { [filter.nodes[index], index, ref_pattern || href_link].compact }
context 'when content didnt change' do
it 'does not replace link node with html' do
filter.send(method_name, *args) do
existing_content
end
expect(filter).not_to receive(:replace_text_with_html)
end
end
context 'when link node has changed' do
let(:html) { %(text <a href="reference_url" class="gfm gfm-user" title="reference">Reference</a>) }
it 'replaces reference node' do
filter.send(method_name, *args) do
html
end
expect(document.css('a').length).to eq 1
end
it 'calls replace_and_update_new_nodes' do
expect(filter).to receive(:replace_and_update_new_nodes).with(filter.nodes[index], index, html)
filter.send(method_name, *args) do
html
end
end
it 'stores filtered new nodes' do
filter.send(method_name, *args) do
html
end
expect(filter.instance_variable_get(:@new_nodes)).to eq({ index => [filter.each_node.to_a[index]] })
end
context "with update_nodes_for_banzai_reference_filter feature flag disabled" do
before do
stub_feature_flags(update_nodes_for_banzai_reference_filter: false)
end
it 'does not call replace_and_update_new_nodes' do
expect(filter).not_to receive(:replace_and_update_new_nodes).with(filter.nodes[index], index, html)
filter.send(method_name, *args) do
html
end
end
end
end
end
RSpec.shared_examples 'replaces document node' do |method_name|
context 'when parent has only one node' do
let(:nodes) { [node] }
it_behaves_like 'replaces text', method_name, 0
end
context 'when parent has multiple nodes' do
let(:node1) { Nokogiri::HTML.fragment('<span>span text</span>') }
let(:node2) { Nokogiri::HTML.fragment('<span>text</span>') }
context 'when pattern matches in the first node' do
let(:nodes) { [node, node1, node2] }
it_behaves_like 'replaces text', method_name, 0
end
context 'when pattern matches in the middle node' do
let(:nodes) { [node1, node, node2] }
it_behaves_like 'replaces text', method_name, 1
end
context 'when pattern matches in the last node' do
let(:nodes) { [node1, node2, node] }
it_behaves_like 'replaces text', method_name, 2
end
end
end
describe '#replace_text_when_pattern_matches' do
include_context 'document nodes'
let(:node) { Nokogiri::HTML.fragment('text @reference') }
let(:ref_pattern) { %r{(?<!\w)@(?<user>[a-zA-Z0-9_\-\.]*)}x }
context 'when node has no reference pattern' do
let(:node) { Nokogiri::HTML.fragment('random text') }
let(:nodes) { [node] }
it 'skips node' do
expect { |b| filter.replace_text_when_pattern_matches(filter.nodes[0], 0, ref_pattern, &b) }.not_to yield_control
end
end
it_behaves_like 'replaces document node', :replace_text_when_pattern_matches do
let(:existing_content) { node.to_html }
end
end
describe '#replace_link_node_with_text' do
include_context 'document nodes'
let(:node) { Nokogiri::HTML.fragment('<a>end text</a>') }
it_behaves_like 'replaces document node', :replace_link_node_with_text do
let(:existing_content) { node.text }
end
end
describe '#replace_link_node_with_href' do
include_context 'document nodes'
let(:node) { Nokogiri::HTML.fragment('<a href="link">end text</a>') }
let(:href_link) { CGI.unescape(node.attr('href').to_s) }
it_behaves_like 'replaces document node', :replace_link_node_with_href do
let(:existing_content) { href_link }
end
end
describe "#call_and_update_nodes" do
context "with update_nodes_for_banzai_reference_filter feature flag enabled" do
include_context 'new nodes'
let(:document) { Nokogiri::HTML.fragment('<a href="foo">foo</a>') }
let(:filter) { described_class.new(document, project: project) }
before do
stub_feature_flags(update_nodes_for_banzai_reference_filter: true)
end
it "updates all new nodes", :aggregate_failures do
filter.instance_variable_set('@nodes', nodes)
expect(filter).to receive(:call) { filter.instance_variable_set('@new_nodes', new_nodes) }
expect(filter).to receive(:with_update_nodes).and_call_original
expect(filter).to receive(:update_nodes!).and_call_original
filter.call_and_update_nodes
expect(filter.result[:reference_filter_nodes]).to eq(expected_nodes)
end
end
context "with update_nodes_for_banzai_reference_filter feature flag disabled" do
include_context 'new nodes'
before do
stub_feature_flags(update_nodes_for_banzai_reference_filter: false)
end
it "does not change nodes", :aggregate_failures do
document = Nokogiri::HTML.fragment('<a href="foo">foo</a>')
filter = described_class.new(document, project: project)
filter.instance_variable_set('@nodes', nodes)
expect(filter).to receive(:call) { filter.instance_variable_set('@new_nodes', new_nodes) }
expect(filter).not_to receive(:with_update_nodes)
expect(filter).not_to receive(:update_nodes!)
filter.call_and_update_nodes
expect(filter.nodes).to eq(nodes)
expect(filter.result[:reference_filter_nodes]).to be nil
end
end
end
describe ".call" do
include_context 'new nodes'
let(:document) { Nokogiri::HTML.fragment('<a href="foo">foo</a>') }
let(:result) { { reference_filter_nodes: nodes } }
before do
stub_feature_flags(update_nodes_for_banzai_reference_filter: true)
end
it "updates all nodes", :aggregate_failures do
expect_next_instance_of(described_class) do |filter|
expect(filter).to receive(:call_and_update_nodes).and_call_original
expect(filter).to receive(:with_update_nodes).and_call_original
expect(filter).to receive(:call) { filter.instance_variable_set('@new_nodes', new_nodes) }
expect(filter).to receive(:update_nodes!).and_call_original
end
described_class.call(document, { project: project }, result)
expect(result[:reference_filter_nodes]).to eq(expected_nodes)
end
context "with update_nodes_for_banzai_reference_filter feature flag disabled" do
let(:result) { {} }
before do
stub_feature_flags(update_nodes_for_banzai_reference_filter: false)
end
it "updates all nodes", :aggregate_failures do
expect_next_instance_of(described_class) do |filter|
expect(filter).to receive(:call_and_update_nodes).and_call_original
expect(filter).not_to receive(:with_update_nodes)
expect(filter).to receive(:call) { filter.instance_variable_set('@new_nodes', new_nodes) }
expect(filter).not_to receive(:update_nodes!)
end
described_class.call(document, { project: project }, result)
expect(result[:reference_filter_nodes]).to be nil
end
end
end
end end
...@@ -7,6 +7,57 @@ RSpec.describe Banzai::Pipeline::GfmPipeline do ...@@ -7,6 +7,57 @@ RSpec.describe Banzai::Pipeline::GfmPipeline do
let(:project) { create(:redmine_project, :public) } let(:project) { create(:redmine_project, :public) }
context 'when internal issue tracker is enabled' do context 'when internal issue tracker is enabled' do
context 'when shorthand pattern #ISSUE_ID is used' do
it 'links an internal issues and keep updated nodes in result[:reference_filter_nodes]', :aggregate_failures do
issue = create(:issue, project: project)
markdown = "text #{issue.to_reference(project, full: true)}"
result = described_class.call(markdown, project: project)
link = result[:output].css('a').first
text = result[:output].children.first
expect(link['href']).to eq(Gitlab::Routing.url_helpers.project_issue_path(project, issue))
expect(result[:reference_filter_nodes]).to eq([text])
end
end
it 'executes :each_node only once for first reference filter', :aggregate_failures do
issue = create(:issue, project: project)
markdown = "text #{issue.to_reference(project, full: true)}"
expect_any_instance_of(Banzai::Filter::ReferenceFilter).to receive(:each_node).once
described_class.call(markdown, project: project)
end
context "with update_nodes_for_banzai_reference_filter feature flag disabled" do
before do
stub_feature_flags(update_nodes_for_banzai_reference_filter: false)
end
context 'when shorthand pattern #ISSUE_ID is used' do
it 'links an internal issues and doesnt store nodes in result[:reference_filter_nodes]', :aggregate_failures do
issue = create(:issue, project: project)
markdown = "text #{issue.to_reference(project, full: true)}"
result = described_class.call(markdown, project: project)
link = result[:output].css('a').first
expect(link['href']).to eq(Gitlab::Routing.url_helpers.project_issue_path(project, issue))
expect(result[:reference_filter_nodes]).to eq nil
end
end
it 'execute :each_node for each reference_filter', :aggregate_failures do
issue = create(:issue, project: project)
markdown = "text #{issue.to_reference(project, full: true)}"
described_class.reference_filters do |reference_filter|
expect_any_instance_of(reference_filter).to receive(:each_node).once
end
described_class.call(markdown, project: project)
end
end
context 'when shorthand pattern #ISSUE_ID is used' do context 'when shorthand pattern #ISSUE_ID is used' do
it 'links an internal issue if it exists' do it 'links an internal issue if it exists' do
issue = create(:issue, project: project) issue = create(:issue, project: project)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment