Merge branch '12111-snippet-index' into 'master'

Limit max index size for Snippet content. See merge request gitlab-org/gitlab-ee!15215

Merge branch '12111-snippet-index' into 'master'
Limit max index size for Snippet content. See merge request gitlab-org/gitlab-ee!15215
0fe0161c · Grzegorz Bizon · 456670ef · 98e5001a · 0fe0161c · 0fe0161c
Commit 0fe0161c authored Aug 16, 2019 by Grzegorz Bizon
5 changed files
--- a/doc/integration/elasticsearch.md
+++ b/doc/integration/elasticsearch.md
@@ -333,6 +333,10 @@ curl --request PUT localhost:9200/gitlab-production/_settings --data '{
 Enable Elasticsearch search in **Admin > Settings > Integrations**. That's it. Enjoy it!
+### Index limit
+Currently, for repository and snippet files, GitLab indexes only up to 1 MB of content per file, in order to avoid indexing timeouts.
 ## GitLab Elasticsearch Rake Tasks
 There are several rake tasks available to you via the command line:

--- a/ee/changelogs/unreleased/12111-snippet-index.yml
+++ b/ee/changelogs/unreleased/12111-snippet-index.yml
+---
+title: "Elasticsearch: index snippet content only up to 1 MB"
+merge_request: 15215
+author:
+type: changed
--- a/ee/lib/elastic/latest/snippet_instance_proxy.rb
+++ b/ee/lib/elastic/latest/snippet_instance_proxy.rb
@@ -3,6 +3,8 @@
 module Elastic
  module Latest
    class SnippetInstanceProxy < ApplicationInstanceProxy
+      MAX_INDEX_SIZE = 1.megabyte
      def as_indexed_json(options = {})
        # We don't use as_json(only: ...) because it calls all virtual and serialized attributes
        # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
@@ -22,6 +24,10 @@ module Elastic
          data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
        end
+        if data['content'].bytesize > MAX_INDEX_SIZE
+          data['content'] = data['content'].mb_chars.limit(MAX_INDEX_SIZE).to_s # rubocop: disable CodeReuse/ActiveRecord
+        end
        # ES6 is now single-type per index, so we implement our own typing
        data['type'] = es_type

--- a/ee/spec/lib/elastic/latest/snippet_instance_proxy_spec.rb
+++ b/ee/spec/lib/elastic/latest/snippet_instance_proxy_spec.rb
+# frozen_string_literal: true
+require 'spec_helper'
+describe Elastic::Latest::SnippetInstanceProxy do
+  let(:snippet) { create(:personal_snippet) }
+  subject { described_class.new(snippet) }
+  context '#as_indexed_json' do
+    it 'serializes snippet as hash' do
+      expect(subject.as_indexed_json.with_indifferent_access).to include(
+        id: snippet.id,
+        title: snippet.title,
+        file_name: snippet.file_name,
+        content: snippet.content,
+        created_at: snippet.created_at,
+        updated_at: snippet.updated_at,
+        project_id: snippet.project_id,
+        author_id: snippet.author_id,
+        visibility_level: snippet.visibility_level
+      )
+    end
+  end
+end
--- a/ee/spec/lib/gitlab/elastic/snippet_search_results_spec.rb
+++ b/ee/spec/lib/gitlab/elastic/snippet_search_results_spec.rb
@@ -61,4 +61,14 @@ describe Gitlab::Elastic::SnippetSearchResults, :elastic do
      expect(results.snippet_blobs_count).to eq(1)
    end
  end
+  context 'when content is too long' do # content built to be longer than SnippetInstanceProxy::MAX_INDEX_SIZE
+    let(:content) { "abc" + (" " * Elastic::Latest::SnippetInstanceProxy::MAX_INDEX_SIZE) + "xyz" } # "abc" leads; "xyz" follows MAX_INDEX_SIZE spaces, so it starts past the limit
+    let(:snippet) { create(:personal_snippet, :public, content: content) }
+    it 'indexes up to a limit' do # NOTE(review): presumably relies on setup outside this hunk to create and index `snippet` - confirm
+      expect(described_class.new(nil, 'abc').snippet_blobs_count).to eq(1) # term inside the indexed prefix is found
+      expect(described_class.new(nil, 'xyz').snippet_blobs_count).to eq(0) # term beyond the limit is not found
+    end
+  end
 end