Commit 0fe0161c authored by Grzegorz Bizon

Merge branch '12111-snippet-index' into 'master'

Limit max index size for Snippet content

See merge request gitlab-org/gitlab-ee!15215
parents 456670ef 98e5001a
...@@ -333,6 +333,10 @@ curl --request PUT localhost:9200/gitlab-production/_settings --data '{ ...@@ -333,6 +333,10 @@ curl --request PUT localhost:9200/gitlab-production/_settings --data '{
Enable Elasticsearch search in **Admin > Settings > Integrations**. That's it. Enjoy it! Enable Elasticsearch search in **Admin > Settings > Integrations**. That's it. Enjoy it!
### Index limit
Currently, for repository and snippet files, GitLab indexes only up to 1 MB of content, in order to avoid indexing timeouts.
## GitLab Elasticsearch Rake Tasks ## GitLab Elasticsearch Rake Tasks
There are several rake tasks available to you via the command line: There are several rake tasks available to you via the command line:
......
---
title: "Elasticsearch: index snippet content only up to 1 MB"
merge_request: 15215
author:
type: changed
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
module Elastic module Elastic
module Latest module Latest
class SnippetInstanceProxy < ApplicationInstanceProxy class SnippetInstanceProxy < ApplicationInstanceProxy
MAX_INDEX_SIZE = 1.megabyte
def as_indexed_json(options = {}) def as_indexed_json(options = {})
# We don't use as_json(only: ...) because it calls all virtual and serialized attributes # We don't use as_json(only: ...) because it calls all virtual and serialized attributes
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349 # https://gitlab.com/gitlab-org/gitlab-ee/issues/349
...@@ -22,6 +24,10 @@ module Elastic ...@@ -22,6 +24,10 @@ module Elastic
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr) data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end end
if data['content'].bytesize > MAX_INDEX_SIZE
data['content'] = data['content'].mb_chars.limit(MAX_INDEX_SIZE).to_s # rubocop: disable CodeReuse/ActiveRecord
end
# ES6 is now single-type per index, so we implement our own typing # ES6 is now single-type per index, so we implement our own typing
data['type'] = es_type data['type'] = es_type
......
# frozen_string_literal: true
require 'spec_helper'
# Spec for the Elasticsearch instance proxy that wraps a snippet record.
describe Elastic::Latest::SnippetInstanceProxy do
  let(:snippet) { create(:personal_snippet) }

  subject { described_class.new(snippet) }

  context '#as_indexed_json' do
    # The serialized document must carry each listed attribute with the same
    # value as the underlying snippet record.
    it 'serializes snippet as hash' do
      # Serialize first, then read the attributes off the record.
      indexed_document = subject.as_indexed_json.with_indifferent_access

      expected_attributes = {
        id: snippet.id,
        title: snippet.title,
        file_name: snippet.file_name,
        content: snippet.content,
        created_at: snippet.created_at,
        updated_at: snippet.updated_at,
        project_id: snippet.project_id,
        author_id: snippet.author_id,
        visibility_level: snippet.visibility_level
      }

      expect(indexed_document).to include(expected_attributes)
    end
  end
end
...@@ -61,4 +61,14 @@ describe Gitlab::Elastic::SnippetSearchResults, :elastic do ...@@ -61,4 +61,14 @@ describe Gitlab::Elastic::SnippetSearchResults, :elastic do
expect(results.snippet_blobs_count).to eq(1) expect(results.snippet_blobs_count).to eq(1)
end end
end end
context 'when content is too long' do
  # "abc" opens the content; "xyz" follows MAX_INDEX_SIZE spaces of padding.
  let(:content) do
    padding = ' ' * Elastic::Latest::SnippetInstanceProxy::MAX_INDEX_SIZE
    "abc#{padding}xyz"
  end
  let(:snippet) { create(:personal_snippet, :public, content: content) }

  it 'indexes up to a limit' do
    # The leading term is expected to be found in exactly one snippet blob...
    leading_term_hits = described_class.new(nil, 'abc').snippet_blobs_count
    expect(leading_term_hits).to eq(1)

    # ...while the term placed after the padding is expected to match nothing.
    trailing_term_hits = described_class.new(nil, 'xyz').snippet_blobs_count
    expect(trailing_term_hits).to eq(0)
  end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment