Commit 98e5001a authored by Mark Chao's avatar Mark Chao

ES: index snippet content only up to 1 MB

This avoids indexing timeouts.
1 MB is chosen because it is the repository file index limit,
and snippets will be migrated from the database to repositories.
parent 6f070411
......@@ -333,6 +333,10 @@ curl --request PUT localhost:9200/gitlab-production/_settings --data '{
Enable Elasticsearch search in **Admin > Settings > Integrations**. That's it. Enjoy it!
### Index limit
Currently, for repository and snippet files, GitLab indexes only up to 1 MB of content in order to avoid indexing timeouts.
## GitLab Elasticsearch Rake Tasks
There are several rake tasks available to you via the command line:
......
---
title: "Elasticsearch: index snippet content only up to 1 MB"
merge_request: 15215
author:
type: changed
......@@ -3,6 +3,8 @@
module Elastic
module Latest
class SnippetInstanceProxy < ApplicationInstanceProxy
# Upper bound on the byte size of snippet content sent to Elasticsearch.
# Content larger than this is truncated before indexing.
MAX_INDEX_SIZE = 1.megabyte
def as_indexed_json(options = {})
# We don't use as_json(only: ...) because it calls all virtual and serialized attributes
# https://gitlab.com/gitlab-org/gitlab-ee/issues/349
......@@ -22,6 +24,10 @@ module Elastic
data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr)
end
# Cap the indexed content at MAX_INDEX_SIZE. The comparison is on byte size,
# not character count; mb_chars.limit is used so that the cut is
# multibyte-aware (see ActiveSupport::Multibyte::Chars#limit).
# NOTE(review): assumes data['content'] is never nil here — TODO confirm.
if data['content'].bytesize > MAX_INDEX_SIZE
data['content'] = data['content'].mb_chars.limit(MAX_INDEX_SIZE).to_s # rubocop: disable CodeReuse/ActiveRecord
end
# ES6 is now single-type per index, so we implement our own typing
data['type'] = es_type
......
# frozen_string_literal: true
require 'spec_helper'
# Checks the hash that SnippetInstanceProxy#as_indexed_json builds for a snippet.
describe Elastic::Latest::SnippetInstanceProxy do
  let(:snippet) { create(:personal_snippet) }

  subject { described_class.new(snippet) }

  context '#as_indexed_json' do
    # Attributes that must be present in the serialized hash, each with the
    # value the snippet record itself reports.
    let(:expected_attributes) do
      {
        id: snippet.id,
        title: snippet.title,
        file_name: snippet.file_name,
        content: snippet.content,
        created_at: snippet.created_at,
        updated_at: snippet.updated_at,
        project_id: snippet.project_id,
        author_id: snippet.author_id,
        visibility_level: snippet.visibility_level
      }
    end

    it 'serializes snippet as hash' do
      # Indifferent access lets the symbol keys above match string keys.
      indexed_document = subject.as_indexed_json.with_indifferent_access

      expect(indexed_document).to include(expected_attributes)
    end
  end
end
......@@ -61,4 +61,14 @@ describe Gitlab::Elastic::SnippetSearchResults, :elastic do
expect(results.snippet_blobs_count).to eq(1)
end
end
# Content layout: "abc", then MAX_INDEX_SIZE spaces, then "xyz". The leading
# term sits inside the index limit; the trailing term sits beyond it.
context 'when content is too long' do
  let(:content) do
    padding = ' ' * Elastic::Latest::SnippetInstanceProxy::MAX_INDEX_SIZE

    ['abc', padding, 'xyz'].join
  end

  let(:snippet) { create(:personal_snippet, :public, content: content) }

  it 'indexes up to a limit' do
    # Number of snippet blob hits for a search term, with no current user.
    hits_for = ->(term) { described_class.new(nil, term).snippet_blobs_count }

    expect(hits_for.call('abc')).to eq(1)
    expect(hits_for.call('xyz')).to eq(0)
  end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment