Commit 01c6323d authored by Toon Claes

UNION of SELECT/WHERE is faster than WHERE on UNION

Instead of applying WHERE on a UNION, apply the WHERE on each of the separate
SELECT statements, and do UNION on that.

Local tests with about 2_000_000 projects:
 - 1_500_000 private projects
 -    40_000 internal projects
 -   400_000 public projects

For the API endpoint `/api/v4/projects?visibility=private` the slowest query was:

```sql
SELECT "projects".*
FROM "projects"
WHERE ...
```

The original query took 1073.8ms.
The query refactored to UNION of SELECT/WHERE took 2.3ms.

The original query was:

```sql
SELECT "projects".*
FROM "projects"
WHERE "projects"."pending_delete" = $1
  AND (projects.id IN
         (SELECT "projects"."id"
          FROM "projects"
          INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id"
          WHERE "projects"."pending_delete" = 'f'
            AND "project_authorizations"."user_id" = 23
          UNION SELECT "projects"."id"
          FROM "projects"
          WHERE "projects"."visibility_level" IN (20,
                                                  10)))
  AND "projects"."visibility_level" = $2
  AND "projects"."archived" = $3
ORDER BY "projects"."created_at" DESC
LIMIT 20
OFFSET 0 [["pending_delete", "f"],
       ["visibility_level", 0],
       ["archived", "f"]]
```

The refactored query:
```sql
SELECT "projects".*
FROM "projects"
WHERE "projects"."pending_delete" = $1
  AND (projects.id IN
         (SELECT "projects"."id"
          FROM "projects"
          INNER JOIN "project_authorizations" ON "projects"."id" = "project_authorizations"."project_id"
          WHERE "projects"."pending_delete" = 'f'
            AND "project_authorizations"."user_id" = 23
            AND "projects"."visibility_level" = 0
            AND "projects"."archived" = 'f'
          UNION SELECT "projects"."id"
          FROM "projects"
          WHERE "projects"."visibility_level" IN (20,
                                                  10)
            AND "projects"."visibility_level" = 0
            AND "projects"."archived" = 'f'))
ORDER BY "projects"."created_at" DESC
LIMIT 20
OFFSET 0 [["pending_delete", "f"]]
```
parent a1deed62
......@@ -28,14 +28,16 @@ class ProjectsFinder < UnionFinder
# Builds the filtered, sorted project collection.
#
# Every collection returned by init_collection is filtered on its own, and
# only the already-filtered collections are combined with union. Keeping the
# WHERE conditions inside each SELECT (instead of applying them to the
# combined UNION) was measured locally on ~2_000_000 projects to cut the
# slowest query from 1073.8ms to 2.3ms.
#
# @return the result of sort applied to the union of the filtered collections
def execute
  # init_collection is expected to yield a list of collections (it is
  # mapped over here and then handed to union).
  filtered = init_collection.map do |collection|
    collection = by_ids(collection)
    collection = by_personal(collection)
    collection = by_starred(collection)
    collection = by_visibilty_level(collection)
    collection = by_tags(collection)
    collection = by_search(collection)
    by_archived(collection)
  end

  # Filters are deliberately NOT re-applied after the union: doing so would
  # reintroduce the slow "WHERE on UNION" query shape.
  sort(union(filtered))
end
......@@ -55,7 +57,7 @@ class ProjectsFinder < UnionFinder
end
# Restricts a single collection to the projects in project_ids_relation.
#
# @param items a relation-like object responding to `where`
# @return the collection narrowed with `where(id: project_ids_relation)`, or
#   items unchanged when project_ids_relation is nil/false
def by_ids(items)
  project_ids_relation ? items.where(id: project_ids_relation) : items
end
def union(items)
......
---
title: Improve performance of ProjectsFinder used in /projects API endpoint
merge_request: 11666
author:
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment