Commit c41c006b authored by Adam Hegyi's avatar Adam Hegyi

Merge branch 'ab/product-analytics' into 'master'

Product Analytics MVP: Basic database schema

See merge request gitlab-org/gitlab!35168
parents cdb86daf fc6c53e6
---
title: Add migration for experimental product analytics table
merge_request: 35168
author:
type: added
# frozen_string_literal: true
class AddProductAnalyticsTable < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
include Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
# Table is based on https://github.com/snowplow/snowplow/blob/master/4-storage/postgres-storage/sql/atomic-def.sql 6e07b1c, with the following differences:
# * app_id varchar -> project_id integer (+ FK)
# * Add `id bigserial`
# * Hash partitioning based on `project_id`
# * Timestamp columns: Change type to timestamp with time zone
#
# This table is part of the "product analytics experiment" and as such marked "experimental". The goal here is to
# explore the product analytics as a MVP feature more. We are explicitly not spending time on relational modeling
# here.
#
# We expect significant changes to the database part of this once the feature has been validated.
# Therefore, we expect to drop the table when feature validation is complete. All data will be lost.
def up
with_lock_retries do
execute <<~SQL
CREATE TABLE "product_analytics_events_experimental" (
id bigserial NOT NULL,
-- App
"project_id" integer NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
"platform" varchar(255),
-- Date/time
"etl_tstamp" timestamp with time zone,
"collector_tstamp" timestamp with time zone NOT NULL,
"dvce_created_tstamp" timestamp with time zone,
-- Date/time
"event" varchar(128),
"event_id" char(36) NOT NULL,
"txn_id" integer,
-- Versioning
"name_tracker" varchar(128),
"v_tracker" varchar(100),
"v_collector" varchar(100) NOT NULL,
"v_etl" varchar(100) NOT NULL,
-- User and visit
"user_id" varchar(255),
"user_ipaddress" varchar(45),
"user_fingerprint" varchar(50),
"domain_userid" varchar(36),
"domain_sessionidx" smallint,
"network_userid" varchar(38),
-- Location
"geo_country" char(2),
"geo_region" char(3),
"geo_city" varchar(75),
"geo_zipcode" varchar(15),
"geo_latitude" double precision,
"geo_longitude" double precision,
"geo_region_name" varchar(100),
-- IP lookups
"ip_isp" varchar(100),
"ip_organization" varchar(100),
"ip_domain" varchar(100),
"ip_netspeed" varchar(100),
-- Page
"page_url" text,
"page_title" varchar(2000),
"page_referrer" text,
-- Page URL components
"page_urlscheme" varchar(16),
"page_urlhost" varchar(255),
"page_urlport" integer,
"page_urlpath" varchar(3000),
"page_urlquery" varchar(6000),
"page_urlfragment" varchar(3000),
-- Referrer URL components
"refr_urlscheme" varchar(16),
"refr_urlhost" varchar(255),
"refr_urlport" integer,
"refr_urlpath" varchar(6000),
"refr_urlquery" varchar(6000),
"refr_urlfragment" varchar(3000),
-- Referrer details
"refr_medium" varchar(25),
"refr_source" varchar(50),
"refr_term" varchar(255),
-- Marketing
"mkt_medium" varchar(255),
"mkt_source" varchar(255),
"mkt_term" varchar(255),
"mkt_content" varchar(500),
"mkt_campaign" varchar(255),
-- Custom structured event
"se_category" varchar(1000),
"se_action" varchar(1000),
"se_label" varchar(1000),
"se_property" varchar(1000),
"se_value" double precision,
-- Ecommerce
"tr_orderid" varchar(255),
"tr_affiliation" varchar(255),
"tr_total" decimal(18,2),
"tr_tax" decimal(18,2),
"tr_shipping" decimal(18,2),
"tr_city" varchar(255),
"tr_state" varchar(255),
"tr_country" varchar(255),
"ti_orderid" varchar(255),
"ti_sku" varchar(255),
"ti_name" varchar(255),
"ti_category" varchar(255),
"ti_price" decimal(18,2),
"ti_quantity" integer,
-- Page ping
"pp_xoffset_min" integer,
"pp_xoffset_max" integer,
"pp_yoffset_min" integer,
"pp_yoffset_max" integer,
-- User Agent
"useragent" varchar(1000),
-- Browser
"br_name" varchar(50),
"br_family" varchar(50),
"br_version" varchar(50),
"br_type" varchar(50),
"br_renderengine" varchar(50),
"br_lang" varchar(255),
"br_features_pdf" boolean,
"br_features_flash" boolean,
"br_features_java" boolean,
"br_features_director" boolean,
"br_features_quicktime" boolean,
"br_features_realplayer" boolean,
"br_features_windowsmedia" boolean,
"br_features_gears" boolean,
"br_features_silverlight" boolean,
"br_cookies" boolean,
"br_colordepth" varchar(12),
"br_viewwidth" integer,
"br_viewheight" integer,
-- Operating System
"os_name" varchar(50),
"os_family" varchar(50),
"os_manufacturer" varchar(50),
"os_timezone" varchar(50),
-- Device/Hardware
"dvce_type" varchar(50),
"dvce_ismobile" boolean,
"dvce_screenwidth" integer,
"dvce_screenheight" integer,
-- Document
"doc_charset" varchar(128),
"doc_width" integer,
"doc_height" integer,
-- Currency
"tr_currency" char(3),
"tr_total_base" decimal(18, 2),
"tr_tax_base" decimal(18, 2),
"tr_shipping_base" decimal(18, 2),
"ti_currency" char(3),
"ti_price_base" decimal(18, 2),
"base_currency" char(3),
-- Geolocation
"geo_timezone" varchar(64),
-- Click ID
"mkt_clickid" varchar(128),
"mkt_network" varchar(64),
-- ETL tags
"etl_tags" varchar(500),
-- Time event was sent
"dvce_sent_tstamp" timestamp with time zone,
-- Referer
"refr_domain_userid" varchar(36),
"refr_dvce_tstamp" timestamp with time zone,
-- Session ID
"domain_sessionid" char(36),
-- Derived timestamp
"derived_tstamp" timestamp with time zone,
-- Event schema
"event_vendor" varchar(1000),
"event_name" varchar(1000),
"event_format" varchar(128),
"event_version" varchar(128),
-- Event fingerprint
"event_fingerprint" varchar(128),
-- True timestamp
"true_tstamp" timestamp with time zone,
PRIMARY KEY (id, project_id)
) PARTITION BY HASH (project_id)
WITHOUT OIDS;
CREATE INDEX index_product_analytics_events_experimental_project_and_time ON product_analytics_events_experimental (project_id, collector_tstamp);
SQL
create_hash_partitions :product_analytics_events_experimental, 64
end
end
def down
with_lock_retries do
execute 'DROP TABLE product_analytics_events_experimental'
end
end
end
This diff is collapsed.
......@@ -63,6 +63,7 @@ RSpec.describe 'Database schema' do
oauth_access_tokens: %w[resource_owner_id application_id],
oauth_applications: %w[owner_id],
open_project_tracker_data: %w[closed_status_id],
product_analytics_events_experimental: %w[event_id txn_id user_id],
project_group_links: %w[group_id],
project_statistics: %w[namespace_id],
projects: %w[creator_id ci_id mirror_user_id],
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment