Commit 1d14176e authored by Sergei Petrunia

MDEV-26519: Improved histograms: Make JSON parser efficient

The previous JSON parser used an API that made parsing
inefficient: the same JSON content was parsed again and again.

Switch to a lower-level parsing API that allows parsing to be
done efficiently.
parent be55ad0d
...@@ -4263,54 +4263,79 @@ UPDATE mysql.column_stats ...@@ -4263,54 +4263,79 @@ UPDATE mysql.column_stats
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json'; SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: Root JSON element must be a JSON object at offset 0. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: Root JSON element must be a JSON object at offset 1.
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json'; SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: A JSON array expected at offset 0. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: histogram_hb_v2 must contain an array at offset 35.
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}' SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: Object expected at offset 19. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: Expected an object in the buckets array at offset 35.
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}' SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "start" element not present at offset 45.
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":{}}]}' SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "size" element not present at offset 31.
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}' SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .size member must be present and be a scalar at offset 20. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "ndv" element not present at offset 60.
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}' SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .ndv member must be present and be a scalar at offset 20. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "ndv" element not present at offset 48.
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}' SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[]}' SET histogram='{"histogram_hb_v2":[]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0. id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: Histogram must have at least one bucket at offset 21.
create table t2 ( create table t2 (
city varchar(100) city varchar(100)
); );
......
...@@ -46,62 +46,53 @@ drop table ten; ...@@ -46,62 +46,53 @@ drop table ten;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json'; SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json'; SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}' SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}' SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":{}}]}' SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}' SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}' SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}' SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
UPDATE mysql.column_stats UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[]}' SET histogram='{"histogram_hb_v2":[]}'
WHERE table_name='t1_json'; WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1; explain select * from t1_json limit 1;
--source include/have_sequence.inc --source include/have_sequence.inc
......
This diff is collapsed.
...@@ -76,7 +76,8 @@ class Histogram_json_hb : public Histogram_base ...@@ -76,7 +76,8 @@ class Histogram_json_hb : public Histogram_base
public: public:
static constexpr const char* JSON_NAME="histogram_hb_v2"; static constexpr const char* JSON_NAME="histogram_hb_v2";
bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, bool parse(MEM_ROOT *mem_root, const char *db_name, const char *table_name,
Field *field, Histogram_type type_arg,
const char *hist_data, size_t hist_data_len) override; const char *hist_data, size_t hist_data_len) override;
void serialize(Field *field) override; void serialize(Field *field) override;
...@@ -122,6 +123,9 @@ class Histogram_json_hb : public Histogram_base ...@@ -122,6 +123,9 @@ class Histogram_json_hb : public Histogram_base
} }
private: private:
int parse_bucket(json_engine_t *je, Field *field, double *cumulative_size,
bool *assigned_last_end, const char **err);
double get_left_fract(int idx); double get_left_fract(int idx);
std::string& get_end_value(int idx); std::string& get_end_value(int idx);
int find_bucket(const Field *field, const uchar *lookup_val, bool *equal); int find_bucket(const Field *field, const uchar *lookup_val, bool *equal);
......
...@@ -8914,4 +8914,4 @@ ER_PARTITION_CONVERT_SUBPARTITIONED ...@@ -8914,4 +8914,4 @@ ER_PARTITION_CONVERT_SUBPARTITIONED
ER_PROVIDER_NOT_LOADED ER_PROVIDER_NOT_LOADED
eng "MariaDB tried to use the %s, but its provider plugin is not loaded" eng "MariaDB tried to use the %s, but its provider plugin is not loaded"
ER_JSON_HISTOGRAM_PARSE_FAILED ER_JSON_HISTOGRAM_PARSE_FAILED
eng "Failed to parse histogram: %s at offset %d." eng "Failed to parse histogram for table %s.%s: %s at offset %d."
...@@ -1233,7 +1233,8 @@ class Column_stat: public Stat_table ...@@ -1233,7 +1233,8 @@ class Column_stat: public Stat_table
if (!(hist= create_histogram(mem_root, hist_type, NULL))) if (!(hist= create_histogram(mem_root, hist_type, NULL)))
return NULL; return NULL;
Field *field= table->field[table_field->field_index]; Field *field= table->field[table_field->field_index];
if (!hist->parse(mem_root, field, hist_type, if (!hist->parse(mem_root, db_name->str, table_name->str,
field, hist_type,
val.ptr(), val.length())) val.ptr(), val.length()))
{ {
table_field->read_stats->histogram= hist; table_field->read_stats->histogram= hist;
...@@ -1247,9 +1248,9 @@ class Column_stat: public Stat_table ...@@ -1247,9 +1248,9 @@ class Column_stat: public Stat_table
}; };
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field*, bool Histogram_binary::parse(MEM_ROOT *mem_root, const char*, const char*,
Histogram_type type_arg, const char *hist_data, Field*, Histogram_type type_arg,
size_t hist_data_len) const char *hist_data, size_t hist_data_len)
{ {
/* On-disk an in-memory formats are the same. Just copy the data. */ /* On-disk an in-memory formats are the same. Just copy the data. */
type= type_arg; type= type_arg;
......
...@@ -154,7 +154,9 @@ class Histogram_builder; ...@@ -154,7 +154,9 @@ class Histogram_builder;
class Histogram_base class Histogram_base
{ {
public: public:
virtual bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, virtual bool parse(MEM_ROOT *mem_root,
const char *db_name, const char *table_name,
Field *field, Histogram_type type_arg,
const char *hist_data, size_t hist_data_len)= 0; const char *hist_data, size_t hist_data_len)= 0;
virtual void serialize(Field *to_field)= 0; virtual void serialize(Field *to_field)= 0;
...@@ -311,8 +313,9 @@ class Histogram_binary : public Histogram_base ...@@ -311,8 +313,9 @@ class Histogram_binary : public Histogram_base
Histogram_type get_type() override { return type; } Histogram_type get_type() override { return type; }
bool parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg, bool parse(MEM_ROOT *mem_root, const char*, const char*, Field*,
const char *hist_data, size_t hist_data_len) override; Histogram_type type_arg, const char *hist_data,
size_t hist_data_len) override;
void serialize(Field *to_field) override; void serialize(Field *to_field) override;
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
ulonglong size) override; ulonglong size) override;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment