Commit 1d14176e authored by Sergei Petrunia

MDEV-26519: Improved histograms: Make JSON parser efficient

The previous JSON parser used an API that made parsing
inefficient: the same JSON content was parsed again and again.

Switch to a lower-level parsing API that allows the parsing
to be done efficiently.
parent be55ad0d
......@@ -4263,54 +4263,79 @@ UPDATE mysql.column_stats
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: Root JSON element must be a JSON object at offset 0.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: Root JSON element must be a JSON object at offset 1.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: A JSON array expected at offset 0.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: histogram_hb_v2 must contain an array at offset 35.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: Object expected at offset 19.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: Expected an object in the buckets array at offset 35.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "start" element not present at offset 45.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "size" element not present at offset 31.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .size member must be present and be a scalar at offset 20.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "ndv" element not present at offset 60.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .ndv member must be present and be a scalar at offset 20.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: "ndv" element not present at offset 48.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0.
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10
Warnings:
Warning 4186 Failed to parse histogram for table test.t1_json: Histogram must have at least one bucket at offset 21.
create table t2 (
city varchar(100)
);
......
......@@ -46,62 +46,53 @@ drop table ten;
UPDATE mysql.column_stats
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
--source include/have_sequence.inc
......
This diff is collapsed.
......@@ -76,7 +76,8 @@ class Histogram_json_hb : public Histogram_base
public:
static constexpr const char* JSON_NAME="histogram_hb_v2";
bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
bool parse(MEM_ROOT *mem_root, const char *db_name, const char *table_name,
Field *field, Histogram_type type_arg,
const char *hist_data, size_t hist_data_len) override;
void serialize(Field *field) override;
......@@ -122,6 +123,9 @@ class Histogram_json_hb : public Histogram_base
}
private:
int parse_bucket(json_engine_t *je, Field *field, double *cumulative_size,
bool *assigned_last_end, const char **err);
double get_left_fract(int idx);
std::string& get_end_value(int idx);
int find_bucket(const Field *field, const uchar *lookup_val, bool *equal);
......
......@@ -8914,4 +8914,4 @@ ER_PARTITION_CONVERT_SUBPARTITIONED
ER_PROVIDER_NOT_LOADED
eng "MariaDB tried to use the %s, but its provider plugin is not loaded"
ER_JSON_HISTOGRAM_PARSE_FAILED
eng "Failed to parse histogram: %s at offset %d."
eng "Failed to parse histogram for table %s.%s: %s at offset %d."
......@@ -1233,7 +1233,8 @@ class Column_stat: public Stat_table
if (!(hist= create_histogram(mem_root, hist_type, NULL)))
return NULL;
Field *field= table->field[table_field->field_index];
if (!hist->parse(mem_root, field, hist_type,
if (!hist->parse(mem_root, db_name->str, table_name->str,
field, hist_type,
val.ptr(), val.length()))
{
table_field->read_stats->histogram= hist;
......@@ -1247,9 +1248,9 @@ class Column_stat: public Stat_table
};
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field*,
Histogram_type type_arg, const char *hist_data,
size_t hist_data_len)
bool Histogram_binary::parse(MEM_ROOT *mem_root, const char*, const char*,
Field*, Histogram_type type_arg,
const char *hist_data, size_t hist_data_len)
{
/* On-disk and in-memory formats are the same. Just copy the data. */
type= type_arg;
......
......@@ -154,7 +154,9 @@ class Histogram_builder;
class Histogram_base
{
public:
virtual bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
virtual bool parse(MEM_ROOT *mem_root,
const char *db_name, const char *table_name,
Field *field, Histogram_type type_arg,
const char *hist_data, size_t hist_data_len)= 0;
virtual void serialize(Field *to_field)= 0;
......@@ -311,8 +313,9 @@ class Histogram_binary : public Histogram_base
Histogram_type get_type() override { return type; }
bool parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg,
const char *hist_data, size_t hist_data_len) override;
bool parse(MEM_ROOT *mem_root, const char*, const char*, Field*,
Histogram_type type_arg, const char *hist_data,
size_t hist_data_len) override;
void serialize(Field *to_field) override;
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
ulonglong size) override;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment