Commit e778d12f authored by Michael Okoko, committed by Sergei Petrunia

report parse error when parsing JSON histogram fails

Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent fe2e516a
...@@ -67,6 +67,41 @@ test t1 d 1 25 0.0000 8.0000 0.0000 10 JSON [ ...@@ -67,6 +67,41 @@ test t1 d 1 25 0.0000 8.0000 0.0000 10 JSON [
"21", "21",
"23" "23"
] ]
SELECT * FROM t1;
a b c d
1 1 1 1
2 2 2 2
3 3 3 3
4 4 4 4
5 5 5 5
6 6 6 6
7 7 7 7
8 8 8 8
9 9 9 9
10 10 10 10
11 11 11 11
12 12 12 12
13 13 13 13
14 14 14 14
15 15 15 15
16 16 16 16
17 17 17 17
18 18 18 18
19 19 19 19
20 20 20 20
21 21 21 21
22 22 22 22
23 23 23 23
24 24 24 24
25 25 25 25
UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1';
FLUSH TABLES;
SELECT * FROM t1;
ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '1'.
UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1';
FLUSH TABLES;
SELECT * FROM t1;
ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '32608'.
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
DROP TABLE t1; DROP TABLE t1;
create schema world; create schema world;
......
...@@ -28,6 +28,19 @@ set histogram_size=10; ...@@ -28,6 +28,19 @@ set histogram_size=10;
ANALYZE TABLE t1 PERSISTENT FOR ALL; ANALYZE TABLE t1 PERSISTENT FOR ALL;
SELECT * FROM mysql.column_stats WHERE table_name='t1'; SELECT * FROM mysql.column_stats WHERE table_name='t1';
SELECT * FROM t1;
# We then test different valid JSON strings that are invalid histograms.
UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
SELECT * FROM t1;
UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
SELECT * FROM t1;
DELETE FROM mysql.column_stats; DELETE FROM mysql.column_stats;
DROP TABLE t1; DROP TABLE t1;
......
...@@ -8913,3 +8913,5 @@ ER_PARTITION_CONVERT_SUBPARTITIONED ...@@ -8913,3 +8913,5 @@ ER_PARTITION_CONVERT_SUBPARTITIONED
eng "Convert partition is not supported for subpartitioned table." eng "Convert partition is not supported for subpartitioned table."
ER_PROVIDER_NOT_LOADED ER_PROVIDER_NOT_LOADED
eng "MariaDB tried to use the %s, but its provider plugin is not loaded" eng "MariaDB tried to use the %s, but its provider plugin is not loaded"
ER_JSON_HISTOGRAM_PARSE_FAILED
eng "Failed to parse histogram, encountered JSON_TYPE '%d'."
...@@ -67,15 +67,11 @@ ...@@ -67,15 +67,11 @@
* json_get_array_items expects a JSON array as argument, * json_get_array_items expects a JSON array as argument,
* and pushes the elements of the array into the `container` vector. * and pushes the elements of the array into the `container` vector.
* It only works if all the elements in the original JSON array * It only works if all the elements in the original JSON array
* are scalar values (i.e., strings, numbers, true or false), and returns JSV_BAD_JSON if: * are scalar values (i.e., strings, numbers, true or false),
* the original JSON is not an array OR the JSON array contains non-scalar elements. * else, the JSON type encountered is stored in value_type and the function returns false.
*/ */
bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container); bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container);
std::vector<std::string> parse_histogram_from_json(const char *json);
void test_parse_histogram_from_json();
Histogram_base *create_histogram(Histogram_type hist_type); Histogram_base *create_histogram(Histogram_type hist_type);
/* Currently there are only 3 persistent statistical tables */ /* Currently there are only 3 persistent statistical tables */
...@@ -1221,18 +1217,29 @@ class Column_stat: public Stat_table ...@@ -1221,18 +1217,29 @@ class Column_stat: public Stat_table
of read_stats->histogram. of read_stats->histogram.
*/ */
Histogram_binary * load_histogram(MEM_ROOT *mem_root) Histogram_base * load_histogram(MEM_ROOT *mem_root)
{ {
if (find_stat()) if (find_stat())
{ {
char buff[MAX_FIELD_WIDTH]; char buff[MAX_FIELD_WIDTH];
String val(buff, sizeof(buff), &my_charset_bin); String val(buff, sizeof(buff), &my_charset_bin);
uint fldno= COLUMN_STAT_HISTOGRAM; uint fldno= COLUMN_STAT_HISTOGRAM;
Histogram_base *hist;
Field *stat_field= stat_table->field[fldno]; Field *stat_field= stat_table->field[fldno];
table_field->read_stats->set_not_null(fldno); table_field->read_stats->set_not_null(fldno);
stat_field->val_str(&val); stat_field->val_str(&val);
// histogram-todo: here, create the histogram of appropriate type. switch (table_field->read_stats->histogram_type_on_disk)
Histogram_binary *hist= new (mem_root) Histogram_binary(); {
case SINGLE_PREC_HB:
case DOUBLE_PREC_HB:
hist = new (mem_root) Histogram_binary();
break;
case JSON:
hist = new (mem_root) Histogram_json();
break;
default:
return NULL;
}
if (!hist->parse(mem_root, table_field->read_stats->histogram_type_on_disk, if (!hist->parse(mem_root, table_field->read_stats->histogram_type_on_disk,
(const uchar*)val.ptr(), val.length())) (const uchar*)val.ptr(), val.length()))
{ {
...@@ -1283,21 +1290,17 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp ...@@ -1283,21 +1290,17 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp
bool Histogram_json::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr, uint size_arg) bool Histogram_json::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr, uint size_arg)
{ {
DBUG_ENTER("Histogram_json::parse");
type = type_arg; type = type_arg;
// I think we could use memcpy here, but not sure about how to get the right size
// since we can't depend on size_arg (it's zero for json histograms)
// also, does it make sense to cast here? or we can modify json_get_array_items
// to accept uchar*
const char *json = (char *)ptr; const char *json = (char *)ptr;
int vt; int vt;
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets); bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets);
fprintf(stderr,"==============\n"); if (!result)
fprintf(stderr,"histogram: %s\n", json); {
fprintf(stderr, "json_get_array_items() returned %s\n", result ? "true" : "false"); my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), vt);
fprintf(stderr, "value type after json_get_array_items() is %d\n", vt); DBUG_RETURN(true);
fprintf(stderr, " JSV_BAD_JSON=%d, JSON_VALUE_ARRAY=%d\n", (int)JSV_BAD_JSON, (int)JSON_VALUE_ARRAY); }
fprintf(stderr, "hist_buckets.size()=%zu\n", hist_buckets.size()); DBUG_RETURN(false);
return false;
} }
void Histogram_json::serialize(Field *field) void Histogram_json::serialize(Field *field)
...@@ -1753,11 +1756,6 @@ class Histogram_builder_json : public Histogram_builder ...@@ -1753,11 +1756,6 @@ class Histogram_builder_json : public Histogram_builder
histogram->set_size(bucket_bounds.size()); histogram->set_size(bucket_bounds.size());
Binary_string *json_string = (Binary_string *) writer->output.get_string(); Binary_string *json_string = (Binary_string *) writer->output.get_string();
histogram->set_values((uchar *) json_string->c_ptr()); histogram->set_values((uchar *) json_string->c_ptr());
std::vector<std::string> buckets = parse_histogram_from_json(json_string->c_ptr());
printf("%zu", buckets.size());
test_parse_histogram_from_json();
} }
}; };
...@@ -1770,41 +1768,6 @@ Histogram_base *create_histogram(Histogram_type hist_type) ...@@ -1770,41 +1768,6 @@ Histogram_base *create_histogram(Histogram_type hist_type)
return new Histogram_binary; return new Histogram_binary;
} }
void test_parse_histogram_from_json()
{
std::vector<std::string> bucket = {};
std::string json;
std::string tests[7] = {
R"(["aabbb", "ccccdd", "eeefff"])",
R"(["aabbb", "ccc{}dd", "eeefff"])",
R"(["aabbb", {"a": "b"}, "eeefff"])",
R"({})",
R"([1,2,3, null])",
R"([null])",
R"([])"
};
for(const auto& test : tests) {
json = test;
bucket = parse_histogram_from_json(json.c_str());
}
}
std::vector<std::string> parse_histogram_from_json(const char *json)
{
std::vector<std::string> hist_buckets= {};
int vt;
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets);
fprintf(stderr,"==============\n");
fprintf(stderr,"histogram: %s\n", json);
fprintf(stderr, "json_get_array_items() returned %s\n", result ? "true" : "false");
fprintf(stderr, "value type after json_get_array_items() is %d\n", vt);
fprintf(stderr, " JSV_BAD_JSON=%d, JSON_VALUE_ARRAY=%d\n", (int)JSV_BAD_JSON, (int)JSON_VALUE_ARRAY);
fprintf(stderr, "hist_buckets.size()=%zu\n", hist_buckets.size());
return hist_buckets;
}
bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container) { bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container) {
json_engine_t je; json_engine_t je;
int vl; int vl;
...@@ -1814,7 +1777,6 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ ...@@ -1814,7 +1777,6 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ
if (json_read_value(&je) || je.value_type != JSON_VALUE_ARRAY) if (json_read_value(&je) || je.value_type != JSON_VALUE_ARRAY)
{ {
*value_type = JSV_BAD_JSON;
return false; return false;
} }
*value_type = je.value_type; *value_type = je.value_type;
...@@ -1831,16 +1793,15 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ ...@@ -1831,16 +1793,15 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ
je.value_type != JSON_VALUE_TRUE && je.value_type != JSON_VALUE_TRUE &&
je.value_type != JSON_VALUE_FALSE) je.value_type != JSON_VALUE_FALSE)
{ {
*value_type = JSV_BAD_JSON;
return false; return false;
} }
val = std::string(v, vl); val = std::string(v, vl);
container.emplace_back(val); container.emplace_back(val);
break;
case JST_ARRAY_END: case JST_ARRAY_END:
break; break;
} }
} }
return true; return true;
} }
...@@ -4101,8 +4062,7 @@ double get_column_range_cardinality(Field *field, ...@@ -4101,8 +4062,7 @@ double get_column_range_cardinality(Field *field,
if (avg_frequency > 1.0 + 0.000001 && if (avg_frequency > 1.0 + 0.000001 &&
col_stats->min_max_values_are_provided()) col_stats->min_max_values_are_provided())
{ {
Histogram_binary *hist= Histogram_base *hist = col_stats->histogram_;
dynamic_cast<Histogram_binary *>(col_stats->histogram_);
if (hist && hist->is_usable(thd)) if (hist && hist->is_usable(thd))
{ {
store_key_image_to_rec(field, (uchar *) min_endp->key, store_key_image_to_rec(field, (uchar *) min_endp->key,
...@@ -4146,8 +4106,7 @@ double get_column_range_cardinality(Field *field, ...@@ -4146,8 +4106,7 @@ double get_column_range_cardinality(Field *field,
else else
max_mp_pos= 1.0; max_mp_pos= 1.0;
Histogram_binary *hist= Histogram_base *hist = col_stats->histogram_;
dynamic_cast<Histogram_binary *>(col_stats->histogram_);
if (hist && hist->is_usable(thd)) if (hist && hist->is_usable(thd))
sel= hist->range_selectivity(min_mp_pos, max_mp_pos); sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
else else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment