Commit 6375873c authored by Sergei Petrunia's avatar Sergei Petrunia

Fixes in opt_histogram_json.cc in the last commits

Also add more test coverage
parent 49a7bbb1
...@@ -4028,10 +4028,58 @@ analyze select * from t1_json where a > 'zzzzzzzzz'; ...@@ -4028,10 +4028,58 @@ analyze select * from t1_json where a > 'zzzzzzzzz';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where 1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where
drop table ten; drop table ten;
UPDATE mysql.column_stats SET histogram='["a-1", "a-2", {"a": "b"}, "a-3"]' WHERE table_name='t1_json'; UPDATE mysql.column_stats
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: Root JSON element must be a JSON object at offset 12345. ERROR HY000: Failed to parse histogram: Root JSON element must be a JSON object at offset 0.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: A JSON array expected at offset 0.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: Object expected at offset 19.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .start member must be present and be a scalar at offset 20.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .size member must be present and be a scalar at offset 20.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .ndv member must be present and be a scalar at offset 20.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0.
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[]}'
WHERE table_name='t1_json';
FLUSH TABLES;
explain select * from t1_json limit 1;
ERROR HY000: Failed to parse histogram: .end must be present in the last bucket and only there at offset 0.
create table t2 ( create table t2 (
city varchar(100) city varchar(100)
); );
......
...@@ -39,12 +39,69 @@ analyze select * from t1_json where a > 'zzzzzzzzz'; ...@@ -39,12 +39,69 @@ analyze select * from t1_json where a > 'zzzzzzzzz';
drop table ten; drop table ten;
# test different valid JSON strings that are invalid histograms. #
UPDATE mysql.column_stats SET histogram='["a-1", "a-2", {"a": "b"}, "a-3"]' WHERE table_name='t1_json'; # Test different valid JSON strings that are invalid histograms.
#
UPDATE mysql.column_stats
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
FLUSH TABLES; FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED --error ER_JSON_HISTOGRAM_PARSE_FAILED
explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz'; explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":"not-histogram"}' WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":["not-a-bucket"]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"no-expected-members":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":{}}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":"not-an-integer"}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[{"start":"aaa", "size":0.25, "ndv":1}]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
UPDATE mysql.column_stats
SET histogram='{"histogram_hb_v2":[]}'
WHERE table_name='t1_json';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
explain select * from t1_json limit 1;
--source include/have_sequence.inc --source include/have_sequence.inc
create table t2 ( create table t2 (
......
...@@ -94,8 +94,8 @@ class Histogram_json_builder : public Histogram_builder ...@@ -94,8 +94,8 @@ class Histogram_json_builder : public Histogram_builder
{ {
column->store_field_value((uchar*) elem, col_length); column->store_field_value((uchar*) elem, col_length);
StringBuffer<MAX_FIELD_WIDTH> val; StringBuffer<MAX_FIELD_WIDTH> val;
column->val_str(&val); String *str= column->val_str(&val);
writer.add_member("end").add_str(val.c_ptr()); writer.add_member("end").add_str(str->c_ptr_safe());
finalize_bucket(); finalize_bucket();
} }
...@@ -109,10 +109,10 @@ class Histogram_json_builder : public Histogram_builder ...@@ -109,10 +109,10 @@ class Histogram_json_builder : public Histogram_builder
DBUG_ASSERT(bucket.size == 0); DBUG_ASSERT(bucket.size == 0);
column->store_field_value((uchar*) elem, col_length); column->store_field_value((uchar*) elem, col_length);
StringBuffer<MAX_FIELD_WIDTH> val; StringBuffer<MAX_FIELD_WIDTH> val;
column->val_str(&val); String *str= column->val_str(&val);
writer.start_object(); writer.start_object();
writer.add_member("start").add_str(val.c_ptr()); writer.add_member("start").add_str(str->c_ptr_safe());
bucket.ndv= 1; bucket.ndv= 1;
bucket.size= cnt; bucket.size= cnt;
...@@ -264,14 +264,17 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -264,14 +264,17 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
const char *err; const char *err;
DBUG_ENTER("Histogram_json_hb::parse"); DBUG_ENTER("Histogram_json_hb::parse");
DBUG_ASSERT(type_arg == JSON_HB); DBUG_ASSERT(type_arg == JSON_HB);
const char *err_pos= hist_data;
const char *obj1; const char *obj1;
int obj1_len; int obj1_len;
double cumulative_size= 0.0; double cumulative_size= 0.0;
size_t end_member_index= (size_t)-1;
if (JSV_OBJECT != json_type(hist_data, hist_data + hist_data_len, if (JSV_OBJECT != json_type(hist_data, hist_data + hist_data_len,
&obj1, &obj1_len)) &obj1, &obj1_len))
{ {
err= "Root JSON element must be a JSON object"; err= "Root JSON element must be a JSON object";
err_pos= hist_data;
goto error; goto error;
} }
...@@ -281,6 +284,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -281,6 +284,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
"histogram_hb_v2", &hist_array, "histogram_hb_v2", &hist_array,
&hist_array_len)) &hist_array_len))
{ {
err_pos= obj1;
err= "A JSON array expected"; err= "A JSON array expected";
goto error; goto error;
} }
...@@ -296,11 +300,13 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -296,11 +300,13 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
break; break;
if (ret == JSV_BAD_JSON) if (ret == JSV_BAD_JSON)
{ {
err_pos= hist_array;
err= "JSON parse error"; err= "JSON parse error";
goto error; goto error;
} }
if (ret != JSV_OBJECT) if (ret != JSV_OBJECT)
{ {
err_pos= hist_array;
err= "Object expected"; err= "Object expected";
goto error; goto error;
} }
...@@ -313,6 +319,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -313,6 +319,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
"start", &val, &val_len); "start", &val, &val_len);
if (ret != JSV_STRING && ret != JSV_NUMBER) if (ret != JSV_STRING && ret != JSV_NUMBER)
{ {
err_pos= bucket_info;
err= ".start member must be present and be a scalar"; err= ".start member must be present and be a scalar";
goto error; goto error;
} }
...@@ -324,6 +331,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -324,6 +331,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
"size", &size, &size_len); "size", &size, &size_len);
if (ret != JSV_NUMBER) if (ret != JSV_NUMBER)
{ {
err_pos= bucket_info;
err= ".size member must be present and be a scalar"; err= ".size member must be present and be a scalar";
goto error; goto error;
} }
...@@ -333,6 +341,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -333,6 +341,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
double size_d= my_strtod(size, &size_end, &conv_err); double size_d= my_strtod(size, &size_end, &conv_err);
if (conv_err) if (conv_err)
{ {
err_pos= size;
err= ".size member must be a floating-point value"; err= ".size member must be a floating-point value";
goto error; goto error;
} }
...@@ -345,6 +354,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -345,6 +354,7 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
"ndv", &ndv, &ndv_len); "ndv", &ndv, &ndv_len);
if (ret != JSV_NUMBER) if (ret != JSV_NUMBER)
{ {
err_pos= bucket_info;
err= ".ndv member must be present and be a scalar"; err= ".ndv member must be present and be a scalar";
goto error; goto error;
} }
...@@ -352,41 +362,58 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field, ...@@ -352,41 +362,58 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
longlong ndv_ll= my_strtoll10(ndv, &ndv_end, &conv_err); longlong ndv_ll= my_strtoll10(ndv, &ndv_end, &conv_err);
if (conv_err) if (conv_err)
{ {
err_pos= ndv;
err= ".ndv member must be an integer value"; err= ".ndv member must be an integer value";
goto error; goto error;
} }
uchar buf[MAX_KEY_LENGTH];
uint len_to_copy= field->key_length();
field->store_text(val, val_len, &my_charset_bin);
uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
buckets.push_back({std::string((char*)buf, bytes), cumulative_size,
ndv_ll});
// Read the "end" field
const char *end_val; const char *end_val;
int end_val_len; int end_val_len;
ret= json_get_object_key(bucket_info, bucket_info+bucket_info_len, ret= json_get_object_key(bucket_info, bucket_info+bucket_info_len,
"end", &end_val, &end_val_len); "end", &end_val, &end_val_len);
if (ret != JSV_NOTHING && ret != JSV_STRING && ret !=JSV_NUMBER) if (ret != JSV_NOTHING && ret != JSV_STRING && ret !=JSV_NUMBER)
{ {
err_pos= bucket_info;
err= ".end member must be a scalar"; err= ".end member must be a scalar";
goto error; goto error;
} }
if (ret != JSV_NOTHING) if (ret != JSV_NOTHING)
last_bucket_end_endp.assign(end_val, end_val_len);
buckets.push_back({std::string(val, val_len), NULL, cumulative_size,
ndv_ll});
if (buckets.size())
{ {
auto& prev_bucket= buckets[buckets.size()-1]; field->store_text(end_val, end_val_len, &my_charset_bin);
if (prev_bucket.ndv == 1) uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
prev_bucket.end_value= &prev_bucket.start_value; last_bucket_end_endp.assign((char*)buf, bytes);
else if (end_member_index == (size_t)-1)
prev_bucket.end_value= &buckets.back().start_value; end_member_index= buckets.size();
} }
} }
buckets.back().end_value= &last_bucket_end_endp;
size= buckets.size(); size= buckets.size();
if (end_member_index != buckets.size())
{
err= ".end must be present in the last bucket and only there";
err_pos= hist_data;
goto error;
}
if (!buckets.size())
{
err= ".end member is allowed only in last bucket";
err_pos= hist_data;
goto error;
}
DBUG_RETURN(false); DBUG_RETURN(false);
error: error:
my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err, my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err, err_pos - hist_data);
12345);
DBUG_RETURN(true); DBUG_RETURN(true);
} }
...@@ -469,7 +496,7 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint, ...@@ -469,7 +496,7 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
* The bucket has one value and this is the value we are looking for. * The bucket has one value and this is the value we are looking for.
* The bucket has multiple values. Then, assume * The bucket has multiple values. Then, assume
*/ */
sel= (get_left_fract(idx) - buckets[idx].cum_fract) / buckets[idx].ndv; sel= (buckets[idx].cum_fract - get_left_fract(idx)) / buckets[idx].ndv;
} }
return sel; return sel;
} }
...@@ -483,6 +510,14 @@ double Histogram_json_hb::get_left_fract(int idx) ...@@ -483,6 +510,14 @@ double Histogram_json_hb::get_left_fract(int idx)
return buckets[idx-1].cum_fract; return buckets[idx-1].cum_fract;
} }
std::string& Histogram_json_hb::get_end_value(int idx)
{
if (idx == (int)buckets.size()-1)
return last_bucket_end_endp;
else
return buckets[idx+1].start_value;
}
/* /*
@param field The table field histogram is for. We don't care about the @param field The table field histogram is for. We don't care about the
field's current value, we only need its virtual functions to field's current value, we only need its virtual functions to
...@@ -514,7 +549,7 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, ...@@ -514,7 +549,7 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
double left_fract= get_left_fract(idx); double left_fract= get_left_fract(idx);
double sel= position_in_interval(field, min_key, min_key_len, double sel= position_in_interval(field, min_key, min_key_len,
buckets[idx].start_value, buckets[idx].start_value,
*buckets[idx].end_value); get_end_value(idx));
min= left_fract + sel * (buckets[idx].cum_fract - left_fract); min= left_fract + sel * (buckets[idx].cum_fract - left_fract);
} }
...@@ -538,7 +573,7 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, ...@@ -538,7 +573,7 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
double left_fract= get_left_fract(idx); double left_fract= get_left_fract(idx);
double sel= position_in_interval(field, max_key, max_key_len, double sel= position_in_interval(field, max_key, max_key_len,
buckets[idx].start_value, buckets[idx].start_value,
*buckets[idx].end_value); get_end_value(idx));
max= left_fract + sel * (buckets[idx].cum_fract - left_fract); max= left_fract + sel * (buckets[idx].cum_fract - left_fract);
} }
else else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment