Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
2a1cdbab
Commit
2a1cdbab
authored
Aug 29, 2021
by
Sergei Petrunia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix JSON parsing: future-proof data representation in JSON, code cleanup
parent
a0b4a868
Changes
6
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1349 additions
and
1288 deletions
+1349
-1288
include/json_lib.h
include/json_lib.h
+7
-6
mysql-test/main/statistics_json.result
mysql-test/main/statistics_json.result
+1224
-1183
mysql-test/main/statistics_json.test
mysql-test/main/statistics_json.test
+12
-10
sql/share/errmsg-utf8.txt
sql/share/errmsg-utf8.txt
+1
-1
sql/sql_statistics.cc
sql/sql_statistics.cc
+95
-82
sql/sql_statistics.h
sql/sql_statistics.h
+10
-6
No files found.
include/json_lib.h
View file @
2a1cdbab
...
...
@@ -283,12 +283,13 @@ int json_key_matches(json_engine_t *je, json_string_t *k);
int
json_read_value
(
json_engine_t
*
j
);
/*
* json_smart_read_value() reads parses a scalar value and value length from the json engine,
* and copies them into `value` and `value_length` respectively.
* It should only be called when the json_engine state is JST_VALUE.
* If it encounters a non-scalar value (say object or array) before getting to value_len,
* such value is also read and copied into value.
*/
json_smart_read_value() reads a JSON value. Pointer to value is stored in
*value and its length in *value_len.
if the value is non a scalar, it returns pointers to its JSON
representation.
The function should only be called when je->state==JST_VALUE.
*/
enum
json_types
json_smart_read_value
(
json_engine_t
*
je
,
const
char
**
value
,
int
*
value_len
);
/*
...
...
mysql-test/main/statistics_json.result
View file @
2a1cdbab
This diff is collapsed.
Click to expand it.
mysql-test/main/statistics_json.test
View file @
2a1cdbab
...
...
@@ -37,6 +37,8 @@ analyze select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
explain
extended
select
*
from
t1_json
where
a
<
'b-1a'
;
analyze
select
*
from
t1_json
where
a
>
'zzzzzzzzz'
;
drop
table
ten
;
# test different valid JSON strings that are invalid histograms.
UPDATE
mysql
.
column_stats
SET
histogram
=
'["a-1", "a-2", {"a": "b"}, "a-3"]'
WHERE
table_name
=
't1_json'
;
FLUSH
TABLES
;
...
...
@@ -45,23 +47,23 @@ explain extended select * from t1_json where a between 'a-3a' and 'zzzzzzzzz';
--
source
include
/
have_sequence
.
inc
create
table
users
(
create
table
t2
(
city
varchar
(
100
)
);
set
histogram_size
=
50
;
insert
into
users
select
'Moscow'
from
seq_1_to_99
;
insert
into
users
select
'Helsinki'
from
seq_1_to_2
;
insert
into
t2
select
'Moscow'
from
seq_1_to_99
;
insert
into
t2
select
'Helsinki'
from
seq_1_to_2
;
set
histogram_type
=
json_hb
;
analyze
table
users
persistent
for
all
;
explain
extended
select
*
from
users
where
city
=
'Moscow'
;
analyze
select
*
from
users
where
city
=
'Moscow'
;
explain
extended
select
*
from
users
where
city
=
'Helsinki'
;
analyze
select
*
from
users
where
city
=
'helsinki'
;
explain
extended
select
*
from
users
where
city
<
'Lagos'
;
analyze
table
t2
persistent
for
all
;
explain
extended
select
*
from
t2
where
city
=
'Moscow'
;
analyze
select
*
from
t2
where
city
=
'Moscow'
;
explain
extended
select
*
from
t2
where
city
=
'Helsinki'
;
analyze
select
*
from
t2
where
city
=
'helsinki'
;
explain
extended
select
*
from
t2
where
city
<
'Lagos'
;
drop
table
t1_bin
;
drop
table
t1_json
;
drop
table
users
;
drop
table
t2
;
DELETE
FROM
mysql
.
column_stats
;
...
...
sql/share/errmsg-utf8.txt
View file @
2a1cdbab
...
...
@@ -8914,4 +8914,4 @@ ER_PARTITION_CONVERT_SUBPARTITIONED
ER_PROVIDER_NOT_LOADED
eng "MariaDB tried to use the %s, but its provider plugin is not loaded"
ER_JSON_HISTOGRAM_PARSE_FAILED
eng "Failed to parse histogram
, encountered JSON_TYPE '%d'
."
eng "Failed to parse histogram
: %s at offset %d
."
sql/sql_statistics.cc
View file @
2a1cdbab
...
...
@@ -1123,6 +1123,7 @@ class Column_stat: public Stat_table
void
get_stat_values
()
{
table_field
->
read_stats
->
set_all_nulls
();
// default: hist_type=NULL means there's no histogram
table_field
->
read_stats
->
histogram_type_on_disk
=
INVALID_HISTOGRAM
;
if
(
table_field
->
read_stats
->
min_value
)
...
...
@@ -1196,7 +1197,10 @@ class Column_stat: public Stat_table
break
;
}
case
COLUMN_STAT_HISTOGRAM
:
//TODO: if stat_field->length() == 0 then histogram_type_on_disk is set to INVALID_HISTOGRAM
/*
Do nothing here: we take the histogram length from the 'histogram'
column itself
*/
break
;
}
}
...
...
@@ -1245,7 +1249,7 @@ class Column_stat: public Stat_table
}
if
(
!
hist
->
parse
(
mem_root
,
table_field
,
table_field
->
read_stats
->
histogram_type_on_disk
,
(
const
uchar
*
)
val
.
ptr
(),
val
.
length
()))
val
.
ptr
(),
val
.
length
()))
{
table_field
->
read_stats
->
histogram_
=
hist
;
return
hist
;
...
...
@@ -1255,19 +1259,19 @@ class Column_stat: public Stat_table
}
};
bool
Histogram_binary
::
parse
(
MEM_ROOT
*
mem_root
,
Field
*
,
Histogram_type
type_arg
,
const
uchar
*
ptr_arg
,
uint
size_arg
)
bool
Histogram_binary
::
parse
(
MEM_ROOT
*
mem_root
,
Field
*
,
Histogram_type
type_arg
,
const
char
*
hist_data
,
size_t
hist_data_len
)
{
// Just copy the data
size
=
(
uint8
)
size_arg
;
type
=
type_arg
;
if
((
values
=
(
uchar
*
)
alloc_root
(
mem_root
,
size_arg
)))
{
memcpy
(
values
,
ptr_arg
,
size_arg
);
return
false
;
}
/* On-disk an in-memory formats are the same. Just copy the data. */
type
=
type_arg
;
size
=
(
uint8
)
hist_data_len
;
// 'size' holds the size of histogram in bytes
if
(
!
(
values
=
(
uchar
*
)
alloc_root
(
mem_root
,
hist_data_len
)))
return
true
;
memcpy
(
values
,
hist_data
,
hist_data_len
);
return
false
;
}
/*
...
...
@@ -1307,39 +1311,81 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root,
*/
bool
Histogram_json
::
parse
(
MEM_ROOT
*
mem_root
,
Field
*
field
,
Histogram_type
type_arg
,
const
uchar
*
ptr
,
uint
size_arg
)
Histogram_type
type_arg
,
const
char
*
hist_data
,
size_t
hist_data_len
)
{
DBUG_ENTER
(
"Histogram_json::parse"
);
DBUG_ASSERT
(
type_arg
==
JSON_HB
);
size
=
(
uint8
)
size_arg
;
const
char
*
json
=
(
char
*
)
ptr
;
int
vt
;
std
::
vector
<
std
::
string
>
hist_buckets_text
;
bool
result
=
json_get_array_items
(
json
,
json
+
strlen
(
json
),
&
vt
,
hist_buckets_text
);
if
(
!
result
)
{
my_error
(
ER_JSON_HISTOGRAM_PARSE_FAILED
,
MYF
(
0
),
vt
);
DBUG_RETURN
(
true
);
const
char
*
err
;
json_engine_t
je
;
json_string_t
key_name
;
json_scan_start
(
&
je
,
&
my_charset_utf8mb4_bin
,
(
const
uchar
*
)
hist_data
,
(
const
uchar
*
)
hist_data
+
hist_data_len
);
if
(
json_read_value
(
&
je
)
||
je
.
value_type
!=
JSON_VALUE_OBJECT
)
{
err
=
"Root JSON element must be a JSON object"
;
goto
error
;
}
size
=
hist_buckets_text
.
size
();
/*
Convert the text based array into a data structure that allows lookups and
estimates
*/
for
(
auto
&
s
:
hist_buckets_text
)
json_string_set_str
(
&
key_name
,
(
const
uchar
*
)
JSON_NAME
,
(
const
uchar
*
)
JSON_NAME
+
strlen
(
JSON_NAME
));
json_string_set_cs
(
&
key_name
,
system_charset_info
);
if
(
json_scan_next
(
&
je
)
||
je
.
state
!=
JST_KEY
||
!
json_key_matches
(
&
je
,
&
key_name
))
{
field
->
store_text
(
s
.
data
(),
s
.
size
(),
&
my_charset_bin
);
err
=
"The first key in the object must be histogram_hb_v1"
;
goto
error
;
}
// Get the value in "truncated key tuple format" here:
// The value must be a JSON array
if
(
json_read_value
(
&
je
)
||
(
je
.
value_type
!=
JSON_VALUE_ARRAY
))
{
err
=
"A JSON array expected"
;
goto
error
;
}
// Read the array
while
(
!
json_scan_next
(
&
je
))
{
switch
(
je
.
state
)
{
case
JST_VALUE
:
{
const
char
*
val
;
int
val_len
;
json_smart_read_value
(
&
je
,
&
val
,
&
val_len
);
if
(
je
.
value_type
!=
JSON_VALUE_STRING
&&
je
.
value_type
!=
JSON_VALUE_NUMBER
&&
je
.
value_type
!=
JSON_VALUE_TRUE
&&
je
.
value_type
!=
JSON_VALUE_FALSE
)
{
err
=
"Scalar value expected"
;
goto
error
;
}
uchar
buf
[
MAX_KEY_LENGTH
];
uint
len_to_copy
=
field
->
key_length
();
field
->
store_text
(
val
,
val_len
,
&
my_charset_bin
);
uint
bytes
=
field
->
get_key_image
(
buf
,
len_to_copy
,
Field
::
itRAW
);
histogram_bounds
.
push_back
(
std
::
string
((
char
*
)
buf
,
bytes
));
// TODO: Should we also compare this endpoint with the previous
// to verify that the ordering is right?
break
;
}
case
JST_ARRAY_END
:
break
;
}
}
size
=
histogram_bounds
.
size
();
DBUG_RETURN
(
false
);
error:
my_error
(
ER_JSON_HISTOGRAM_PARSE_FAILED
,
MYF
(
0
),
err
,
je
.
s
.
c_str
-
(
const
uchar
*
)
hist_data
);
DBUG_RETURN
(
true
);
}
...
...
@@ -1594,7 +1640,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
void
Histogram_json
::
serialize
(
Field
*
field
)
{
field
->
store
(
(
char
*
)
json_text
,
strlen
((
char
*
)
json_text
),
&
my_charset_bin
);
field
->
store
(
json_text
.
data
(),
json_text
.
size
(
),
&
my_charset_bin
);
}
...
...
@@ -2052,13 +2098,16 @@ class Histogram_builder_json : public Histogram_builder
}
void
build_json_from_histogram
()
{
Json_writer
*
writer
=
new
Json_writer
();
writer
->
start_array
();
Json_writer
writer
;
writer
.
start_object
();
writer
.
add_member
(
Histogram_json
::
JSON_NAME
).
start_array
();
for
(
auto
&
value
:
bucket_bounds
)
{
writer
->
add_str
(
value
.
c_str
());
writer
.
add_str
(
value
.
c_str
());
}
writer
->
end_array
();
Binary_string
*
json_string
=
(
Binary_string
*
)
writer
->
output
.
get_string
();
writer
.
end_array
();
writer
.
end_object
();
Binary_string
*
json_string
=
(
Binary_string
*
)
writer
.
output
.
get_string
();
Histogram_json
*
hist
=
(
Histogram_json
*
)
histogram
;
hist
->
set_json_text
(
bucket_bounds
.
size
(),
(
uchar
*
)
json_string
->
c_ptr
());
}
...
...
@@ -2080,42 +2129,6 @@ Histogram_base *create_histogram(Histogram_type hist_type)
}
bool
json_get_array_items
(
const
char
*
json
,
const
char
*
json_end
,
int
*
value_type
,
std
::
vector
<
std
::
string
>
&
container
)
{
json_engine_t
je
;
int
vl
;
const
char
*
v
;
json_scan_start
(
&
je
,
&
my_charset_utf8mb4_bin
,
(
const
uchar
*
)
json
,
(
const
uchar
*
)
json_end
);
if
(
json_read_value
(
&
je
)
||
(
*
value_type
=
je
.
value_type
)
!=
JSON_VALUE_ARRAY
)
{
return
false
;
}
std
::
string
val
;
while
(
!
json_scan_next
(
&
je
))
{
switch
(
je
.
state
)
{
case
JST_VALUE
:
*
value_type
=
json_smart_read_value
(
&
je
,
&
v
,
&
vl
);
if
(
je
.
value_type
!=
JSON_VALUE_STRING
&&
je
.
value_type
!=
JSON_VALUE_NUMBER
&&
je
.
value_type
!=
JSON_VALUE_TRUE
&&
je
.
value_type
!=
JSON_VALUE_FALSE
)
{
return
false
;
}
val
=
std
::
string
(
v
,
vl
);
container
.
emplace_back
(
val
);
break
;
case
JST_ARRAY_END
:
break
;
}
}
return
true
;
}
C_MODE_START
int
histogram_build_walk
(
void
*
elem
,
element_count
elem_cnt
,
void
*
arg
)
...
...
sql/sql_statistics.h
View file @
2a1cdbab
...
...
@@ -152,7 +152,7 @@ class Histogram_base : public Sql_alloc
{
public:
virtual
bool
parse
(
MEM_ROOT
*
mem_root
,
Field
*
field
,
Histogram_type
type_arg
,
const
uchar
*
ptr
,
uint
size
)
=
0
;
const
char
*
hist_data
,
size_t
hist_data_len
)
=
0
;
virtual
void
serialize
(
Field
*
to_field
)
=
0
;
virtual
Histogram_type
get_type
()
=
0
;
...
...
@@ -187,7 +187,7 @@ class Histogram_binary : public Histogram_base
{
public:
bool
parse
(
MEM_ROOT
*
mem_root
,
Field
*
,
Histogram_type
type_arg
,
const
uchar
*
ptr_arg
,
uint
size_arg
)
override
;
const
char
*
hist_data
,
size_t
hist_data_len
)
override
;
void
serialize
(
Field
*
to_field
)
override
;
Histogram_type
get_type
()
override
{
return
type
;
}
...
...
@@ -350,14 +350,16 @@ class Histogram_json : public Histogram_base
uint8
size
;
/* Number of elements in the histogram */
/* Collection-time only: collected histogram in the JSON form. */
uchar
*
json_text
;
std
::
string
json_text
;
// Array of histogram bucket endpoints in KeyTupleFormat.
std
::
vector
<
std
::
string
>
histogram_bounds
;
public:
static
constexpr
const
char
*
JSON_NAME
=
"histogram_hb_v1"
;
bool
parse
(
MEM_ROOT
*
mem_root
,
Field
*
field
,
Histogram_type
type_arg
,
const
uchar
*
ptr
,
uint
size
)
override
;
const
char
*
hist_data
,
size_t
hist_data_len
)
override
;
void
serialize
(
Field
*
field
)
override
;
...
...
@@ -375,7 +377,8 @@ class Histogram_json : public Histogram_base
void
set_json_text
(
ulonglong
sz
,
uchar
*
json_text_arg
)
{
size
=
(
uint8
)
sz
;
json_text
=
json_text_arg
;
json_text
.
assign
((
const
char
*
)
json_text_arg
,
strlen
((
const
char
*
)
json_text_arg
));
}
uint
get_size
()
override
...
...
@@ -481,8 +484,9 @@ class Column_statistics
ulonglong
avg_frequency
;
public:
/* Histogram type as specified in mysql.column_stats.hist_type */
Histogram_type
histogram_type_on_disk
;
Histogram_base
*
histogram_
;
uint32
no_values_provided_bitmap
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment