Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
d8d57d2c
Commit
d8d57d2c
authored
Dec 03, 2021
by
Sergei Petrunia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MDEV-26764: JSON_HB Histograms: handle BINARY and unassigned characters
Encode such characters in hex.
parent
748b293c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
130 additions
and
22 deletions
+130
-22
mysql-test/main/statistics_json.result
mysql-test/main/statistics_json.result
+29
-3
mysql-test/main/statistics_json.test
mysql-test/main/statistics_json.test
+6
-1
sql/opt_histogram_json.cc
sql/opt_histogram_json.cc
+83
-17
sql/opt_histogram_json.h
sql/opt_histogram_json.h
+12
-1
No files found.
mysql-test/main/statistics_json.result
View file @
d8d57d2c
...
...
@@ -7896,16 +7896,41 @@ a
drop table t1;
#
# Another testcase: use a character that cannot be represented in utf8:
# Also, now it's testcase for:
# MDEV-26764: JSON_HB Histograms: handle BINARY and unassigned characters
#
create table t1 ( a varchar(100) character set cp1251);
insert into t1 values ( _cp1251 x'88'),( _cp1251 x'98');
insert into t1 values ( _cp1251 x'88'),( _cp1251 x'88'), ( _cp1251 x'88');
insert into t1 values ( _cp1251 x'98'),( _cp1251 x'98');
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Operation failed
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
select hist_type, histogram
from mysql.column_stats
where db_name=database() and table_name='t1';
hist_type histogram
JSON_HB {
"target_histogram_size": 10,
"collected_at": "REPLACED",
"collected_by": "REPLACED",
"histogram_hb": [
{
"start": "€",
"size": 0.6,
"ndv": 1
},
{
"start_hex": "98",
"end_hex": "98",
"size": 0.4,
"ndv": 1
}
]
}
analyze select * from t1 where a=_cp1251 x'88';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 5.00 60.00 60.00 Using where
drop table t1;
#
# ASAN use-after-poison my_strnxfrm_simple_internal / Histogram_json_hb::range_selectivity ...
...
...
@@ -8102,7 +8127,8 @@ set histogram_type= JSON_HB, histogram_size= 1;
insert into t1 values ('foo'),(unhex('9C'));
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Operation failed
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
select * from t1;
a
foo
...
...
mysql-test/main/statistics_json.test
View file @
d8d57d2c
...
...
@@ -227,9 +227,12 @@ drop table t1;
--
echo
#
--
echo
# Another testcase: use a character that cannot be represented in utf8:
--
echo
# Also, now it's testcase for:
--
echo
# MDEV-26764: JSON_HB Histograms: handle BINARY and unassigned characters
--
echo
#
create
table
t1
(
a
varchar
(
100
)
character
set
cp1251
);
insert
into
t1
values
(
_cp1251
x
'88'
),(
_cp1251
x
'98'
);
insert
into
t1
values
(
_cp1251
x
'88'
),(
_cp1251
x
'88'
),
(
_cp1251
x
'88'
);
insert
into
t1
values
(
_cp1251
x
'98'
),(
_cp1251
x
'98'
);
analyze
table
t1
persistent
for
all
;
--
source
include
/
histogram_replaces
.
inc
...
...
@@ -237,6 +240,8 @@ select hist_type, histogram
from
mysql
.
column_stats
where
db_name
=
database
()
and
table_name
=
't1'
;
analyze
select
*
from
t1
where
a
=
_cp1251
x
'88'
;
drop
table
t1
;
--
echo
#
...
...
sql/opt_histogram_json.cc
View file @
d8d57d2c
...
...
@@ -70,11 +70,11 @@ static bool json_unescape_to_string(const char *val, int val_len, String* out)
succeeds.
*/
static
bool
json_escape_to_string
(
const
String
*
str
,
String
*
out
)
static
int
json_escape_to_string
(
const
String
*
str
,
String
*
out
)
{
// Make sure 'out' has some memory allocated.
if
(
!
out
->
alloced_length
()
&&
out
->
alloc
(
128
))
return
true
;
return
JSON_ERROR_OUT_OF_SPACE
;
while
(
1
)
{
...
...
@@ -90,15 +90,15 @@ static bool json_escape_to_string(const String *str, String* out)
if
(
res
>=
0
)
{
out
->
length
(
res
);
return
false
;
// Ok
return
0
;
// Ok
}
if
(
res
!=
JSON_ERROR_OUT_OF_SPACE
)
return
true
;
// Some conversion error
return
res
;
// Some conversion error
// Out of space error. Try with a bigger buffer
if
(
out
->
alloc
(
out
->
alloced_length
()
*
2
))
return
true
;
return
JSON_ERROR_OUT_OF_SPACE
;
}
}
...
...
@@ -208,8 +208,7 @@ class Histogram_json_builder : public Histogram_builder
*/
bool
finalize_bucket_with_end_value
(
void
*
elem
)
{
writer
.
add_member
(
"end"
);
if
(
append_column_value
(
elem
))
if
(
append_column_value
(
elem
,
false
))
return
true
;
finalize_bucket
();
return
false
;
...
...
@@ -224,8 +223,7 @@ class Histogram_json_builder : public Histogram_builder
{
DBUG_ASSERT
(
bucket
.
size
==
0
);
writer
.
start_object
();
writer
.
add_member
(
"start"
);
if
(
append_column_value
(
elem
))
if
(
append_column_value
(
elem
,
true
))
return
true
;
bucket
.
ndv
=
1
;
...
...
@@ -236,7 +234,7 @@ class Histogram_json_builder : public Histogram_builder
/*
Append the passed value into the JSON writer as string value
*/
bool
append_column_value
(
void
*
elem
)
bool
append_column_value
(
void
*
elem
,
bool
is_start
)
{
StringBuffer
<
MAX_FIELD_WIDTH
>
val
;
...
...
@@ -246,13 +244,22 @@ class Histogram_json_builder : public Histogram_builder
// Escape the value for JSON
StringBuffer
<
MAX_FIELD_WIDTH
>
escaped_val
;
i
f
(
json_escape_to_string
(
str
,
&
escaped_val
))
return
true
;
// Note: The Json_writer does NOT do escapes (perhaps this should change?)
i
nt
rc
=
json_escape_to_string
(
str
,
&
escaped_val
);
if
(
!
rc
)
{
writer
.
add_member
(
is_start
?
"start"
:
"end"
);
writer
.
add_str
(
escaped_val
.
c_ptr_safe
());
return
false
;
}
if
(
rc
==
JSON_ERROR_ILLEGAL_SYMBOL
)
{
escaped_val
.
set_hex
(
val
.
ptr
(),
val
.
length
());
writer
.
add_member
(
is_start
?
"start_hex"
:
"end_hex"
);
writer
.
add_str
(
escaped_val
.
c_ptr_safe
());
return
false
;
}
return
true
;
}
/*
Append a value group of cnt values.
...
...
@@ -496,6 +503,41 @@ bool read_bucket_endpoint(json_engine_t *je, Field *field, String *out,
}
/*
  @brief
    Read a hex-encoded bucket endpoint ("start_hex"/"end_hex" member value)
    from the JSON stream and convert it into the field's raw key image.

  @param je     JSON parser, positioned so that json_read_value() reads the
                endpoint value
  @param field  Field used to convert the decoded bytes into a key image
  @param out    Receives the key image bytes produced by
                field->get_key_image()
  @param err    OUT  Set to an error message when the value is not a valid
                     hex string or when the output buffer cannot be allocated

  @return
    false  OK
    true   Error: JSON read error, value is not a hex string, or out of
           memory. Note that *err is not set when json_read_value() fails.
*/
bool read_hex_bucket_endpoint(json_engine_t *je, Field *field, String *out,
                              const char **err)
{
  static const char *const not_hex_msg= "Expected a hex string";

  if (json_read_value(je))
    return true;

  // A hex string is an unescaped JSON string with an even number of digits.
  if (je->value_type != JSON_VALUE_STRING || je->value_escaped ||
      (je->value_len & 1))
  {
    *err= not_hex_msg;
    return true;
  }

  // Decode pairs of hex digits into bytes.
  StringBuffer<128> buf;
  const auto value_end= je->value + je->value_len;
  for (auto pc= je->value; pc < value_end; pc+= 2)
  {
    int hex_char1= hexchar_to_int(pc[0]);
    int hex_char2= hexchar_to_int(pc[1]);
    if (hex_char1 == -1 || hex_char2 == -1)
    {
      *err= not_hex_msg;
      return true;
    }
    buf.append((hex_char1 << 4) | hex_char2);
  }

  // The decoded bytes are in the field's own character set.
  field->store_text(buf.ptr(), buf.length(), field->charset());

  // alloc() returns true on failure; do not let get_key_image() write into
  // a buffer that was not allocated.
  if (out->alloc(field->pack_length()))
  {
    *err= "Out of memory";
    return true;
  }

  uint bytes= field->get_key_image((uchar*)out->ptr(), field->key_length(),
                                   Field::itRAW);
  out->length(bytes);
  return false;
}
/*
@brief Parse a JSON representation for one histogram bucket
...
...
@@ -619,6 +661,30 @@ int Histogram_json_hb::parse_bucket(json_engine_t *je, Field *field,
}
save1
.
restore_to
(
je
);
// Less common endpoints:
Json_string
start_hex_str
(
"start_hex"
);
if
(
json_key_matches
(
je
,
start_hex_str
.
get
()))
{
if
(
read_hex_bucket_endpoint
(
je
,
field
,
&
value_buf
,
err
))
return
1
;
have_start
=
true
;
continue
;
}
save1
.
restore_to
(
je
);
Json_string
end_hex_str
(
"end_hex"
);
if
(
json_key_matches
(
je
,
end_hex_str
.
get
()))
{
if
(
read_hex_bucket_endpoint
(
je
,
field
,
&
value_buf
,
err
))
return
1
;
last_bucket_end_endp
.
assign
(
value_buf
.
ptr
(),
value_buf
.
length
());
*
assigned_last_end
=
true
;
continue
;
}
save1
.
restore_to
(
je
);
// Some unknown member. Skip it.
if
(
json_skip_key
(
je
))
return
1
;
...
...
sql/opt_histogram_json.h
View file @
d8d57d2c
...
...
@@ -32,12 +32,18 @@
"histogram_hb": [
{ "start": "value", "size":nnn.nn, "ndv": nnn },
...
{ "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"}
// Optionally, start and/or end can be replaced with _hex variant
{ "start_hex": "value", "size":nnn.nn, "ndv":nnn},
...
{ "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"},
]
}
The histogram is an object with single member named Histogram_json_hb::
JSON_NAME. The value of that member is an array of buckets.
Each bucket is an object with these members:
"start" - the first value in the bucket.
"size" - fraction of table rows that is contained in the bucket.
...
...
@@ -51,6 +57,11 @@
The exception is single-point buckets where last value is the same as the
first value.
start/end can be replaced with start_hex/end_hex. In _hex variant, the
constant is encoded in hex. This encoding is used to handle so called
"unassigned characters": some non-UTF8 charsets have byte combinations that
are not mapped to any UTF8 character.
*/
class
Histogram_json_hb
:
public
Histogram_base
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment