Commit 9954aecc authored by Michael Okoko, committed by Sergei Petrunia

Store bucket bounds and extend test cases for JSON histogram

This fixes the memory allocation for the JSON histogram builder and adds more column types for testing.
Some challenges remain at the moment:
* A garbage value at the end of the JSON array still persists.
* A garbage value also gets appended to the bucket values if the column is a primary key.
* There is a memory leak that results in a "Warning: Memory not freed" message at the end of the tests.
Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent 237447de
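For orientation before the diff: the patch collects one upper bound per histogram bucket while the column values are scanned in order, keeps the bounds in a std::vector<std::string>, and serializes them as a JSON array of strings. Below is a minimal standalone sketch of that flow, in plain C++ with illustrative names only; the server code uses Field, StringBuffer and Json_writer for the corresponding steps.

#include <iostream>
#include <string>
#include <vector>

// Pick one upper bound per bucket from an already-sorted list of values.
// `buckets` plays the role of the histogram width derived from @@histogram_size.
static std::vector<std::string> collect_bounds(const std::vector<int> &sorted_values,
                                               size_t buckets)
{
  std::vector<std::string> bounds;
  size_t capacity= sorted_values.size() / buckets;        // values per bucket
  if (capacity == 0)
    capacity= 1;
  for (size_t i= capacity - 1; i < sorted_values.size(); i+= capacity)
    bounds.push_back(std::to_string(sorted_values[i]));   // bucket's last value
  return bounds;
}

// Serialize the bounds as a JSON array of strings. Real values would need
// escaping, which Json_writer handles in the server.
static std::string to_json_array(const std::vector<std::string> &bounds)
{
  std::string out= "[";
  for (size_t i= 0; i < bounds.size(); i++)
  {
    if (i)
      out+= ", ";
    out+= "\"" + bounds[i] + "\"";
  }
  return out + "]";
}

int main()
{
  std::vector<int> column;
  for (int i= 1; i <= 25; i++)             // mimics INSERT ... from seq_1_to_25
    column.push_back(i);
  std::cout << to_json_array(collect_bounds(column, 10)) << "\n";
  // prints ["2", "4", "6", "8", "10", "12", "14", "16", "18", "20", "22", "24"]
  return 0;
}

The 25-row/10-bucket numbers mirror the test case below; the real builder derives its bucket capacity from the histogram width in the same spirit.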
 --source include/have_stat_tables.inc
+--source include/have_sequence.inc
+--source include/analyze-format.inc
 --echo #
---echo # Test that JSON is a valid histogram type and we can store JSON strings in mysql.column_stats
+--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON
 --echo #
 --disable_warnings
 drop table if exists t1;
 --enable_warnings
 set @save_histogram_type=@@histogram_type;
 set @save_histogram_size=@@histogram_size;
 CREATE TABLE t1 (
-  a int NOT NULL PRIMARY KEY,
-  b varchar(32)
-) ENGINE=MYISAM;
+  a int,
+  b varchar(32),
+  c char(2),
+  d double
+);
+--disable_result_log
+INSERT INTO t1 SELECT seq, seq, seq, seq from seq_1_to_25;
+--enable_result_log
+SET histogram_type='JSON';
+SELECT @@histogram_type;
+# set histogram size to be < row count (25 in this case) to see how histogram behaves
+set histogram_size=10;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+SELECT * FROM mysql.column_stats WHERE table_name='t1';
+DELETE FROM mysql.column_stats;
+DROP TABLE t1;
+create schema world;
+use world;
+--disable_query_log
+--disable_result_log
+--disable_warnings
+--source include/world_schema_utf8.inc
+--source include/world.inc
+--enable_warnings
+--enable_result_log
+--enable_query_log
-INSERT INTO t1 VALUES
-(7, 'xxxxxxxxxxxxxxxxxxxxxxxxxx'),
-(17, 'vvvvvvvvvvvvv');
 set histogram_type='JSON';
 set histogram_size=25;
+--disable_result_log
+ANALYZE TABLE Country PERSISTENT FOR ALL;
+--enable_result_log
-ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES();
-DESCRIBE mysql.column_stats;
-SELECT * FROM mysql.column_stats;
+SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
-## Remove against Milestone-2
-ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES();
-SELECT * FROM mysql.column_stats;
-select table_name, hist_type, decode_histogram(hist_type, histogram ) from mysql.column_stats;
-DROP TABLE t1;
\ No newline at end of file
+DROP SCHEMA world;
\ No newline at end of file
@@ -1073,9 +1073,16 @@ class Column_stat: public Stat_table
       stat_field->store(stats->histogram.get_type() + 1);
       break;
     case COLUMN_STAT_HISTOGRAM:
+      if (stats->histogram.get_type() == JSON)
+      {
+        stat_field->store((char *) stats->histogram.get_values(),
+                          strlen((char *) stats->histogram.get_values()), &my_charset_bin);
+      } else
+      {
       stat_field->store((char *) stats->histogram.get_values(),
                         stats->histogram.get_size(), &my_charset_bin);
-      break;
+      }
+      break;
     }
   }
 }
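A note on the JSON branch above: it measures the stored value with strlen() instead of histogram.get_size(), which fits the builder hunk further down, where set_size() now receives the number of bucket bounds rather than a byte count. A tiny illustration of the two quantities, using made-up values:

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

int main()
{
  std::vector<std::string> bounds= {"8", "16", "25"};   // three bucket bounds
  std::string json= "[\"8\", \"16\", \"25\"]";          // serialized histogram

  std::cout << "set_size() argument:   " << bounds.size() << "\n";              // 3
  std::cout << "bytes actually stored: " << std::strlen(json.c_str()) << "\n";  // 17
  // strlen() is only safe here because std::string guarantees a trailing '\0';
  // the server-side buffer has no such guarantee, which is one plausible source
  // of the "garbage value" items in the commit message.
  return 0;
}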
@@ -1588,7 +1595,7 @@ class Histogram_builder
 class Histogram_builder_json : public Histogram_builder
 {
-  std::vector<String> bucket_bounds;
+  std::vector<std::string> bucket_bounds;
 public:
   Histogram_builder_json(Field *col, uint col_len, ha_rows rows)
@@ -1619,9 +1626,10 @@ std::vector<String> bucket_bounds;
       return 0;
     if (count > bucket_capacity * (curr_bucket + 1))
     {
-      auto *val= new StringBuffer<MAX_FIELD_WIDTH>;
-      column->val_str(val);
-      bucket_bounds.emplace_back(String(val->ptr(), val->length(), &my_charset_bin));
+      column->store_field_value((uchar *) elem, col_length);
+      StringBuffer<MAX_FIELD_WIDTH> val;
+      column->val_str(&val);
+      bucket_bounds.emplace_back(val.ptr());
       curr_bucket++;
     }
     return 0;
@@ -1631,9 +1639,10 @@ std::vector<String> bucket_bounds;
     Json_writer *writer = new Json_writer();
     writer->start_array();
     for(auto& value: bucket_bounds) {
-      writer->add_str(value);
+      writer->add_str(value.c_str());
     }
     writer->end_array();
+    histogram->set_size(bucket_bounds.size());
     histogram->set_values((uchar *) writer->output.get_string()->ptr());
   }
 };
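The finalizer above hands the histogram a raw pointer into the Json_writer's output String, the writer allocated with new is not freed in this hunk, and the store path earlier relies on strlen() over that buffer. If that buffer is not null-terminated, this combination would account for the trailing-garbage and "Memory not freed" items in the commit message. Purely as an illustrative sketch (hypothetical type, not server code), one way to make the hand-off self-contained is to copy the serialized text into owned, null-terminated storage together with an explicit length:

#include <cstring>
#include <memory>
#include <string>

// Hypothetical container for a finished JSON histogram: it owns a
// null-terminated copy of the text and records the byte length and bucket
// count explicitly, so no later reader has to call strlen() on a borrowed buffer.
struct JsonHistogramData
{
  std::unique_ptr<char[]> text;
  size_t length= 0;
  size_t buckets= 0;

  void set(const std::string &json, size_t bucket_count)
  {
    length= json.size();
    buckets= bucket_count;
    text.reset(new char[length + 1]);
    std::memcpy(text.get(), json.data(), length);
    text[length]= '\0';                  // explicit terminator
  }
};

int main()
{
  JsonHistogramData h;
  h.set("[\"2\", \"4\", \"6\"]", 3);     // serialized bounds + bucket count
  return (h.length == std::strlen(h.text.get())) ? 0 : 1;
}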
@@ -153,9 +153,9 @@ class Histogram
{
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return ((uint) (1 << 8) - 1);
case DOUBLE_PREC_HB:
case JSON:
return ((uint) (1 << 16) - 1);
}
return 1;
@@ -166,9 +166,9 @@ class Histogram
{
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return size;
case DOUBLE_PREC_HB:
case JSON:
return size / 2;
}
return 0;
@@ -180,9 +180,9 @@ class Histogram
DBUG_ASSERT(i < get_width());
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return (uint) (((uint8 *) values)[i]);
case DOUBLE_PREC_HB:
case JSON:
return (uint) uint2korr(values + i * 2);
}
return 0;