Commit 9954aecc authored by Michael Okoko, committed by Sergei Petrunia

Store bucket bounds and extend test cases for JSON histogram

This fixes the memory allocation for the JSON histogram builder and adds more column types for testing.
Some challenges remain at the moment:
* A garbage value at the end of the JSON array still persists.
* A garbage value also gets appended to the bucket values if the column is a primary key.
* There is a memory leak that results in a "Warning: Memory not freed" message at the end of the tests.
Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent 237447de
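For orientation before the diff: the patch collects one upper bound per histogram bucket while the column values are scanned in order, keeps the bounds in a std::vector<std::string>, and serializes them as a JSON array of strings. Below is a minimal standalone sketch of that flow, in plain C++ with illustrative names only; the server code uses Field, StringBuffer and Json_writer for the corresponding steps.

#include <iostream>
#include <string>
#include <vector>

// Pick one upper bound per bucket from an already-sorted list of values.
// `buckets` plays the role of the histogram width derived from @@histogram_size.
static std::vector<std::string> collect_bounds(const std::vector<int> &sorted_values,
                                               size_t buckets)
{
  std::vector<std::string> bounds;
  size_t capacity= sorted_values.size() / buckets;        // values per bucket
  if (capacity == 0)
    capacity= 1;
  for (size_t i= capacity - 1; i < sorted_values.size(); i+= capacity)
    bounds.push_back(std::to_string(sorted_values[i]));   // bucket's last value
  return bounds;
}

// Serialize the bounds as a JSON array of strings. Real values would need
// escaping, which Json_writer handles in the server.
static std::string to_json_array(const std::vector<std::string> &bounds)
{
  std::string out= "[";
  for (size_t i= 0; i < bounds.size(); i++)
  {
    if (i)
      out+= ", ";
    out+= "\"" + bounds[i] + "\"";
  }
  return out + "]";
}

int main()
{
  std::vector<int> column;
  for (int i= 1; i <= 25; i++)             // mimics INSERT ... from seq_1_to_25
    column.push_back(i);
  std::cout << to_json_array(collect_bounds(column, 10)) << "\n";
  // prints ["2", "4", "6", "8", "10", "12", "14", "16", "18", "20", "22", "24"]
  return 0;
}

The 25-row/10-bucket numbers mirror the test case below; the real builder derives its bucket capacity from the histogram width in the same spirit.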
 --source include/have_stat_tables.inc
+--source include/have_sequence.inc
+--source include/analyze-format.inc
 --echo #
---echo # Test that JSON is a valid histogram type and we can store JSON strings in mysql.column_stats
+--echo # Test that we can store JSON arrays in histogram field mysql.column_stats when histogram_type=JSON
 --echo #
 --disable_warnings
 drop table if exists t1;
 --enable_warnings
 set @save_histogram_type=@@histogram_type;
 set @save_histogram_size=@@histogram_size;
 CREATE TABLE t1 (
-  a int NOT NULL PRIMARY KEY,
-  b varchar(32)
-) ENGINE=MYISAM;
+  a int,
+  b varchar(32),
+  c char(2),
+  d double
+);
+--disable_result_log
+INSERT INTO t1 SELECT seq, seq, seq, seq from seq_1_to_25;
+--enable_result_log
+SET histogram_type='JSON';
+SELECT @@histogram_type;
+# set histogram size to be < row count (25 in this case) to see how histogram behaves
+set histogram_size=10;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+SELECT * FROM mysql.column_stats WHERE table_name='t1';
+DELETE FROM mysql.column_stats;
+DROP TABLE t1;
+create schema world;
+use world;
+--disable_query_log
+--disable_result_log
+--disable_warnings
+--source include/world_schema_utf8.inc
+--source include/world.inc
+--enable_warnings
+--enable_result_log
+--enable_query_log
-INSERT INTO t1 VALUES
-(7, 'xxxxxxxxxxxxxxxxxxxxxxxxxx'),
-(17, 'vvvvvvvvvvvvv');
 set histogram_type='JSON';
 set histogram_size=25;
+--disable_result_log
+ANALYZE TABLE Country PERSISTENT FOR ALL;
+--enable_result_log
-ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES();
-DESCRIBE mysql.column_stats;
-SELECT * FROM mysql.column_stats;
+SELECT column_name, min_value, max_value, hist_size, hist_type, histogram FROM mysql.column_stats;
 set histogram_type=@save_histogram_type;
 set histogram_size=@save_histogram_size;
-## Remove against Milestone-2
-ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES();
-SELECT * FROM mysql.column_stats;
-select table_name, hist_type, decode_histogram(hist_type, histogram ) from mysql.column_stats;
-DROP TABLE t1;
\ No newline at end of file
+DROP SCHEMA world;
\ No newline at end of file
@@ -1073,9 +1073,16 @@ class Column_stat: public Stat_table
       stat_field->store(stats->histogram.get_type() + 1);
       break;
     case COLUMN_STAT_HISTOGRAM:
+      if (stats->histogram.get_type() == JSON)
+      {
+        stat_field->store((char *) stats->histogram.get_values(),
+                          strlen((char *) stats->histogram.get_values()), &my_charset_bin);
+      } else
+      {
       stat_field->store((char *) stats->histogram.get_values(),
                         stats->histogram.get_size(), &my_charset_bin);
-      break;
+      }
+      break;
     }
   }
 }
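A note on the JSON branch above: it measures the stored value with strlen() instead of histogram.get_size(), which fits the builder hunk further down, where set_size() now receives the number of bucket bounds rather than a byte count. A tiny illustration of the two quantities, using made-up values:

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

int main()
{
  std::vector<std::string> bounds= {"8", "16", "25"};   // three bucket bounds
  std::string json= "[\"8\", \"16\", \"25\"]";          // serialized histogram

  std::cout << "set_size() argument:   " << bounds.size() << "\n";              // 3
  std::cout << "bytes actually stored: " << std::strlen(json.c_str()) << "\n";  // 17
  // strlen() is only safe here because std::string guarantees a trailing '\0';
  // the server-side buffer has no such guarantee, which is one plausible source
  // of the "garbage value" items in the commit message.
  return 0;
}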
@@ -1588,7 +1595,7 @@ class Histogram_builder
 class Histogram_builder_json : public Histogram_builder
 {
-  std::vector<String> bucket_bounds;
+  std::vector<std::string> bucket_bounds;
 public:
   Histogram_builder_json(Field *col, uint col_len, ha_rows rows)
@@ -1619,9 +1626,10 @@ std::vector<String> bucket_bounds;
       return 0;
     if (count > bucket_capacity * (curr_bucket + 1))
     {
-      auto *val= new StringBuffer<MAX_FIELD_WIDTH>;
-      column->val_str(val);
-      bucket_bounds.emplace_back(String(val->ptr(), val->length(), &my_charset_bin));
+      column->store_field_value((uchar *) elem, col_length);
+      StringBuffer<MAX_FIELD_WIDTH> val;
+      column->val_str(&val);
+      bucket_bounds.emplace_back(val.ptr());
       curr_bucket++;
     }
     return 0;
@@ -1631,9 +1639,10 @@ std::vector<String> bucket_bounds;
     Json_writer *writer = new Json_writer();
     writer->start_array();
     for(auto& value: bucket_bounds) {
-      writer->add_str(value);
+      writer->add_str(value.c_str());
     }
     writer->end_array();
+    histogram->set_size(bucket_bounds.size());
     histogram->set_values((uchar *) writer->output.get_string()->ptr());
   }
 };
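The finalizer above hands the histogram a raw pointer into the Json_writer's output String, the writer allocated with new is not freed in this hunk, and the store path earlier relies on strlen() over that buffer. If that buffer is not null-terminated, this combination would account for the trailing-garbage and "Memory not freed" items in the commit message. Purely as an illustrative sketch (hypothetical type, not server code), one way to make the hand-off self-contained is to copy the serialized text into owned, null-terminated storage together with an explicit length:

#include <cstring>
#include <memory>
#include <string>

// Hypothetical container for a finished JSON histogram: it owns a
// null-terminated copy of the text and records the byte length and bucket
// count explicitly, so no later reader has to call strlen() on a borrowed buffer.
struct JsonHistogramData
{
  std::unique_ptr<char[]> text;
  size_t length= 0;
  size_t buckets= 0;

  void set(const std::string &json, size_t bucket_count)
  {
    length= json.size();
    buckets= bucket_count;
    text.reset(new char[length + 1]);
    std::memcpy(text.get(), json.data(), length);
    text[length]= '\0';                  // explicit terminator
  }
};

int main()
{
  JsonHistogramData h;
  h.set("[\"2\", \"4\", \"6\"]", 3);     // serialized bounds + bucket count
  return (h.length == std::strlen(h.text.get())) ? 0 : 1;
}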
@@ -153,9 +153,9 @@ class Histogram
{
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return ((uint) (1 << 8) - 1);
case DOUBLE_PREC_HB:
case JSON:
return ((uint) (1 << 16) - 1);
}
return 1;
@@ -166,9 +166,9 @@ class Histogram
{
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return size;
case DOUBLE_PREC_HB:
case JSON:
return size / 2;
}
return 0;
@@ -180,9 +180,9 @@ class Histogram
DBUG_ASSERT(i < get_width());
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return (uint) (((uint8 *) values)[i]);
case DOUBLE_PREC_HB:
case JSON:
return (uint) uint2korr(values + i * 2);
}
return 0;