Commit e778d12f authored by Michael Okoko, committed by Sergei Petrunia

report parse error when parsing JSON histogram fails

Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent fe2e516a
@@ -67,6 +67,41 @@ test t1 d 1 25 0.0000 8.0000 0.0000 10 JSON [
"21",
"23"
]
SELECT * FROM t1;
a b c d
1 1 1 1
2 2 2 2
3 3 3 3
4 4 4 4
5 5 5 5
6 6 6 6
7 7 7 7
8 8 8 8
9 9 9 9
10 10 10 10
11 11 11 11
12 12 12 12
13 13 13 13
14 14 14 14
15 15 15 15
16 16 16 16
17 17 17 17
18 18 18 18
19 19 19 19
20 20 20 20
21 21 21 21
22 22 22 22
23 23 23 23
24 24 24 24
25 25 25 25
UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1';
FLUSH TABLES;
SELECT * FROM t1;
ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '1'.
UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1';
FLUSH TABLES;
SELECT * FROM t1;
ERROR HY000: Failed to parse histogram, encountered JSON_TYPE '32608'.
DELETE FROM mysql.column_stats;
DROP TABLE t1;
create schema world;
@@ -28,6 +28,19 @@ set histogram_size=10;
ANALYZE TABLE t1 PERSISTENT FOR ALL;
SELECT * FROM mysql.column_stats WHERE table_name='t1';
SELECT * FROM t1;
# We then test different valid JSON strings that are invalid histograms.
UPDATE mysql.column_stats SET histogram='["1", {"a": "b"}, "2"]' WHERE table_name='t1';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
SELECT * FROM t1;
UPDATE mysql.column_stats SET histogram='{}' WHERE table_name='t1';
FLUSH TABLES;
--error ER_JSON_HISTOGRAM_PARSE_FAILED
SELECT * FROM t1;
DELETE FROM mysql.column_stats;
DROP TABLE t1;
@@ -8913,3 +8913,5 @@ ER_PARTITION_CONVERT_SUBPARTITIONED
eng "Convert partition is not supported for subpartitioned table."
ER_PROVIDER_NOT_LOADED
eng "MariaDB tried to use the %s, but its provider plugin is not loaded"
ER_JSON_HISTOGRAM_PARSE_FAILED
eng "Failed to parse histogram, encountered JSON_TYPE '%d'."
@@ -67,15 +67,11 @@
* json_get_array_items expects a JSON array as argument,
* and pushes the elements of the array into the `container` vector.
* It only works if all the elements in the original JSON array
* are scalar values (i.e., strings, numbers, true or false), and returns JSV_BAD_JSON if:
* the original JSON is not an array OR the JSON array contains non-scalar elements.
* are scalar values (i.e., strings, numbers, true or false),
* else, the JSON type encountered is stored in value_type and the function returns false.
*/
bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container);
std::vector<std::string> parse_histogram_from_json(const char *json);
void test_parse_histogram_from_json();
Histogram_base *create_histogram(Histogram_type hist_type);
/* Currently there are only 3 persistent statistical tables */
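A minimal caller sketch for the contract documented above (hypothetical code, not part of this commit; it assumes only the declaration in this hunk and standard C++ headers):

  #include <cstring>
  #include <string>
  #include <vector>

  // Declaration as introduced in the header hunk above.
  bool json_get_array_items(const char *json, const char *json_end,
                            int *value_type, std::vector<std::string> &container);

  // Hypothetical helper: collect histogram bucket bounds from a JSON array of
  // scalars, or bail out when the input is not an array of scalar values.
  static bool collect_buckets(const char *json, std::vector<std::string> &buckets)
  {
    int value_type;
    if (!json_get_array_items(json, json + strlen(json), &value_type, buckets))
    {
      // Per the comment above, value_type now holds the JSON type that
      // stopped the parse; a caller can pass it to the new error message.
      return false;
    }
    // buckets holds one string per element, e.g. {"aabbb", "ccccdd", "eeefff"}
    // for the input ["aabbb", "ccccdd", "eeefff"].
    return true;
  }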
@@ -1221,18 +1217,29 @@ class Column_stat: public Stat_table
of read_stats->histogram.
*/
Histogram_binary * load_histogram(MEM_ROOT *mem_root)
Histogram_base * load_histogram(MEM_ROOT *mem_root)
{
if (find_stat())
{
char buff[MAX_FIELD_WIDTH];
String val(buff, sizeof(buff), &my_charset_bin);
uint fldno= COLUMN_STAT_HISTOGRAM;
Histogram_base *hist;
Field *stat_field= stat_table->field[fldno];
table_field->read_stats->set_not_null(fldno);
stat_field->val_str(&val);
// histogram-todo: here, create the histogram of appropriate type.
Histogram_binary *hist= new (mem_root) Histogram_binary();
switch (table_field->read_stats->histogram_type_on_disk)
{
case SINGLE_PREC_HB:
case DOUBLE_PREC_HB:
hist = new (mem_root) Histogram_binary();
break;
case JSON:
hist = new (mem_root) Histogram_json();
break;
default:
return NULL;
}
if (!hist->parse(mem_root, table_field->read_stats->histogram_type_on_disk,
(const uchar*)val.ptr(), val.length()))
{
@@ -1283,21 +1290,17 @@ void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htyp
bool Histogram_json::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr, uint size_arg)
{
DBUG_ENTER("Histogram_json::parse");
type = type_arg;
// I think we could use memcpy here, but not sure about how to get the right size
// since we can't depend on size_arg (it's zero for json histograms)
// also, does it make sense to cast here? or we can modify json_get_array_items
// to accept uchar*
const char *json = (char *)ptr;
int vt;
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets);
fprintf(stderr,"==============\n");
fprintf(stderr,"histogram: %s\n", json);
fprintf(stderr, "json_get_array_items() returned %s\n", result ? "true" : "false");
fprintf(stderr, "value type after json_get_array_items() is %d\n", vt);
fprintf(stderr, " JSV_BAD_JSON=%d, JSON_VALUE_ARRAY=%d\n", (int)JSV_BAD_JSON, (int)JSON_VALUE_ARRAY);
fprintf(stderr, "hist_buckets.size()=%zu\n", hist_buckets.size());
return false;
if (!result)
{
my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), vt);
DBUG_RETURN(true);
}
DBUG_RETURN(false);
}
void Histogram_json::serialize(Field *field)
@@ -1753,11 +1756,6 @@ class Histogram_builder_json : public Histogram_builder
histogram->set_size(bucket_bounds.size());
Binary_string *json_string = (Binary_string *) writer->output.get_string();
histogram->set_values((uchar *) json_string->c_ptr());
std::vector<std::string> buckets = parse_histogram_from_json(json_string->c_ptr());
printf("%zu", buckets.size());
test_parse_histogram_from_json();
}
};
@@ -1770,41 +1768,6 @@ Histogram_base *create_histogram(Histogram_type hist_type)
return new Histogram_binary;
}
void test_parse_histogram_from_json()
{
std::vector<std::string> bucket = {};
std::string json;
std::string tests[7] = {
R"(["aabbb", "ccccdd", "eeefff"])",
R"(["aabbb", "ccc{}dd", "eeefff"])",
R"(["aabbb", {"a": "b"}, "eeefff"])",
R"({})",
R"([1,2,3, null])",
R"([null])",
R"([])"
};
for(const auto& test : tests) {
json = test;
bucket = parse_histogram_from_json(json.c_str());
}
}
std::vector<std::string> parse_histogram_from_json(const char *json)
{
std::vector<std::string> hist_buckets= {};
int vt;
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets);
fprintf(stderr,"==============\n");
fprintf(stderr,"histogram: %s\n", json);
fprintf(stderr, "json_get_array_items() returned %s\n", result ? "true" : "false");
fprintf(stderr, "value type after json_get_array_items() is %d\n", vt);
fprintf(stderr, " JSV_BAD_JSON=%d, JSON_VALUE_ARRAY=%d\n", (int)JSV_BAD_JSON, (int)JSON_VALUE_ARRAY);
fprintf(stderr, "hist_buckets.size()=%zu\n", hist_buckets.size());
return hist_buckets;
}
bool json_get_array_items(const char *json, const char *json_end, int *value_type, std::vector<std::string> &container) {
json_engine_t je;
int vl;
@@ -1814,7 +1777,6 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ
if (json_read_value(&je) || je.value_type != JSON_VALUE_ARRAY)
{
*value_type = JSV_BAD_JSON;
return false;
}
*value_type = je.value_type;
@@ -1831,16 +1793,15 @@ bool json_get_array_items(const char *json, const char *json_end, int *value_typ
je.value_type != JSON_VALUE_TRUE &&
je.value_type != JSON_VALUE_FALSE)
{
*value_type = JSV_BAD_JSON;
return false;
}
val = std::string(v, vl);
container.emplace_back(val);
break;
case JST_ARRAY_END:
break;
}
}
return true;
}
@@ -4101,8 +4062,7 @@ double get_column_range_cardinality(Field *field,
if (avg_frequency > 1.0 + 0.000001 &&
col_stats->min_max_values_are_provided())
{
Histogram_binary *hist=
dynamic_cast<Histogram_binary *>(col_stats->histogram_);
Histogram_base *hist = col_stats->histogram_;
if (hist && hist->is_usable(thd))
{
store_key_image_to_rec(field, (uchar *) min_endp->key,
@@ -4146,8 +4106,7 @@
else
max_mp_pos= 1.0;
Histogram_binary *hist=
dynamic_cast<Histogram_binary *>(col_stats->histogram_);
Histogram_base *hist = col_stats->histogram_;
if (hist && hist->is_usable(thd))
sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
else