Merge branch 'cume_dist' into bb-10.2-mdev9543

de35787a · Vicențiu Ciorbaru · 4fe6fbbb · 3544fe01 · de35787a · de35787a
Commit de35787a authored Mar 28, 2016 by Vicențiu Ciorbaru
5 changed files
--- a/mysql-test/r/win_percent_cume.result
+++ b/mysql-test/r/win_percent_cume.result
+create table t1 (
+pk int primary key,
+a int,
+b int
+);
+insert into t1 values
+( 1 , 0, 10),
+( 2 , 0, 10),
+( 3 , 1, 10),
+( 4 , 1, 10),
+( 8 , 2, 10),
+( 5 , 2, 20),
+( 6 , 2, 20),
+( 7 , 2, 20),
+( 9 , 4, 20),
+(10 , 4, 20);
+select pk, a, b,
+percent_rank() over (order by a),
+cume_dist() over (order by a)
+from t1;
+pk	a	b	percent_rank() over (order by a)	cume_dist() over (order by a)
+1	0	10	0.0000000000	0.2000000000
+2	0	10	0.0000000000	0.2000000000
+3	1	10	0.2222222222	0.4000000000
+4	1	10	0.2222222222	0.4000000000
+8	2	10	0.4444444444	0.8000000000
+5	2	20	0.4444444444	0.8000000000
+6	2	20	0.4444444444	0.8000000000
+7	2	20	0.4444444444	0.8000000000
+9	4	20	0.8888888889	1.0000000000
+10	4	20	0.8888888889	1.0000000000
+select pk, a, b,
+percent_rank() over (order by pk),
+cume_dist() over (order by pk)
+from t1 order by pk;
+pk	a	b	percent_rank() over (order by pk)	cume_dist() over (order by pk)
+1	0	10	0.0000000000	0.1000000000
+2	0	10	0.1111111111	0.2000000000
+3	1	10	0.2222222222	0.3000000000
+4	1	10	0.3333333333	0.4000000000
+5	2	20	0.4444444444	0.5000000000
+6	2	20	0.5555555556	0.6000000000
+7	2	20	0.6666666667	0.7000000000
+8	2	10	0.7777777778	0.8000000000
+9	4	20	0.8888888889	0.9000000000
+10	4	20	1.0000000000	1.0000000000
+select pk, a, b,
+percent_rank() over (partition by a order by a),
+cume_dist() over (partition by a order by a)
+from t1;
+pk	a	b	percent_rank() over (partition by a order by a)	cume_dist() over (partition by a order by a)
+1	0	10	0.0000000000	1.0000000000
+2	0	10	0.0000000000	1.0000000000
+3	1	10	0.0000000000	1.0000000000
+4	1	10	0.0000000000	1.0000000000
+8	2	10	0.0000000000	1.0000000000
+5	2	20	0.0000000000	1.0000000000
+6	2	20	0.0000000000	1.0000000000
+7	2	20	0.0000000000	1.0000000000
+9	4	20	0.0000000000	1.0000000000
+10	4	20	0.0000000000	1.0000000000
+drop table t1;
--- a/mysql-test/t/win_percent_cume.test
+++ b/mysql-test/t/win_percent_cume.test
+create table t1 (
+  pk int primary key,
+  a int,
+  b int
+);
+
+
+insert into t1 values
+( 1 , 0, 10),
+( 2 , 0, 10),
+( 3 , 1, 10),
+( 4 , 1, 10),
+( 8 , 2, 10),
+( 5 , 2, 20),
+( 6 , 2, 20),
+( 7 , 2, 20),
+( 9 , 4, 20),
+(10 , 4, 20);
+
+select pk, a, b,
+    percent_rank() over (order by a),
+    cume_dist() over (order by a)
+from t1;
+
+select pk, a, b,
+       percent_rank() over (order by pk),
+       cume_dist() over (order by pk)
+from t1 order by pk;
+
+select pk, a, b,
+        percent_rank() over (partition by a order by a),
+        cume_dist() over (partition by a order by a)
+from t1;
+
+drop table t1;
+
--- a/sql/item_windowfunc.h
+++ b/sql/item_windowfunc.h
@@ -317,12 +317,18 @@ class Item_context
  NOTE: All two pass window functions need to implement
  this interface.
 */
-class Item_sum_window_with_context : public Item_sum_num,
-                                     public Item_context
+class Item_sum_window_with_row_count : public Item_sum_num
 {
 public:
-  Item_sum_window_with_context(THD *thd)
-   : Item_sum_num(thd), Item_context() {}
+  Item_sum_window_with_row_count(THD *thd) : Item_sum_num(thd),
+                                             partition_row_count_(0){}
+
+  void set_row_count(ulonglong count) { partition_row_count_ = count; }
+
+ protected:
+  longlong get_row_count() { return partition_row_count_; }
+ private:
+  ulonglong partition_row_count_;
 };

 /*
@@ -336,12 +342,11 @@ class Item_sum_window_with_context : public Item_sum_num,
    This is held within the row_count context.
  - Second pass to compute rank of current row and the value of the function
 */
-class Item_sum_percent_rank: public Item_sum_window_with_context,
-                             public Window_context_row_count
+class Item_sum_percent_rank: public Item_sum_window_with_row_count
 {
 public:
  Item_sum_percent_rank(THD *thd)
-    : Item_sum_window_with_context(thd), cur_rank(1) {}
+    : Item_sum_window_with_row_count(thd), cur_rank(1) {}

  longlong val_int()
  {
@@ -359,14 +364,9 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
     We can not get the real value without knowing the number of rows
     in the partition. Don't divide by 0.
   */
-   if (!get_context_())
-   {
-     // Calling this kind of function with a context makes no sense.
-     DBUG_ASSERT(0);
-     return 0;
-   }
-
-   longlong partition_rows = get_context_()->get_field_context(result_field);
+   ulonglong partition_rows = get_row_count();
+   null_value= partition_rows > 0 ? false : true;
+
   return partition_rows > 1 ?
             static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
  }
@@ -381,25 +381,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
    return "percent_rank";
  }

-  bool create_window_context()
-  {
-    // TODO-cvicentiu: Currently this means we must make sure to delete
-    // the window context. We can potentially allocate this on the THD memroot.
-    // At the same time, this is only necessary for a small portion of the
-    // query execution and it does not make sense to keep it for all of it.
-    context_ = new Window_context_row_count();
-    if (context_ == NULL)
-      return true;
-    return false;
-  }
-
-  void delete_window_context()
-  {
-    if (context_)
-      delete get_context_();
-    context_ = NULL;
-  }
-
  void update_field() {}

  void clear()
@@ -428,13 +409,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
  void cleanup()
  {
    peer_tracker.cleanup();
-    Item_sum_window_with_context::cleanup();
-  }
-
-  /* Helper function so that we don't cast the context every time. */
-  Window_context_row_count* get_context_()
-  {
-    return static_cast<Window_context_row_count *>(context_);
  }
 };

@@ -448,27 +422,62 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
    window ordering of the window partition of R
  - NR is defined to be the number of rows in the window partition of R.

-  Just like with Item_sum_percent_rank, compuation of this function requires
+  Just like with Item_sum_percent_rank, computation of this function requires
  two passes.
 */

-class Item_sum_cume_dist: public Item_sum_percent_rank
+class Item_sum_cume_dist: public Item_sum_window_with_row_count
 {
 public:
-  Item_sum_cume_dist(THD *thd)
-    : Item_sum_percent_rank(thd) {}
+  Item_sum_cume_dist(THD *thd) : Item_sum_window_with_row_count(thd),
+                                 current_row_count_(0) {}
+
+  double val_real()
+  {
+    if (get_row_count() == 0)
+    {
+      null_value= true;
+      return 0;
+    }
+    ulonglong partition_row_count= get_row_count();
+    null_value= false;
+    return static_cast<double>(current_row_count_) / partition_row_count;
+  }

-  double val_real() { return 0; }
+  bool add()
+  {
+    current_row_count_++;
+    return false;
+  }

  enum Sumfunctype sum_func () const
  {
    return CUME_DIST_FUNC;
  }

+  void clear()
+  {
+    current_row_count_= 0;
+    set_row_count(0);
+  }
+
  const char*func_name() const
  {
    return "cume_dist";
  }
+
+  void update_field() {}
+  enum Item_result result_type () const { return REAL_RESULT; }
+  enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
+
+  void fix_length_and_dec()
+  {
+    decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
+                    // requires.
+  }
+
+ private:
+  ulonglong current_row_count_;
 };


@@ -499,11 +508,11 @@ class Item_window_func : public Item_func_or_sum
      force_return_blank(true),
      read_value_from_result_field(false) {}

-  Item_sum *window_func() { return (Item_sum *) args[0]; }
+  Item_sum *window_func() const { return (Item_sum *) args[0]; }

  void update_used_tables();

-  bool is_frame_prohibited()
+  bool is_frame_prohibited() const
  {
    switch (window_func()->sum_func()) {
    case Item_sum::ROW_NUMBER_FUNC:
@@ -517,7 +526,28 @@ class Item_window_func : public Item_func_or_sum
    }
  }

-  bool is_order_list_mandatory()
+  bool requires_partition_size() const
+  {
+    switch (window_func()->sum_func()) {
+    case Item_sum::PERCENT_RANK_FUNC:
+    case Item_sum::CUME_DIST_FUNC:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  bool requires_peer_size() const
+  {
+    switch (window_func()->sum_func()) {
+    case Item_sum::CUME_DIST_FUNC:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  bool is_order_list_mandatory() const
  {
    switch (window_func()->sum_func()) {
    case Item_sum::RANK_FUNC:

--- a/sql/sql_window.cc
+++ b/sql/sql_window.cc
@@ -865,15 +865,18 @@ class Frame_unbounded_preceding : public Frame_cursor
  }
 };

+
 /*
  UNBOUNDED FOLLOWING frame bound
 */

 class Frame_unbounded_following : public Frame_cursor
 {
-  Table_read_cursor cursor;

+protected:
+  Table_read_cursor cursor;
  Group_bound_tracker bound_tracker;
+
 public:
  void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
            SQL_I_List<ORDER> *order_list)
@@ -910,6 +913,35 @@ class Frame_unbounded_following : public Frame_cursor
 };


+class Frame_unbounded_following_set_count : public Frame_unbounded_following
+{
+  void next_partition(longlong rownum, Item_sum* item)
+  {
+    ulonglong num_rows_in_partition= 0;
+    if (!rownum)
+    {
+      /* Read the first row */
+      if (cursor.get_next())
+        return;
+    }
+    num_rows_in_partition++;
+
+    /* Remember which partition we are in */
+    bound_tracker.check_if_next_group();
+    /* Walk to the end of the partition, find how many rows there are. */
+    while (!cursor.get_next())
+    {
+      if (bound_tracker.check_if_next_group())
+        break;
+      num_rows_in_partition++;
+    }
+
+    Item_sum_window_with_row_count* item_with_row_count =
+      static_cast<Item_sum_window_with_row_count *>(item);
+    item_with_row_count->set_row_count(num_rows_in_partition);
+  }
+};
+
 /////////////////////////////////////////////////////////////////////////////
 // ROWS-type frame bounds
 /////////////////////////////////////////////////////////////////////////////
@@ -1212,6 +1244,47 @@ Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound)
  return NULL;
 }

+void add_extra_frame_cursors(List<Frame_cursor> *cursors,
+                             const Item_sum *window_func)
+{
+  switch (window_func->sum_func())
+  {
+    case Item_sum::CUME_DIST_FUNC:
+      cursors->push_back(new Frame_unbounded_preceding);
+      cursors->push_back(new Frame_range_current_row_bottom);
+      break;
+    default:
+      cursors->push_back(new Frame_unbounded_preceding);
+      cursors->push_back(new Frame_rows_current_row_bottom);
+  }
+}
+
+List<Frame_cursor> get_window_func_required_cursors(
+    const Item_window_func* item_win)
+{
+  List<Frame_cursor> result;
+
+  if (item_win->requires_partition_size())
+    result.push_back(new Frame_unbounded_following_set_count);
+
+  /*
+    If it is not a regular window function that follows frame specifications,
+    specific cursors are required.
+  */
+  if (item_win->is_frame_prohibited())
+  {
+    add_extra_frame_cursors(&result, item_win->window_func());
+    return result;
+  }
+
+  /* A regular window function follows the frame specification. */
+  result.push_back(get_frame_cursor(item_win->window_spec->window_frame,
+                                    false));
+  result.push_back(get_frame_cursor(item_win->window_spec->window_frame,
+                                    true));
+
+  return result;
+}

 /*
  Streamed window function computation with window frames.
@@ -1254,21 +1327,20 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
 {
  THD *thd= current_thd;
  int err= 0;
-  Frame_cursor *top_bound;
-  Frame_cursor *bottom_bound;

  Item_sum *sum_func= item_win->window_func();
  /* This algorithm doesn't support DISTINCT aggregator */
  sum_func->set_aggregator(Aggregator::SIMPLE_AGGREGATOR);
-  
-  Window_frame *window_frame= item_win->window_spec->window_frame;
-  top_bound= get_frame_cursor(window_frame, true);
-  bottom_bound= get_frame_cursor(window_frame, false);
-    
-  top_bound->init(thd, info, item_win->window_spec->partition_list,
-                  item_win->window_spec->order_list);
-  bottom_bound->init(thd, info, item_win->window_spec->partition_list,
-                     item_win->window_spec->order_list);
+
+  List<Frame_cursor> cursors= get_window_func_required_cursors(item_win);
+
+  List_iterator_fast<Frame_cursor> it(cursors);
+  Frame_cursor *c;
+  while((c= it++))
+  {
+    c->init(thd, info, item_win->window_spec->partition_list,
+            item_win->window_spec->order_list);
+  }

  bool is_error= false;
  longlong rownum= 0;
@@ -1293,24 +1365,28 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
        pre_XXX functions assume that tbl->record[0] contains current_row, and 
        they may not change it.
      */
-      bottom_bound->pre_next_partition(rownum, sum_func);
-      top_bound->pre_next_partition(rownum, sum_func);
+      it.rewind();
+      while ((c= it++))
+        c->pre_next_partition(rownum, sum_func);
      /*
        We move bottom_bound first, because we want rows to be added into the
        aggregate before top_bound attempts to remove them.
      */
-      bottom_bound->next_partition(rownum, sum_func);
-      top_bound->next_partition(rownum, sum_func);
+      it.rewind();
+      while ((c= it++))
+        c->next_partition(rownum, sum_func);
    }
    else
    {
      /* Again, both pre_XXX function can find current_row in tbl->record[0] */
-      bottom_bound->pre_next_row(sum_func);
-      top_bound->pre_next_row(sum_func);
+      it.rewind();
+      while ((c= it++))
+        c->pre_next_row(sum_func);

      /* These make no assumptions about tbl->record[0] and may change it */
-      bottom_bound->next_row(sum_func);
-      top_bound->next_row(sum_func);
+      it.rewind();
+      while ((c= it++))
+        c->next_row(sum_func);
    }
    rownum++;

@@ -1331,115 +1407,11 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
  }

  my_free(rowid_buf);
-  delete top_bound;
-  delete bottom_bound;
+  cursors.delete_elements();
  return is_error? true: false;
 }


-bool compute_two_pass_window_functions(Item_window_func *item_win,
-                                       TABLE *table, READ_RECORD *info)
-{
-  /* Perform first pass. */
-
-  // TODO-cvicentiu why not initialize the record for when we need, _in_
-  // this function.
-  READ_RECORD *info2= new READ_RECORD();
-  int err;
-  bool is_error = false;
-  bool first_row= true;
-  clone_read_record(info, info2);
-  Item_sum_window_with_context *window_func= 
-    static_cast<Item_sum_window_with_context *>(item_win->window_func());
-  uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0));
-
-  is_error= window_func->create_window_context();
-  /* Unable to allocate a new context. */
-  if (is_error)
-    return true;
-
-  Window_context *context = window_func->get_window_context();
-  /*
-     The two pass algorithm is as follows:
-     We have a sorted table according to the partition and order by clauses.
-     1. Scan through the table till we reach a partition boundary.
-     2. For each row that we scan, add it to the context.
-     3. Once the partition boundary is met, do a second scan through the
-     current partition and use the context information to compute the value for
-     the window function for that partition.
-     4. Reset the context.
-     5. Repeat from 1 till end of table.
-  */
-
-  bool done = false;
-  longlong rows_in_current_partition = 0;
-  // TODO handle end of table updating.
-  while (!done)
-  {
-
-    if ((err= info->read_record(info)))
-    {
-      done = true;
-    }
-
-    bool partition_changed= done || item_win->check_if_partition_changed();
-    // The first time we always have a partition changed. Ignore it.
-    if (first_row)
-    {
-      partition_changed= false;
-      first_row= false;
-    }
-
-    if (partition_changed)
-    {
-      /*
-         We are now looking at the first row for the next partition, or at the
-         end of the table. Either way, we must remember this position for when
-         we finish doing the second pass.
-      */
-      table->file->position(table->record[0]);
-      memcpy(rowid_buf, table->file->ref, table->file->ref_length);
-
-      for (longlong row_number = 0; row_number < rows_in_current_partition;
-          row_number++)
-      {
-        if ((err= info2->read_record(info2)))
-        {
-          is_error= true;
-          break;
-        }
-        window_func->add();
-        // Save the window function into the table.
-        item_win->save_in_field(item_win->result_field, true);
-        err= table->file->ha_update_row(table->record[1], table->record[0]);
-        if (err && err != HA_ERR_RECORD_IS_THE_SAME)
-        {
-          is_error= true;
-          break;
-        }
-      }
-
-      if (is_error)
-        break;
-
-      rows_in_current_partition= 0;
-      window_func->clear();
-      context->reset();
-
-      // Return to the beginning of the new partition.
-      table->file->ha_rnd_pos(table->record[0], rowid_buf);
-    }
-    rows_in_current_partition++;
-    context->add_field_to_context(item_win->result_field);
-  }
-
-  window_func->delete_window_context();
-  delete info2;
-  my_free(rowid_buf);
-  return is_error;
-}
-
-
 /* Make a list that is a concation of two lists of ORDER elements */

 static ORDER* concat_order_lists(MEM_ROOT *mem_root, ORDER *list1, ORDER *list2)
@@ -1502,16 +1474,12 @@ bool Window_func_runner::setup(THD *thd)
      compute_func= compute_window_func_values;
      break;
    }
-    case Item_sum::PERCENT_RANK_FUNC:
-    case Item_sum::CUME_DIST_FUNC:
-    {
-      compute_func= compute_two_pass_window_functions;
-      break;
-    }
    case Item_sum::COUNT_FUNC:
    case Item_sum::SUM_BIT_FUNC:
    case Item_sum::SUM_FUNC:
    case Item_sum::AVG_FUNC:
+    case Item_sum::PERCENT_RANK_FUNC:
+    case Item_sum::CUME_DIST_FUNC:
    {
      /*
        Frame-aware window function computation. It does one pass, but

--- a/sql/sql_window.h
+++ b/sql/sql_window.h
@@ -150,7 +150,7 @@ typedef bool (*window_compute_func_t)(Item_window_func *item_win,
  Currently, we make a spearate filesort() call for each window function.
 */

-class Window_func_runner : public Sql_alloc 
+class Window_func_runner : public Sql_alloc
 {
  Item_window_func *win_func;
  /* Window function can be computed over this sorting */