item_windowfunc.h 15.1 KB
Newer Older
1 2 3 4 5 6 7 8
#ifndef ITEM_WINDOWFUNC_INCLUDED
#define ITEM_WINDOWFUNC_INCLUDED

#include "my_global.h"
#include "item.h"

class Window_spec;

9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56

int test_if_group_changed(List<Cached_item> &list);

/* A wrapper around test_if_group_changed */
class Group_bound_tracker
{
  List<Cached_item> group_fields;
public:
  void init(THD *thd, SQL_I_List<ORDER> *list)
  {
    for (ORDER *curr = list->first; curr; curr=curr->next) 
    {
      Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
      group_fields.push_back(tmp);
    }
  }

  void cleanup()
  {
    group_fields.empty();
  }

  /*
    Check if the current row is in a different group than the previous row
    this function was called for.
    The new row's group becomes the current row's group.
  */
  bool check_if_next_group()
  {
    if (test_if_group_changed(group_fields) > -1)
      return true;
    return false;
  }

  int compare_with_cache()
  {
    List_iterator<Cached_item> li(group_fields);
    Cached_item *ptr;
    int res;
    while ((ptr= li++))
    {
      if ((res= ptr->cmp_read_only()))
        return res;
    }
    return 0;
  }
};

Sergei Petrunia's avatar
Sergei Petrunia committed
57 58 59
/*
  ROW_NUMBER() OVER (...)

60
  @detail
Sergei Petrunia's avatar
Sergei Petrunia committed
61 62 63 64
  - This is a Window function (not just an aggregate)
  - It can be computed by doing one pass over select output, provided 
    the output is sorted according to the window definition.
*/
65 66 67 68 69

class Item_sum_row_number: public Item_sum_int
{
  longlong count;

70 71 72 73 74 75 76 77 78 79
public:
  void clear()
  {
    count= 0;
  }
  bool add() 
  {
    count++;
    return false; 
  }
80 81 82 83 84
  void update_field() {}

  Item_sum_row_number(THD *thd)
    : Item_sum_int(thd),  count(0) {}

85
  enum Sumfunctype sum_func() const
86 87 88 89
  {
    return ROW_NUMBER_FUNC;
  }

90 91 92 93
  longlong val_int()
  {
    return count;
  }
94 95 96 97 98 99 100
  const char*func_name() const
  {
    return "row_number";
  }
  
};

Sergei Petrunia's avatar
Sergei Petrunia committed
101 102 103 104

/*
  RANK() OVER (...) Windowing function

105
  @detail
Sergei Petrunia's avatar
Sergei Petrunia committed
106 107 108
  - This is a Window function (not just an aggregate)
  - It can be computed by doing one pass over select output, provided 
    the output is sorted according to the window definition.
109 110 111 112 113 114 115 116

  The function is defined as:

  "The rank of row R is defined as 1 (one) plus the number of rows that 
  precede R and are not peers of R"

  "This implies that if two or more rows are not distinct with respect to 
  the window ordering, then there will be one or more"
Sergei Petrunia's avatar
Sergei Petrunia committed
117 118
*/

119 120
class Item_sum_rank: public Item_sum_int
{
121
protected:
122 123
  longlong row_number; // just ROW_NUMBER()
  longlong cur_rank;   // current value
Sergei Petrunia's avatar
Sergei Petrunia committed
124
  
125
  Group_bound_tracker peer_tracker;
126
public:
Sergei Petrunia's avatar
Sergei Petrunia committed
127 128
  void clear()
  {
129 130 131
    /* This is called on partition start */
    cur_rank= 1;
    row_number= 0;
Sergei Petrunia's avatar
Sergei Petrunia committed
132
  }
133 134 135 136 137 138

  bool add();

  longlong val_int()
  {
    return cur_rank;
Sergei Petrunia's avatar
Sergei Petrunia committed
139
  }
140

141
  void update_field() {}
142 143 144 145
  /*
   void reset_field();
    TODO: ^^ what does this do ? It is not called ever?
  */
146

147
public:
148
  Item_sum_rank(THD *thd)
149
    : Item_sum_int(thd) {}
150 151 152 153 154 155 156 157 158 159

  enum Sumfunctype sum_func () const
  {
    return RANK_FUNC;
  }

  const char*func_name() const
  {
    return "rank";
  }
160

161
  void setup_window_func(THD *thd, Window_spec *window_spec);
162 163 164 165 166
  void cleanup()
  {
    peer_tracker.cleanup();
    Item_sum_int::cleanup();
  }
167 168
};

Sergei Petrunia's avatar
Sergei Petrunia committed
169 170

/*
171
  DENSE_RANK() OVER (...) Windowing function
Sergei Petrunia's avatar
Sergei Petrunia committed
172

173
  @detail
Sergei Petrunia's avatar
Sergei Petrunia committed
174 175 176
  - This is a Window function (not just an aggregate)
  - It can be computed by doing one pass over select output, provided 
    the output is sorted according to the window definition.
177 178 179 180 181 182 183 184 185

  The function is defined as:

  "If DENSE_RANK is specified, then the rank of row R is defined as the 
  number of rows preceding and including R that are distinct with respect 
  to the window ordering"

  "This implies that there are no gaps in the sequential rank numbering of
  rows in each window partition."
Sergei Petrunia's avatar
Sergei Petrunia committed
186 187
*/

188

189 190 191
class Item_sum_dense_rank: public Item_sum_int
{
  longlong dense_rank;
192
  Group_bound_tracker peer_tracker;
Vicențiu Ciorbaru's avatar
Vicențiu Ciorbaru committed
193 194 195 196 197 198
  /*
     XXX(cvicentiu) This class could potentially be implemented in the rank
     class, with a switch for the DENSE case.
  */
  void clear()
  {
199 200 201
    dense_rank= 1;
  }
  bool add();
202
  void update_field() {}
Vicențiu Ciorbaru's avatar
Vicențiu Ciorbaru committed
203 204
  longlong val_int()
  {
205 206
    return dense_rank;
  }
207 208 209 210 211 212 213 214 215 216 217 218 219

 public:
  Item_sum_dense_rank(THD *thd)
    : Item_sum_int(thd), dense_rank(0) {}
  enum Sumfunctype sum_func () const
  {
    return DENSE_RANK_FUNC;
  }

  const char*func_name() const
  {
    return "dense_rank";
  }
220 221 222

  void setup_window_func(THD *thd, Window_spec *window_spec);

223 224 225 226 227
  void cleanup()
  {
    peer_tracker.cleanup();
    Item_sum_int::cleanup();
  }
228 229
};

230
/*
231 232
  A base window function (aggregate) that also holds a counter for the number
  of rows.
233
*/
234
class Item_sum_window_with_row_count : public Item_sum_num
235 236
{
 public:
237 238 239 240 241 242 243 244 245
  Item_sum_window_with_row_count(THD *thd) : Item_sum_num(thd),
                                             partition_row_count_(0){}

  void set_row_count(ulonglong count) { partition_row_count_ = count; }

 protected:
  longlong get_row_count() { return partition_row_count_; }
 private:
  ulonglong partition_row_count_;
246
};
247 248 249 250 251 252 253 254 255

/*
  @detail
  "The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is 
  defined to be the RANK of R and NR is defined to be the number of rows in
  the window partition of R."

  Computation of this function requires two passes:
  - First pass to find #rows in the partition
256
    This is held within the row_count context.
257 258
  - Second pass to compute rank of current row and the value of the function
*/
259
class Item_sum_percent_rank: public Item_sum_window_with_row_count
260 261 262
{
 public:
  Item_sum_percent_rank(THD *thd)
263
    : Item_sum_window_with_row_count(thd), cur_rank(1) {}
264

265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
  longlong val_int()
  {
   /*
      Percent rank is a real value so calling the integer value should never
      happen. It makes no sense as it gets truncated to either 0 or 1.
   */
    DBUG_ASSERT(0);
    return 0;
  }

  double val_real()
  {
   /*
     We can not get the real value without knowing the number of rows
     in the partition. Don't divide by 0.
   */
281 282 283
   ulonglong partition_rows = get_row_count();
   null_value= partition_rows > 0 ? false : true;

284 285 286
   return partition_rows > 1 ?
             static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
  }
287 288 289 290 291 292 293 294 295 296

  enum Sumfunctype sum_func () const
  {
    return PERCENT_RANK_FUNC;
  }

  const char*func_name() const
  {
    return "percent_rank";
  }
297 298 299 300 301 302 303 304 305 306

  void update_field() {}

  void clear()
  {
    cur_rank= 1;
    row_number= 0;
  }
  bool add();
  enum Item_result result_type () const { return REAL_RESULT; }
307 308
  enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }

309 310 311 312 313 314 315 316 317 318 319 320
  void fix_length_and_dec()
  {
    decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
                    // requires.
  }

  void setup_window_func(THD *thd, Window_spec *window_spec);

 private:
  longlong cur_rank;   // Current rank of the current row.
  longlong row_number; // Value if this were ROW_NUMBER() function.

321 322 323 324 325
  Group_bound_tracker peer_tracker;

  void cleanup()
  {
    peer_tracker.cleanup();
326
    Item_sum_num::cleanup();
327
  }
328 329
};

330

331 332


333 334 335 336 337 338 339
/*
  @detail
  "The relative rank of a row R is defined as NP/NR, where 
  - NP is defined to be the number of rows preceding or peer with R in the 
    window ordering of the window partition of R
  - NR is defined to be the number of rows in the window partition of R.

340
  Just like with Item_sum_percent_rank, computation of this function requires
341 342 343
  two passes.
*/

344
class Item_sum_cume_dist: public Item_sum_window_with_row_count
345 346
{
 public:
347 348
  Item_sum_cume_dist(THD *thd) : Item_sum_window_with_row_count(thd),
                                 current_row_count_(0) {}
349

350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
  double val_real()
  {
    if (get_row_count() == 0)
    {
      null_value= true;
      return 0;
    }
    ulonglong partition_row_count= get_row_count();
    null_value= false;
    return static_cast<double>(current_row_count_) / partition_row_count;
  }

  bool add()
  {
    current_row_count_++;
    return false;
  }
367

368
  enum Sumfunctype sum_func() const
369 370 371 372
  {
    return CUME_DIST_FUNC;
  }

373 374 375 376 377 378
  void clear()
  {
    current_row_count_= 0;
    set_row_count(0);
  }

379 380 381 382
  const char*func_name() const
  {
    return "cume_dist";
  }
383 384 385 386 387 388 389 390 391 392 393 394 395

  void update_field() {}
  enum Item_result result_type () const { return REAL_RESULT; }
  enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }

  void fix_length_and_dec()
  {
    decimals = 10;  // TODO-cvicentiu find out how many decimals the standard
                    // requires.
  }

 private:
  ulonglong current_row_count_;
396 397
};

398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
class Item_sum_ntile : public Item_sum_window_with_row_count
{
 public:
  Item_sum_ntile(THD* thd, ulong num_quantiles) :
    Item_sum_window_with_row_count(thd), num_quantiles_(num_quantiles),
    current_row_count_(0) {};

  double val_real()
  {
    return val_int();
  }

  longlong val_int()
  {
    if (get_row_count() == 0)
    {
      null_value= true;
      return 0;
    }
    null_value= false;
    ulonglong quantile_size = get_row_count() / num_quantiles_;
    ulonglong extra_rows = get_row_count() - quantile_size * num_quantiles_;

    if (current_row_count_ <= extra_rows * (quantile_size + 1))
      return (current_row_count_ - 1) / (quantile_size + 1) + 1;

    return (current_row_count_ - 1 - extra_rows) / quantile_size + 1;
  }

  bool add()
  {
    current_row_count_++;
    return false;
  }

  enum Sumfunctype sum_func() const
  {
    return NTILE_FUNC;
  }

  void clear()
  {
    current_row_count_= 0;
    set_row_count(0);
  }

  const char*func_name() const
  {
    return "ntile";
  }

  void update_field() {}

  enum Item_result result_type () const { return INT_RESULT; }
  enum_field_types field_type() const { return MYSQL_TYPE_LONGLONG; }

 private:
  ulong num_quantiles_;
  ulong current_row_count_;
};

459

460
class Item_window_func : public Item_func_or_sum
461
{
462
  /* Window function parameters as we've got them from the parser */
463
public:
464
  LEX_STRING *window_name;
465
public:
466
  Window_spec *window_spec;
467 468
  
  /*
469
    This stores the data about the partition we're currently in.
470
    advance_window() uses this to tell when we've left one partition and
471
    entered another
472
  */
473
  Group_bound_tracker partition_tracker;
474 475
public:
  Item_window_func(THD *thd, Item_sum *win_func, LEX_STRING *win_name)
476
    : Item_func_or_sum(thd, (Item *) win_func),
477
      window_name(win_name), window_spec(NULL), 
478
      force_return_blank(true),
Sergei Petrunia's avatar
Sergei Petrunia committed
479
      read_value_from_result_field(false) {}
480 481

  Item_window_func(THD *thd, Item_sum *win_func, Window_spec *win_spec)
482
    : Item_func_or_sum(thd, (Item *) win_func), 
483
      window_name(NULL), window_spec(win_spec), 
484
      force_return_blank(true),
Sergei Petrunia's avatar
Sergei Petrunia committed
485
      read_value_from_result_field(false) {}
486

487
  Item_sum *window_func() const { return (Item_sum *) args[0]; }
488 489

  void update_used_tables();
490

491
  bool is_frame_prohibited() const
492
  {
493
    switch (window_func()->sum_func()) {
494 495 496 497 498
    case Item_sum::ROW_NUMBER_FUNC:
    case Item_sum::RANK_FUNC:
    case Item_sum::DENSE_RANK_FUNC:
    case Item_sum::PERCENT_RANK_FUNC:
    case Item_sum::CUME_DIST_FUNC:
499
    case Item_sum::NTILE_FUNC:
500 501 502
      return true;
    default: 
      return false;
503 504 505 506 507 508 509 510
    }
  }

  bool requires_partition_size() const
  {
    switch (window_func()->sum_func()) {
    case Item_sum::PERCENT_RANK_FUNC:
    case Item_sum::CUME_DIST_FUNC:
511
    case Item_sum::NTILE_FUNC:
512 513 514 515 516 517 518 519 520 521 522 523 524
      return true;
    default:
      return false;
    }
  }

  bool requires_peer_size() const
  {
    switch (window_func()->sum_func()) {
    case Item_sum::CUME_DIST_FUNC:
      return true;
    default:
      return false;
525
    }
526 527
  }

528
  bool is_order_list_mandatory() const
529
  {
530
    switch (window_func()->sum_func()) {
531 532 533 534 535 536 537 538
    case Item_sum::RANK_FUNC:
    case Item_sum::DENSE_RANK_FUNC:
    case Item_sum::PERCENT_RANK_FUNC:
    case Item_sum::CUME_DIST_FUNC:
      return true;
    default: 
      return false;
    }
539 540
  }  

541 542
  /*
    Computation functions.
543
    TODO: consoder merging these with class Group_bound_tracker.
544 545
  */
  void setup_partition_border_check(THD *thd);
546

547
  void advance_window();
548
  bool check_if_partition_changed();
549

550 551 552 553
  enum_field_types field_type() const
  { 
    return ((Item_sum *) args[0])->field_type(); 
  }
554
  enum Item::Type type() const { return Item::WINDOW_FUNC_ITEM; }
Sergei Petrunia's avatar
Sergei Petrunia committed
555 556

private:
Sergei Petrunia's avatar
Sergei Petrunia committed
557
  /* 
Sergei Petrunia's avatar
Sergei Petrunia committed
558
    Window functions are very special functions, so val_() methods have
Sergei Petrunia's avatar
Sergei Petrunia committed
559 560
    special meaning for them:

561 562 563
    - Phase#1, "Initial" we run the join and put its result into temporary 
      table. For window functions, we write the default value (NULL?) as 
      a placeholder.
Sergei Petrunia's avatar
Sergei Petrunia committed
564
      
565 566 567 568
    - Phase#2: "Computation": executor does the scan in {PARTITION, ORDER BY} 
      order of this window function. It calls appropriate methods to inform 
      the window function about rows entering/leaving the window. 
      It calls window_func()->val_int() so that current window function value
Sergei Petrunia's avatar
Sergei Petrunia committed
569 570
      can be saved and stored in the temp.table.

571 572 573
    - Phase#3: "Retrieval" the temporary table is read and passed to query 
      output. However, Item_window_func still remains in the select list,
      so item_windowfunc->val_int() will be called.
Sergei Petrunia's avatar
Sergei Petrunia committed
574
      During Phase#3, read_value_from_result_field= true.
Sergei Petrunia's avatar
Sergei Petrunia committed
575
  */
576
  bool force_return_blank;
577 578 579
  bool read_value_from_result_field;

public:
580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
  void set_phase_to_initial()
  {
    force_return_blank= true;
    read_value_from_result_field= false;
  }
  void set_phase_to_computation()
  {
    force_return_blank= false;
    read_value_from_result_field= false;
  }
  void set_phase_to_retrieval()
  {
    force_return_blank= false;
    read_value_from_result_field= true;
  }

596 597
  double val_real() 
  {
598
    double res;
599
    if (force_return_blank)
600 601 602 603 604 605 606
    {
      res= 0.0;
      null_value= false;
    }
    else if (read_value_from_result_field)
    {
      res= result_field->val_real();
607
      null_value= result_field->is_null();
608 609 610
    }
    else
    {
611 612
      res= window_func()->val_real();
      null_value= window_func()->null_value;
613
    }
614
    return res;
615
  }
616

617
  longlong val_int()
618
  {
619
    longlong res;
620
    if (force_return_blank)
621 622 623 624 625 626 627 628 629 630 631
    {
      res= 0;
      null_value= false;
    }
    else if (read_value_from_result_field)
    {
      res= result_field->val_int();
      null_value= result_field->is_null();
    }
    else
    {
632 633
      res= window_func()->val_int();
      null_value= window_func()->null_value;
634 635
    }
    return res;
636
  }
637

638 639
  String* val_str(String* str)
  {
640
    String *res;
641
    if (force_return_blank)
642 643 644 645 646 647 648 649 650 651 652 653 654 655
    {
      null_value= false;
      str->length(0);
      res= str;
    }
    else if (read_value_from_result_field)
    {
      if ((null_value= result_field->is_null()))
        res= NULL;
      else
        res= result_field->val_str(str);
    }
    else
    {
656 657
      res= window_func()->val_str(str);
      null_value= window_func()->null_value;
658 659
    }
    return res;
660
  }
661 662

  my_decimal* val_decimal(my_decimal* dec)
663 664
  {
    my_decimal *res;
665
    if (force_return_blank)
666 667
    {
      my_decimal_set_zero(dec);
668 669 670 671 672 673 674 675 676 677 678 679
      null_value= false;
      res= dec;
    }
    else if (read_value_from_result_field)
    {
      if ((null_value= result_field->is_null()))
        res= NULL;
      else
        res= result_field->val_decimal(dec);
    }
    else
    {
680 681
      res= window_func()->val_decimal(dec);
      null_value= window_func()->null_value;
682
    }
683
    return res;
684
  }
685

686 687
  void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
                              List<Item> &fields, uint flags);
688 689
  void fix_length_and_dec()
  {
690
    decimals = window_func()->decimals;
691
  }
692 693 694 695

  const char* func_name() const { return "WF"; }

  bool fix_fields(THD *thd, Item **ref);
696

697
  bool resolve_window_name(THD *thd);
698

699 700 701
};

#endif /* ITEM_WINDOWFUNC_INCLUDED */