opt_range.cc 318 KB
Newer Older
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

17 18 19 20 21
/*
  TODO:
  Fix that MAYBE_KEY are stored in the tree so that we can detect use
  of full hash keys for queries like:

22 23
  select s.id, kws.keyword_id from sites as s,kws where s.id=kws.site_id and kws.keyword_id in (204,205);

24 25
*/

26
/*
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
  This file contains:

  RangeAnalysisModule  
    A module that accepts a condition, index (or partitioning) description, 
    and builds lists of intervals (in index/partitioning space), such that 
    all possible records that match the condition are contained within the 
    intervals.
    The entry point for the range analysis module is get_mm_tree() function.
    
    The lists are returned in form of complicated structure of interlinked
    SEL_TREE/SEL_IMERGE/SEL_ARG objects.
    See check_quick_keys, find_used_partitions for examples of how to walk 
    this structure.
    All direct "users" of this module are located within this file, too.


  PartitionPruningModule
    A module that accepts a partitioned table, condition, and finds which
    partitions we will need to use in query execution. Search down for
    "PartitionPruningModule" for description.
    The module has single entry point - prune_partitions() function.


  Range/index_merge/groupby-minmax optimizer module  
    A module that accepts a table, condition, and returns 
     - a QUICK_*_SELECT object that can be used to retrieve rows that match
       the specified condition, or a "no records will match the condition" 
       statement.

    The module entry points are
      test_quick_select()
      get_quick_select_for_ref()


  Record retrieval code for range/index_merge/groupby-min-max.
    Implementations of QUICK_*_SELECT classes.
63 64
*/

65
#ifdef USE_PRAGMA_IMPLEMENTATION
bk@work.mysql.com's avatar
bk@work.mysql.com committed
66 67 68 69 70 71 72 73 74 75 76 77
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
#include <m_ctype.h>
#include "sql_select.h"

#ifndef EXTRA_DEBUG
#define test_rb_tree(A,B) {}
#define test_use_count(A) {}
#endif

78
/*
79
  Convert double value to #rows. Currently this does floor(), and we
80 81
  might consider using round() instead.
*/
82
#define double2rows(x) ((ha_rows)(x))
83

bk@work.mysql.com's avatar
bk@work.mysql.com committed
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
/*
  Compare two range bounds (a, b) of field f, taking the bound flags a_flag
  and b_flag into account.  Used by the SEL_ARG::cmp_*_to_*() helpers.
  The definition is not in this part of the file.
*/
static int sel_cmp(Field *f,char *a,char *b,uint8 a_flag,uint8 b_flag);

/*
  NOTE(review): presumably a key image whose leading byte (1) is the
  "value is NULL" indicator, matching the convention SEL_ARG uses for
  nullable key parts -- confirm against the users of this array.
*/
static char is_null_string[2]= {1,0};

class SEL_ARG :public Sql_alloc
{
public:
  uint8 min_flag,max_flag,maybe_flag;
  uint8 part;					// Which key part
  uint8 maybe_null;
  uint16 elements;				// Elements in tree
  ulong use_count;				// use of this sub_tree
  Field *field;
  char *min_value,*max_value;			// Pointer to range

  SEL_ARG *left,*right,*next,*prev,*parent,*next_key_part;
  enum leaf_color { BLACK,RED } color;
  enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;

  SEL_ARG() {}
  SEL_ARG(SEL_ARG &);
  SEL_ARG(Field *,const char *,const char *);
  SEL_ARG(Field *field, uint8 part, char *min_value, char *max_value,
	  uint8 min_flag, uint8 max_flag, uint8 maybe_flag);
  SEL_ARG(enum Type type_arg)
109 110 111
    :elements(1),use_count(1),left(0),next_key_part(0),color(BLACK),
     type(type_arg)
  {}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
112 113
  inline bool is_same(SEL_ARG *arg)
  {
114
    if (type != arg->type || part != arg->part)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
      return 0;
    if (type != KEY_RANGE)
      return 1;
    return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
  }
  inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
  inline void maybe_smaller() { maybe_flag=1; }
  inline int cmp_min_to_min(SEL_ARG* arg)
  {
    return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
  }
  inline int cmp_min_to_max(SEL_ARG* arg)
  {
    return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
  }
  inline int cmp_max_to_max(SEL_ARG* arg)
  {
    return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
  }
  inline int cmp_max_to_min(SEL_ARG* arg)
  {
    return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
  }
  SEL_ARG *clone_and(SEL_ARG* arg)
  {						// Get overlapping range
    char *new_min,*new_max;
    uint8 flag_min,flag_max;
    if (cmp_min_to_min(arg) >= 0)
    {
      new_min=min_value; flag_min=min_flag;
    }
    else
    {
      new_min=arg->min_value; flag_min=arg->min_flag; /* purecov: deadcode */
    }
    if (cmp_max_to_max(arg) <= 0)
    {
      new_max=max_value; flag_max=max_flag;
    }
    else
    {
      new_max=arg->max_value; flag_max=arg->max_flag;
    }
    return new SEL_ARG(field, part, new_min, new_max, flag_min, flag_max,
		       test(maybe_flag && arg->maybe_flag));
  }
  SEL_ARG *clone_first(SEL_ARG *arg)
  {						// min <= X < arg->min
    return new SEL_ARG(field,part, min_value, arg->min_value,
		       min_flag, arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX,
		       maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone_last(SEL_ARG *arg)
  {						// min <= X <= key_max
    return new SEL_ARG(field, part, min_value, arg->max_value,
		       min_flag, arg->max_flag, maybe_flag | arg->maybe_flag);
  }
  SEL_ARG *clone(SEL_ARG *new_parent,SEL_ARG **next);

  bool copy_min(SEL_ARG* arg)
  {						// Get overlapping range
    if (cmp_min_to_min(arg) > 0)
    {
      min_value=arg->min_value; min_flag=arg->min_flag;
      if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) ==
	  (NO_MAX_RANGE | NO_MIN_RANGE))
	return 1;				// Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }
  bool copy_max(SEL_ARG* arg)
  {						// Get overlapping range
    if (cmp_max_to_max(arg) <= 0)
    {
      max_value=arg->max_value; max_flag=arg->max_flag;
      if ((max_flag & (NO_MAX_RANGE | NO_MIN_RANGE)) ==
	  (NO_MAX_RANGE | NO_MIN_RANGE))
	return 1;				// Full range
    }
    maybe_flag|=arg->maybe_flag;
    return 0;
  }

  void copy_min_to_min(SEL_ARG *arg)
  {
    min_value=arg->min_value; min_flag=arg->min_flag;
  }
  void copy_min_to_max(SEL_ARG *arg)
  {
    max_value=arg->min_value;
    max_flag=arg->min_flag & NEAR_MIN ? 0 : NEAR_MAX;
  }
  void copy_max_to_min(SEL_ARG *arg)
  {
    min_value=arg->max_value;
    min_flag=arg->max_flag & NEAR_MAX ? 0 : NEAR_MIN;
  }
213
  void store_min(uint length,char **min_key,uint min_key_flag)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
214
  {
215 216 217
    if ((min_flag & GEOM_FLAG) ||
        (!(min_flag & NO_MIN_RANGE) &&
	!(min_key_flag & (NO_MIN_RANGE | NEAR_MIN))))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
218 219 220 221
    {
      if (maybe_null && *min_value)
      {
	**min_key=1;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
222
	bzero(*min_key+1,length-1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
223 224
      }
      else
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
225 226
	memcpy(*min_key,min_value,length);
      (*min_key)+= length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
227
    }
228
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
229 230 231
  void store(uint length,char **min_key,uint min_key_flag,
	     char **max_key, uint max_key_flag)
  {
232
    store_min(length, min_key, min_key_flag);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
233 234 235 236 237 238
    if (!(max_flag & NO_MAX_RANGE) &&
	!(max_key_flag & (NO_MAX_RANGE | NEAR_MAX)))
    {
      if (maybe_null && *max_value)
      {
	**max_key=1;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
239
	bzero(*max_key+1,length-1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
240 241
      }
      else
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
242 243
	memcpy(*max_key,max_value,length);
      (*max_key)+= length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
244 245 246 247 248 249
    }
  }

  void store_min_key(KEY_PART *key,char **range_key, uint *range_key_flag)
  {
    SEL_ARG *key_tree= first();
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
250
    key_tree->store(key[key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
251 252 253 254 255 256 257 258 259 260 261 262
		    range_key,*range_key_flag,range_key,NO_MAX_RANGE);
    *range_key_flag|= key_tree->min_flag;
    if (key_tree->next_key_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MIN_RANGE | NEAR_MIN)) &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
      key_tree->next_key_part->store_min_key(key,range_key, range_key_flag);
  }

  void store_max_key(KEY_PART *key,char **range_key, uint *range_key_flag)
  {
    SEL_ARG *key_tree= last();
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
263
    key_tree->store(key[key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
		    range_key, NO_MIN_RANGE, range_key,*range_key_flag);
    (*range_key_flag)|= key_tree->max_flag;
    if (key_tree->next_key_part &&
	key_tree->next_key_part->part == key_tree->part+1 &&
	!(*range_key_flag & (NO_MAX_RANGE | NEAR_MAX)) &&
	key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
      key_tree->next_key_part->store_max_key(key,range_key, range_key_flag);
  }

  SEL_ARG *insert(SEL_ARG *key);
  SEL_ARG *tree_delete(SEL_ARG *key);
  SEL_ARG *find_range(SEL_ARG *key);
  SEL_ARG *rb_insert(SEL_ARG *leaf);
  friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
#ifdef EXTRA_DEBUG
  friend int test_rb_tree(SEL_ARG *element,SEL_ARG *parent);
  void test_use_count(SEL_ARG *root);
#endif
  SEL_ARG *first();
  SEL_ARG *last();
  void make_root();
  inline bool simple_key()
  {
    return !next_key_part && elements == 1;
  }
  void increment_use_count(long count)
  {
    if (next_key_part)
    {
      next_key_part->use_count+=count;
      count*= (next_key_part->use_count-count);
      for (SEL_ARG *pos=next_key_part->first(); pos ; pos=pos->next)
	if (pos->next_key_part)
	  pos->increment_use_count(count);
    }
  }
  void free_tree()
  {
    for (SEL_ARG *pos=first(); pos ; pos=pos->next)
      if (pos->next_key_part)
      {
	pos->next_key_part->use_count--;
	pos->next_key_part->free_tree();
      }
  }

  inline SEL_ARG **parent_ptr()
  {
    return parent->left == this ? &parent->left : &parent->right;
  }
  SEL_ARG *clone_tree();
315

316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332

  /*
    Check if this SEL_ARG object represents a single-point interval

    SYNOPSIS
      is_singlepoint()
    
    DESCRIPTION
      Check if this SEL_ARG object (not tree) represents a single-point
      interval, i.e. if it represents a "keypart = const" or 
      "keypart IS NULL".

    RETURN
      TRUE   This SEL_ARG object represents a singlepoint interval
      FALSE  Otherwise
  */

333 334
  bool is_singlepoint()
  {
335 336 337 338 339 340
    /* 
      Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field) 
      flags, and the same for right edge.
    */
    if (min_flag || max_flag)
      return FALSE;
341
    byte *min_val= (byte *)min_value;
342
    byte *max_val= (byte *)max_value;
343 344 345 346 347 348 349 350 351 352 353 354 355

    if (maybe_null)
    {
      /* First byte is a NULL value indicator */
      if (*min_val != *max_val)
        return FALSE;

      if (*min_val)
        return TRUE; /* This "x IS NULL" */
      min_val++;
      max_val++;
    }
    return !field->key_cmp(min_val, max_val);
356
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
357 358
};

359
class SEL_IMERGE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
360

361

bk@work.mysql.com's avatar
bk@work.mysql.com committed
362 363 364
class SEL_TREE :public Sql_alloc
{
public:
365 366 367 368 369
  /*
    Starting an effort to document this field:
    (for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) => 
       (type == SEL_TREE::IMPOSSIBLE)
  */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
370 371
  enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
  SEL_TREE(enum Type type_arg) :type(type_arg) {}
372
  SEL_TREE() :type(KEY)
373
  {
374
    keys_map.clear_all();
375 376
    bzero((char*) keys,sizeof(keys));
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
377
  SEL_ARG *keys[MAX_KEY];
378 379
  key_map keys_map;        /* bitmask of non-NULL elements in keys */

380 381
  /*
    Possible ways to read rows using index_merge. The list is non-empty only
382 383 384
    if type==KEY. Currently can be non empty only if keys_map.is_clear_all().
  */
  List<SEL_IMERGE> merges;
385

386 387
  /* The members below are filled/used only after get_mm_tree is done */
  key_map ror_scans_map;   /* bitmask of ROR scan-able elements in keys */
388
  uint    n_ror_scans;     /* number of set bits in ror_scans_map */
389 390 391 392

  struct st_ror_scan_info **ror_scans;     /* list of ROR key scans */
  struct st_ror_scan_info **ror_scans_end; /* last ROR scan */
  /* Note that #records for each key scan is stored in table->quick_rows */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
393 394
};

395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
/*
  Context shared by the range analysis functions (get_mm_tree() and the
  tree_and()/tree_or() family take it as their first argument).
*/
class RANGE_OPT_PARAM
{
public:
  THD   *thd;                /* Current thread handle */
  TABLE *table;              /* Table being analyzed */
  COND  *cond;               /* Used inside get_mm_tree(). */
  table_map prev_tables;
  table_map read_tables;
  table_map current_table;   /* Bit of the table being analyzed */

  /* [key_parts, key_parts_end): parts of all keys taking part in the analysis */
  KEY_PART *key_parts;
  KEY_PART *key_parts_end;

  MEM_ROOT *mem_root;        /* Freed when range analysis completes */
  MEM_ROOT *old_root;        /* Lives until the end of the query */

  /*
    Number of indexes used in range analysis.  Only the first #keys
    elements of SEL_TREE::keys can be non-empty.
  */
  uint keys;

  /*
    TRUE:  the index descriptions refer to real indexes, so calling
           field->optimize_range(real_keynr[...], ...) is allowed.
    FALSE: the index descriptions refer to fake indexes.
  */
  bool using_real_indexes;

  bool remove_jump_scans;

  /*
    Maps used_key_no -> table_key_no.  Meaningful only when
    using_real_indexes==TRUE.
  */
  uint real_keynr[MAX_KEY];
};
bk@work.mysql.com's avatar
bk@work.mysql.com committed
431

432 433 434
class PARAM : public RANGE_OPT_PARAM
{
public:
435
  KEY_PART *key[MAX_KEY]; /* First key parts of keys used in the query */
436
  uint baseflag, max_key_part, range_count;
437

438

bk@work.mysql.com's avatar
bk@work.mysql.com committed
439 440
  char min_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH],
    max_key[MAX_KEY_LENGTH+MAX_FIELD_WIDTH];
441
  bool quick;				// Don't calulate possible keys
442

443
  uint fields_bitmap_size;
444 445 446 447
  MY_BITMAP needed_fields;    /* bitmask of fields needed by the query */

  key_map *needed_reg;        /* ptr to SQL_SELECT::needed_reg */

448 449
  uint *imerge_cost_buff;     /* buffer for index_merge cost estimates */
  uint imerge_cost_buff_size; /* size of the buffer */
450

451
  /* TRUE if last checked tree->key can be used for ROR-scan */
452
  bool is_ror_scan;
453
};
bk@work.mysql.com's avatar
bk@work.mysql.com committed
454

455 456 457 458 459
class TABLE_READ_PLAN;
  class TRP_RANGE;
  class TRP_ROR_INTERSECT;
  class TRP_ROR_UNION;
  class TRP_ROR_INDEX_MERGE;
460
  class TRP_GROUP_MIN_MAX;
461 462 463

struct st_ror_scan_info;

464
static SEL_TREE * get_mm_parts(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
465 466
			       Item_func::Functype type,Item *value,
			       Item_result cmp_type);
467
static SEL_ARG *get_mm_leaf(RANGE_OPT_PARAM *param,COND *cond_func,Field *field,
468
			    KEY_PART *key_part,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
469
			    Item_func::Functype type,Item *value);
470
static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond);
471 472

static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
473 474 475 476 477
static ha_rows check_quick_select(PARAM *param,uint index,SEL_ARG *key_tree);
static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
				char *min_key,uint min_key_flag,
				char *max_key, uint max_key_flag);

478
QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
479
                                     SEL_ARG *key_tree,
480
                                     MEM_ROOT *alloc = NULL);
481
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
482
                                       bool index_read_must_be_used,
483 484 485 486 487 488
                                       double read_time);
static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering);
static
489 490
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
491 492 493 494
                                                   double read_time);
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
                                         double read_time);
495 496
static
TRP_GROUP_MIN_MAX *get_best_group_min_max(PARAM *param, SEL_TREE *tree);
497
static int get_index_merge_params(PARAM *param, key_map& needed_reg,
498
                           SEL_IMERGE *imerge, double *read_time,
499
                           ha_rows* imerge_rows);
500
static double get_index_only_read_time(const PARAM* param, ha_rows records,
501 502
                                       int keynr);

bk@work.mysql.com's avatar
bk@work.mysql.com committed
503
#ifndef DBUG_OFF
504 505
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg);
506 507
static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
508 509 510
                                struct st_ror_scan_info **end);
static void print_rowid(byte* val, int len);
static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
511
#endif
512

513 514
static SEL_TREE *tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
static SEL_TREE *tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
515 516 517 518
static SEL_ARG *sel_add(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_or(SEL_ARG *key1,SEL_ARG *key2);
static SEL_ARG *key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
519
bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
520 521 522 523 524
			   SEL_ARG *key_tree,char *min_key,uint min_key_flag,
			   char *max_key,uint max_key_flag);
static bool eq_tree(SEL_ARG* a,SEL_ARG *b);

static SEL_ARG null_element(SEL_ARG::IMPOSSIBLE);
525
static bool null_part_in_key(KEY_PART *key_part, const char *key,
526
                             uint length);
527
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param);
528 529 530


/*
  SEL_IMERGE describes one way to do index merge.  It stands for a
  condition of the form

   (t_1||t_2||...||t_N) && (next)

  where every t_i is a SEL_TREE, next is another SEL_IMERGE, and no two
  trees (t_i,t_j) hold SEL_ARGs for the same index.

  A SEL_TREE stored in a SEL_IMERGE never has merges of its own.

  Cleanup is left to the memory manager; there is no destructor.
*/

class SEL_IMERGE : public Sql_alloc
{
  enum { PREALLOCED_TREES= 10};
public:
  SEL_TREE *trees_prealloced[PREALLOCED_TREES]; /* initial in-object storage */
  SEL_TREE **trees;             /* trees used to do index_merge   */
  SEL_TREE **trees_next;        /* last of these trees            */
  SEL_TREE **trees_end;         /* end of allocated space         */

  SEL_ARG  ***best_keys;        /* best keys to read in SEL_TREEs */

  /* Start out empty, using the preallocated array as storage. */
  SEL_IMERGE()
    :trees(trees_prealloced), trees_next(trees),
     trees_end(trees + PREALLOCED_TREES)
  {}

  int or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree);
  int or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree);
  int or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge);
};


565
/*
566 567
  Add SEL_TREE to this index_merge without any checks,

568 569
  NOTES
    This function implements the following:
570 571 572 573 574 575 576
      (x_1||...||x_N) || t = (x_1||...||x_N||t), where x_i, t are SEL_TREEs

  RETURN
     0 - OK
    -1 - Out of memory.
*/

577
int SEL_IMERGE::or_sel_tree(RANGE_OPT_PARAM *param, SEL_TREE *tree)
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601
{
  if (trees_next == trees_end)
  {
    const int realloc_ratio= 2;		/* Double size for next round */
    uint old_elements= (trees_end - trees);
    uint old_size= sizeof(SEL_TREE**) * old_elements;
    uint new_size= old_size * realloc_ratio;
    SEL_TREE **new_trees;
    if (!(new_trees= (SEL_TREE**)alloc_root(param->mem_root, new_size)))
      return -1;
    memcpy(new_trees, trees, old_size);
    trees=      new_trees;
    trees_next= trees + old_elements;
    trees_end=  trees + old_elements * realloc_ratio;
  }
  *(trees_next++)= tree;
  return 0;
}


/*
  Perform OR operation on this SEL_IMERGE and supplied SEL_TREE new_tree,
  combining new_tree with one of the trees in this SEL_IMERGE if they both
  have SEL_ARGs for the same key.
602

603 604 605 606 607
  SYNOPSIS
    or_sel_tree_with_checks()
      param    PARAM from SQL_SELECT::test_quick_select
      new_tree SEL_TREE with type KEY or KEY_SMALLER.

608
  NOTES
609
    This does the following:
610 611
    (t_1||...||t_k)||new_tree =
     either
612 613 614
       = (t_1||...||t_k||new_tree)
     or
       = (t_1||....||(t_j|| new_tree)||...||t_k),
615

616
     where t_i, y are SEL_TREEs.
617 618
    new_tree is combined with the first t_j it has a SEL_ARG on common
    key with. As a consequence of this, choice of keys to do index_merge
619 620
    read may depend on the order of conditions in WHERE part of the query.

621
  RETURN
622
    0  OK
623
    1  One of the trees was combined with new_tree to SEL_TREE::ALWAYS,
624 625 626 627
       and (*this) should be discarded.
   -1  An error occurred.
*/

628
int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param, SEL_TREE *new_tree)
629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
{
  for (SEL_TREE** tree = trees;
       tree != trees_next;
       tree++)
  {
    if (sel_trees_can_be_ored(*tree, new_tree, param))
    {
      *tree = tree_or(param, *tree, new_tree);
      if (!*tree)
        return 1;
      if (((*tree)->type == SEL_TREE::MAYBE) ||
          ((*tree)->type == SEL_TREE::ALWAYS))
        return 1;
      /* SEL_TREE::IMPOSSIBLE is impossible here */
      return 0;
    }
  }

647
  /* New tree cannot be combined with any of existing trees. */
648 649 650 651 652 653 654 655 656
  return or_sel_tree(param, new_tree);
}


/*
  Perform OR operation on this index_merge and supplied index_merge list.

  RETURN
    0 - OK
657
    1 - One of conditions in result is always TRUE and this SEL_IMERGE
658 659 660 661
        should be discarded.
   -1 - An error occurred
*/

662
int SEL_IMERGE::or_sel_imerge_with_checks(RANGE_OPT_PARAM *param, SEL_IMERGE* imerge)
663 664 665 666 667 668 669 670 671 672 673 674
{
  for (SEL_TREE** tree= imerge->trees;
       tree != imerge->trees_next;
       tree++)
  {
    if (or_sel_tree_with_checks(param, *tree))
      return 1;
  }
  return 0;
}


675
/*
  AND two index_merge lists: the elements of *im2 are appended to *im1,
  which then holds the result.
*/

inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
{
  List<SEL_IMERGE> &result= *im1;
  result.concat(im2);
}


/*
  Perform OR operation on 2 index_merge lists, storing result in first list.

688
  NOTES
689 690 691
    The following conversion is implemented:
     (a_1 &&...&& a_N)||(b_1 &&...&& b_K) = AND_i,j(a_i || b_j) =>
      => (a_1||b_1).
692 693

    i.e. all conjuncts except the first one are currently dropped.
694 695
    This is done to avoid producing N*K ways to do index_merge.

monty@mysql.com's avatar
monty@mysql.com committed
696
    If (a_1||b_1) produce a condition that is always TRUE, NULL is returned
697
    and index_merge is discarded (while it is actually possible to try
698
    harder).
699

700 701
    As a consequence of this, choice of keys to do index_merge read may depend
    on the order of conditions in WHERE part of the query.
702 703

  RETURN
704
    0     OK, result is stored in *im1
705 706 707
    other Error, both passed lists are unusable
*/

708
int imerge_list_or_list(RANGE_OPT_PARAM *param,
709 710 711 712 713 714
                        List<SEL_IMERGE> *im1,
                        List<SEL_IMERGE> *im2)
{
  SEL_IMERGE *imerge= im1->head();
  im1->empty();
  im1->push_back(imerge);
715

716 717 718 719 720 721 722 723
  return imerge->or_sel_imerge_with_checks(param, im2->head());
}


/*
  Perform OR operation on index_merge list and key tree.

  RETURN
724
    0     OK, result is stored in *im1.
725 726 727
    other Error
*/

728
int imerge_list_or_tree(RANGE_OPT_PARAM *param,
729 730 731 732 733
                        List<SEL_IMERGE> *im1,
                        SEL_TREE *tree)
{
  SEL_IMERGE *imerge;
  List_iterator<SEL_IMERGE> it(*im1);
monty@mishka.local's avatar
monty@mishka.local committed
734
  while ((imerge= it++))
735 736 737 738 739 740
  {
    if (imerge->or_sel_tree_with_checks(param, tree))
      it.remove();
  }
  return im1->is_empty();
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
741 742

/***************************************************************************
743
** Basic functions for SQL_SELECT and QUICK_RANGE_SELECT
bk@work.mysql.com's avatar
bk@work.mysql.com committed
744 745 746 747 748 749 750 751 752
***************************************************************************/

	/* make a select from mysql info
	   Error is set as following:
	   0 = ok
	   1 = Got some error (out of memory?)
	   */

/*
  Create a SQL_SELECT for table 'head'.

  Returns 0 with *error == 0 when there is no condition and
  allow_null_cond is false, and 0 with *error == 1 when the object could
  not be allocated.  If head->sort.io_cache is set, the new object takes
  over that cache: it is copied into select->file, the cache object is
  freed and head->sort.io_cache is reset.
*/
SQL_SELECT *make_select(TABLE *head, table_map const_tables,
                        table_map read_tables, COND *conds,
                        bool allow_null_cond,
                        int *error)
{
  SQL_SELECT *select;
  DBUG_ENTER("make_select");

  *error= 0;

  if (!conds && !allow_null_cond)
    DBUG_RETURN(0);

  select= new SQL_SELECT;
  if (!select)
  {
    *error= 1;                  // out of memory
    DBUG_RETURN(0);             /* purecov: inspected */
  }

  select->head= head;
  select->cond= conds;
  select->const_tables= const_tables;
  select->read_tables= read_tables;

  if (!head->sort.io_cache)
    DBUG_RETURN(select);

  /* Row count = bytes in the cache file / length of one row reference. */
  select->file= *head->sort.io_cache;
  select->records= (ha_rows) (select->file.end_of_file /
                              head->file->ref_length);
  my_free((gptr) (head->sort.io_cache), MYF(0));
  head->sort.io_cache= 0;
  DBUG_RETURN(select);
}


/* Create an empty select: no quick select, no condition, cleared key maps. */
SQL_SELECT::SQL_SELECT() :quick(0),cond(0),free_cond(0)
{
  needed_reg.clear_all();
  quick_keys.clear_all();
  my_b_clear(&file);
}


793
void SQL_SELECT::cleanup()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
794 795
{
  delete quick;
796
  quick= 0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
797
  if (free_cond)
798 799
  {
    free_cond=0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
800
    delete cond;
801
    cond= 0;
802
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
803 804 805
  close_cached_file(&file);
}

806 807 808 809 810 811

/* Destruction simply runs cleanup(). */
SQL_SELECT::~SQL_SELECT()
{
  this->cleanup();
}

812
#undef index					// Fix for Unixware 7
bk@work.mysql.com's avatar
bk@work.mysql.com committed
813

sergefp@mysql.com's avatar
sergefp@mysql.com committed
814 815 816 817 818
/* Start with zero used key length and zero used key parts. */
QUICK_SELECT_I::QUICK_SELECT_I() :max_used_key_length(0), used_key_parts(0)
{
}

819
/*
  Set up a range scan over index key_nr of table.

  When neither no_alloc nor parent_alloc is given, a private memory root
  is initialised and installed as thd->mem_root; otherwise the private
  root is only zero-filled.
*/
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(THD *thd, TABLE *table, uint key_nr,
                                       bool no_alloc, MEM_ROOT *parent_alloc)
  :dont_free(0),error(0),free_file(0),in_range(0),cur_range(NULL),range(0)
{
  sorted= 0;
  index= key_nr;
  head= table;
  key_part_info= head->key_info[index].key_part;
  my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16);

  /*
    Copy the session settings now: 'thd' is not accessible in
    QUICK_RANGE_SELECT::reset().
  */
  multi_range_bufsiz= thd->variables.read_rnd_buff_size;
  multi_range_count= thd->variables.multi_range_count;
  multi_range_length= 0;
  multi_range= NULL;
  multi_range_buff= NULL;

  if (no_alloc || parent_alloc)
    bzero((char*) &alloc,sizeof(alloc));
  else
  {
    /* Everything is allocated through the internal memroot */
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
    thd->mem_root= &alloc;
  }

  file= head->file;
  record= head->record[0];
}

monty@mysql.com's avatar
monty@mysql.com committed
848

849 850
int QUICK_RANGE_SELECT::init()
{
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
851
  DBUG_ENTER("QUICK_RANGE_SELECT::init");
ingo@mysql.com's avatar
ingo@mysql.com committed
852

853 854
  if (file->inited != handler::NONE)
    file->ha_index_or_rnd_end();
tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
855
  DBUG_RETURN(error= file->ha_index_init(index, 1));
monty@mysql.com's avatar
monty@mysql.com committed
856 857 858 859 860 861
}


/* End the index/rnd scan on the handler, if one is currently initialized. */
void QUICK_RANGE_SELECT::range_end()
{
  if (file->inited == handler::NONE)
    return;
  file->ha_index_or_rnd_end();
}

monty@mysql.com's avatar
monty@mysql.com committed
865

866
/*
  Release the resources of this quick select. Unless dont_free is set, the
  scan is ended, a separately created handler (free_file) is unlocked, closed
  and deleted, and the ranges array and the MEM_ROOT are freed. The
  multi-range buffers are always freed.
*/
QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT()
{
  DBUG_ENTER("QUICK_RANGE_SELECT::~QUICK_RANGE_SELECT");
  if (!dont_free)
  {
    /* file is NULL for CPK scan on covering ROR-intersection */
    if (file)
    {
      range_end();
      file->extra(HA_EXTRA_NO_KEYREAD);
    }
    if (file && free_file)
    {
      DBUG_PRINT("info", ("Freeing separate handler %p (free=%d)", file,
                          free_file));
      file->ha_reset();
      file->ha_external_lock(current_thd, F_UNLCK);
      file->close();
      delete file;
    }
    delete_dynamic(&ranges); /* ranges are allocated in alloc */
    free_root(&alloc,MYF(0));
  }

  if (multi_range)
    my_free((char*) multi_range, MYF(0));
  if (multi_range_buff)
    my_free((char*) multi_range_buff, MYF(0));

  DBUG_VOID_RETURN;
}

896

897
/*
  Construct an index_merge quick select for 'table'. No single index is
  associated with it (index is MAX_KEY); merged scans are added later through
  push_quick_back().
*/
QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT(THD *thd_param,
                                                   TABLE *table)
  : pk_quick_select(NULL), thd(thd_param)
{
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::QUICK_INDEX_MERGE_SELECT");
  head= table;
  index= MAX_KEY;
  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  bzero(&read_record, sizeof(read_record));
  DBUG_VOID_RETURN;
}

int QUICK_INDEX_MERGE_SELECT::init()
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
911 912
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::init");
  DBUG_RETURN(0);
913 914
}

915
int QUICK_INDEX_MERGE_SELECT::reset()
916
{
917
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::reset");
sergefp@mysql.com's avatar
sergefp@mysql.com committed
918
  DBUG_RETURN(read_keys_and_merge());
919 920
}

921
bool
922 923
QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
{
924 925
  /*
    Save quick_select that does scan on clustered primary key as it will be
926
    processed separately.
927
  */
928
  if (head->file->primary_key_is_clustered() &&
929
      quick_sel_range->index == head->s->primary_key)
930 931 932 933
    pk_quick_select= quick_sel_range;
  else
    return quick_selects.push_back(quick_sel_range);
  return 0;
934 935 936 937
}

/*
  Destroy the merged scans. Each child's 'file' pointer is cleared first, so
  the child destructors skip their handler cleanup.
*/
QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT()
{
  List_iterator_fast<QUICK_RANGE_SELECT> child_it(quick_selects);
  QUICK_RANGE_SELECT *child;
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::~QUICK_INDEX_MERGE_SELECT");

  child_it.rewind();
  while ((child= child_it++))
  {
    child->file= NULL;
  }

  quick_selects.delete_elements();
  delete pk_quick_select;
  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}

950 951 952 953 954

/*
  Construct a ROR-intersection quick select.

  The rowid buffer (last_rowid) is allocated on parent_alloc when one is
  given, otherwise on the object's own MEM_ROOT. An allocation failure is
  detected later, in init().
*/
QUICK_ROR_INTERSECT_SELECT::QUICK_ROR_INTERSECT_SELECT(THD *thd_param,
                                                       TABLE *table,
                                                       bool retrieve_full_rows,
                                                       MEM_ROOT *parent_alloc)
  : cpk_quick(NULL), thd(thd_param), need_to_fetch_row(retrieve_full_rows),
    scans_inited(FALSE)
{
  MEM_ROOT *rowid_root;

  index= MAX_KEY;
  head= table;
  record= head->record[0];

  if (parent_alloc)
  {
    bzero(&alloc, sizeof(MEM_ROOT));
    rowid_root= parent_alloc;
  }
  else
  {
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
    rowid_root= &alloc;
  }
  last_rowid= (byte*) alloc_root(rowid_root, head->file->ref_length);
}

969

970
/*
971 972 973
  Do post-constructor initialization.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init()
974

975 976 977 978 979
  RETURN
    0      OK
    other  Error code
*/

980 981
int QUICK_ROR_INTERSECT_SELECT::init()
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
982 983 984
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init");
 /* Check if last_rowid was successfully allocated in ctor */
  DBUG_RETURN(!last_rowid);
985 986 987 988
}


/*
989 990 991 992
  Initialize this quick select to be a ROR-merged scan.

  SYNOPSIS
    QUICK_RANGE_SELECT::init_ror_merged_scan()
monty@mysql.com's avatar
monty@mysql.com committed
993
      reuse_handler If TRUE, use head->file, otherwise create a separate
994 995 996 997
                    handler object

  NOTES
    This function creates and prepares for subsequent use a separate handler
998
    object if it can't reuse head->file. The reason for this is that during
999 1000 1001
    ROR-merge several key scans are performed simultaneously, and a single
    handler is only capable of preserving context of a single key scan.

1002
    In ROR-merge the quick select doing merge does full records retrieval,
1003
    merged quick selects read only keys.
1004 1005

  RETURN
1006 1007 1008 1009
    0  ROR child scan initialized, ok to use.
    1  error
*/

1010
int QUICK_RANGE_SELECT::init_ror_merged_scan(bool reuse_handler)
1011 1012
{
  handler *save_file= file;
1013
  THD *thd;
1014
  DBUG_ENTER("QUICK_RANGE_SELECT::init_ror_merged_scan");
1015

1016 1017 1018 1019
  if (reuse_handler)
  {
    DBUG_PRINT("info", ("Reusing handler %p", file));
    if (file->extra(HA_EXTRA_KEYREAD) ||
1020
        file->ha_retrieve_all_pk() ||
1021 1022 1023 1024
        init() || reset())
    {
      DBUG_RETURN(1);
    }
monty@mysql.com's avatar
monty@mysql.com committed
1025
    DBUG_RETURN(0);
1026 1027 1028 1029 1030 1031 1032 1033
  }

  /* Create a separate handler object for this quick select */
  if (free_file)
  {
    /* already have own 'handler' object. */
    DBUG_RETURN(0);
  }
1034

1035 1036
  thd= head->in_use;
  if (!(file= get_new_handler(head->s, thd->mem_root, head->s->db_type)))
1037 1038
    goto failure;
  DBUG_PRINT("info", ("Allocated new handler %p", file));
1039 1040
  if (file->ha_open(head, head->s->normalized_path.str, head->db_stat,
                    HA_OPEN_IGNORE_IF_LOCKED))
1041
  {
1042
    /* Caller will free the memory */
1043 1044
    goto failure;
  }
1045
  if (file->ha_external_lock(thd, F_RDLCK))
1046
    goto failure;
1047 1048

  if (file->extra(HA_EXTRA_KEYREAD) ||
1049
      file->ha_retrieve_all_pk() ||
1050 1051
      init() || reset())
  {
1052
    file->ha_external_lock(thd, F_UNLCK);
1053 1054 1055
    file->close();
    goto failure;
  }
monty@mysql.com's avatar
monty@mysql.com committed
1056
  free_file= TRUE;
1057 1058 1059 1060
  last_rowid= file->ref;
  DBUG_RETURN(0);

failure:
1061 1062
  if (file)
    delete file;
1063 1064 1065 1066
  file= save_file;
  DBUG_RETURN(1);
}

1067 1068 1069 1070 1071

/*
  Initialize this quick select to be a part of a ROR-merged scan.
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan()
monty@mysql.com's avatar
monty@mysql.com committed
1072
      reuse_handler If TRUE, use head->file, otherwise create separate
1073
                    handler object.
1074
  RETURN
1075 1076 1077 1078
    0     OK
    other error code
*/
int QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan(bool reuse_handler)
1079 1080 1081
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
1082
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::init_ror_merged_scan");
1083 1084

  /* Initialize all merged "children" quick selects */
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1085
  DBUG_ASSERT(!need_to_fetch_row || reuse_handler);
1086 1087 1088
  if (!need_to_fetch_row && reuse_handler)
  {
    quick= quick_it++;
1089
    /*
1090
      There is no use of this->file. Use it for the first of merged range
1091 1092
      selects.
    */
monty@mysql.com's avatar
monty@mysql.com committed
1093
    if (quick->init_ror_merged_scan(TRUE))
1094 1095 1096
      DBUG_RETURN(1);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
  }
monty@mishka.local's avatar
monty@mishka.local committed
1097
  while ((quick= quick_it++))
1098
  {
monty@mysql.com's avatar
monty@mysql.com committed
1099
    if (quick->init_ror_merged_scan(FALSE))
1100 1101
      DBUG_RETURN(1);
    quick->file->extra(HA_EXTRA_KEYREAD_PRESERVE_FIELDS);
1102
    /* All merged scans share the same record buffer in intersection. */
1103 1104 1105
    quick->record= head->record[0];
  }

monty@mysql.com's avatar
monty@mysql.com committed
1106
  if (need_to_fetch_row && head->file->ha_rnd_init(1))
1107 1108 1109 1110 1111 1112 1113
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}

1114

1115
/*
1116 1117 1118 1119 1120 1121 1122 1123
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()
  RETURN
    0      OK
    other  Error code
*/

1124 1125 1126
int QUICK_ROR_INTERSECT_SELECT::reset()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::reset");
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1127 1128
  if (!scans_inited && init_ror_merged_scan(TRUE))
    DBUG_RETURN(1);
1129
  scans_inited= TRUE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1130 1131 1132 1133 1134
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  QUICK_RANGE_SELECT *quick;
  while ((quick= it++))
    quick->reset();
  DBUG_RETURN(0);
1135 1136
}

1137 1138 1139

/*
  Add a merged quick select to this ROR-intersection quick select.
1140

1141 1142 1143 1144 1145 1146
  SYNOPSIS
    QUICK_ROR_INTERSECT_SELECT::push_quick_back()
      quick Quick select to be added. The quick select must return
            rows in rowid order.
  NOTES
    This call can only be made before init() is called.
1147

1148
  RETURN
1149
    FALSE OK
monty@mysql.com's avatar
monty@mysql.com committed
1150
    TRUE  Out of memory.
1151 1152
*/

1153
bool
1154 1155
QUICK_ROR_INTERSECT_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick)
{
1156
  return quick_selects.push_back(quick);
1157 1158 1159
}

/*
  Delete the merged scans and the clustered PK scan, free the MEM_ROOT and,
  when full rows were being fetched, end the scan still open on head->file.
*/
QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT()
{
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::~QUICK_ROR_INTERSECT_SELECT");
  quick_selects.delete_elements();
  delete cpk_quick;
  free_root(&alloc,MYF(0));
  if (need_to_fetch_row)
  {
    if (head->file->inited != handler::NONE)
      head->file->ha_rnd_end();
  }
  DBUG_VOID_RETURN;
}

monty@mysql.com's avatar
monty@mysql.com committed
1170

1171 1172
/*
  Construct a ROR-union quick select for 'table'. The object's own MEM_ROOT
  is initialized and installed as the memory root of thd_param.
*/
QUICK_ROR_UNION_SELECT::QUICK_ROR_UNION_SELECT(THD *thd_param,
                                               TABLE *table)
  : thd(thd_param), scans_inited(FALSE)
{
  head= table;
  index= MAX_KEY;
  record= head->record[0];
  rowid_length= table->file->ref_length;

  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  thd_param->mem_root= &alloc;
}

1183 1184 1185 1186 1187

/*
  Do post-constructor initialization.
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::init()
1188

1189 1190 1191 1192 1193
  RETURN
    0      OK
    other  Error code
*/

1194 1195
int QUICK_ROR_UNION_SELECT::init()
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1196
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::init");
1197
  if (init_queue(&queue, quick_selects.elements, 0,
monty@mysql.com's avatar
monty@mysql.com committed
1198
                 FALSE , QUICK_ROR_UNION_SELECT::queue_cmp,
1199 1200 1201
                 (void*) this))
  {
    bzero(&queue, sizeof(QUEUE));
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1202
    DBUG_RETURN(1);
1203
  }
1204

1205
  if (!(cur_rowid= (byte*)alloc_root(&alloc, 2*head->file->ref_length)))
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1206
    DBUG_RETURN(1);
1207
  prev_rowid= cur_rowid + head->file->ref_length;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1208
  DBUG_RETURN(0);
1209 1210
}

1211

1212
/*
1213
  Comparison function used by the QUICK_ROR_UNION_SELECT::queue priority
1214 1215
  queue.

1216 1217 1218 1219 1220 1221
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::queue_cmp()
      arg   Pointer to QUICK_ROR_UNION_SELECT
      val1  First merged select
      val2  Second merged select
*/
1222

1223 1224
int QUICK_ROR_UNION_SELECT::queue_cmp(void *arg, byte *val1, byte *val2)
{
1225
  QUICK_ROR_UNION_SELECT *self= (QUICK_ROR_UNION_SELECT*)arg;
1226 1227 1228 1229
  return self->head->file->cmp_ref(((QUICK_SELECT_I*)val1)->last_rowid,
                                   ((QUICK_SELECT_I*)val2)->last_rowid);
}

1230

1231
/*
1232 1233 1234
  Initialize quick select for row retrieval.
  SYNOPSIS
    reset()
1235

1236 1237 1238 1239 1240
  RETURN
    0      OK
    other  Error code
*/

1241 1242 1243 1244 1245
int QUICK_ROR_UNION_SELECT::reset()
{
  QUICK_SELECT_I* quick;
  int error;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::reset");
monty@mysql.com's avatar
monty@mysql.com committed
1246
  have_prev_rowid= FALSE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1247 1248 1249 1250 1251 1252 1253 1254 1255
  if (!scans_inited)
  {
    QUICK_SELECT_I *quick;
    List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
    while ((quick= it++))
    {
      if (quick->init_ror_merged_scan(FALSE))
        DBUG_RETURN(1);
    }
1256
    scans_inited= TRUE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1257 1258
  }
  queue_remove_all(&queue);
1259 1260
  /*
    Initialize scans for merged quick selects and put all merged quick
1261 1262 1263 1264 1265
    selects into the queue.
  */
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  while ((quick= it++))
  {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1266
    if (quick->reset())
1267
      DBUG_RETURN(1);
1268 1269 1270 1271
    if ((error= quick->get_next()))
    {
      if (error == HA_ERR_END_OF_FILE)
        continue;
monty@mysql.com's avatar
monty@mysql.com committed
1272
      DBUG_RETURN(error);
1273 1274 1275 1276 1277
    }
    quick->save_last_pos();
    queue_insert(&queue, (byte*)quick);
  }

monty@mysql.com's avatar
monty@mysql.com committed
1278
  if (head->file->ha_rnd_init(1))
1279 1280 1281 1282 1283 1284 1285 1286 1287
  {
    DBUG_PRINT("error", ("ROR index_merge rnd_init call failed"));
    DBUG_RETURN(1);
  }

  DBUG_RETURN(0);
}


1288
bool
1289 1290 1291 1292 1293 1294 1295 1296 1297
QUICK_ROR_UNION_SELECT::push_quick_back(QUICK_SELECT_I *quick_sel_range)
{
  return quick_selects.push_back(quick_sel_range);
}

/*
  Free the priority queue and the merged scans, end any scan still open on
  head->file, then release the MEM_ROOT.
*/
QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT()
{
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::~QUICK_ROR_UNION_SELECT");
  delete_queue(&queue);
  quick_selects.delete_elements();

  const bool scan_active= (head->file->inited != handler::NONE);
  if (scan_active)
    head->file->ha_rnd_end();

  free_root(&alloc,MYF(0));
  DBUG_VOID_RETURN;
}

1305

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358
/* Default range: no key values, NO_MIN_RANGE and NO_MAX_RANGE both set. */
QUICK_RANGE::QUICK_RANGE()
  : min_key(0), max_key(0),
    min_length(0), max_length(0),
    flag(NO_MIN_RANGE | NO_MAX_RANGE)
{
}

/*
  Copy the interval description of 'arg' (type, flags, key part, field,
  bounds and next_key_part). The tree links are not copied; use_count and
  elements both start at 1.
*/
SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
{
  type= arg.type;

  min_flag= arg.min_flag;
  max_flag= arg.max_flag;
  maybe_flag= arg.maybe_flag;
  maybe_null= arg.maybe_null;

  part= arg.part;
  field= arg.field;

  min_value= arg.min_value;
  max_value= arg.max_value;

  next_key_part= arg.next_key_part;

  use_count= 1;
  elements= 1;
}


/*
  Turn this element into a single-node tree: black, no children, no
  next/prev neighbours, use_count 0 and elements 1.
*/
inline void SEL_ARG::make_root()
{
  left= &null_element;
  right= &null_element;
  color= BLACK;
  next= 0;
  prev= 0;
  use_count= 0;
  elements= 1;
}

/*
  Create a KEY_RANGE element for field 'f' with the given bounds and no
  flags set. The node is black and has no children.
*/
SEL_ARG::SEL_ARG(Field *f,const char *min_value_arg,const char *max_value_arg)
  : min_flag(0), max_flag(0), maybe_flag(0),
    maybe_null(f->real_maybe_null()),
    elements(1), use_count(1),
    field(f),
    min_value((char*) min_value_arg),
    max_value((char*) max_value_arg),
    next(0), prev(0),
    next_key_part(0),
    color(BLACK),
    type(KEY_RANGE)
{
  left= &null_element;
  right= &null_element;
}

/*
  Create a KEY_RANGE element for key part 'part_' of 'field_' with explicit
  bounds and flags. The node is black and has no children.
*/
SEL_ARG::SEL_ARG(Field *field_,uint8 part_,char *min_value_,char *max_value_,
		 uint8 min_flag_,uint8 max_flag_,uint8 maybe_flag_)
  : min_flag(min_flag_), max_flag(max_flag_), maybe_flag(maybe_flag_),
    part(part_),
    maybe_null(field_->real_maybe_null()),
    elements(1), use_count(1),
    field(field_),
    min_value(min_value_), max_value(max_value_),
    next(0), prev(0),
    next_key_part(0),
    color(BLACK),
    type(KEY_RANGE)
{
  left= &null_element;
  right= &null_element;
}

/*
  Recursively copy the subtree rooted at this element.

  SYNOPSIS
    SEL_ARG::clone()
      new_parent  Parent to set in the copy of this element
      next_arg    IN/OUT  Last element of the next/prev chain built so far;
                  each copied element is appended to it in tree order

  RETURN
    Copy of this element
    0 on out of memory, including a failure in either subtree
*/
SEL_ARG *SEL_ARG::clone(SEL_ARG *new_parent,SEL_ARG **next_arg)
{
  SEL_ARG *tmp;
  if (type != KEY_RANGE)
  {
    if (!(tmp= new SEL_ARG(type)))
      return 0;					// out of memory
    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;
  }
  else
  {
    if (!(tmp= new SEL_ARG(field,part, min_value,max_value,
			   min_flag, max_flag, maybe_flag)))
      return 0;					// OOM
    tmp->parent=new_parent;
    tmp->next_key_part=next_key_part;
    /*
      A failed left clone must be reported like a failed right clone;
      otherwise tmp->left is left as 0 instead of &null_element.
    */
    if (left != &null_element)
      if (!(tmp->left= left->clone(tmp,next_arg)))
	return 0;				// OOM

    tmp->prev= *next_arg;			// Link into next/prev chain
    (*next_arg)->next=tmp;
    (*next_arg)= tmp;

    if (right != &null_element)
      if (!(tmp->right= right->clone(tmp,next_arg)))
	return 0;				// OOM
  }
  increment_use_count(1);
  tmp->color= color;
  return tmp;
}

/* Return the leftmost element of this tree, or 0 when 'left' is not set. */
SEL_ARG *SEL_ARG::first()
{
  if (!left)
    return 0;					// MAYBE_KEY

  SEL_ARG *node= this;
  for (; node->left != &null_element; node= node->left)
  {
  }
  return node;
}

/* Return the rightmost element of this tree, or 0 when 'right' is not set. */
SEL_ARG *SEL_ARG::last()
{
  if (!right)
    return 0;					// MAYBE_KEY

  SEL_ARG *node= this;
  for (; node->right != &null_element; node= node->right)
  {
  }
  return node;
}

1408

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1409 1410 1411
/*
  Check if a compare is ok, when one takes ranges in account
  Returns -2 or 2 if the ranges were 'joined' like  < 2 and >= 2
1412
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435

static int sel_cmp(Field *field, char *a,char *b,uint8 a_flag,uint8 b_flag)
{
  /* First check if there was a compare to a min or max element */
  const uint a_unbounded= a_flag & (NO_MIN_RANGE | NO_MAX_RANGE);
  const uint b_unbounded= b_flag & (NO_MIN_RANGE | NO_MAX_RANGE);
  if (a_unbounded)
  {
    if (a_unbounded == b_unbounded)
      return 0;
    return (a_flag & NO_MIN_RANGE) ? -1 : 1;
  }
  if (b_unbounded)
    return (b_flag & NO_MIN_RANGE) ? 1 : -1;

  bool both_null= FALSE;
  if (field->real_maybe_null())			// If null is part of key
  {
    if (*a != *b)
      return *a ? -1 : 1;
    if (*a)
      both_null= TRUE;				// NULLs were equal
    else
    {
      a++;					// Skip NULL marker
      b++;
    }
  }

  /* Two NULLs have no key values to compare */
  if (!both_null)
  {
    int cmp= field->key_cmp((byte*) a,(byte*) b);
    if (cmp)
      return cmp < 0 ? -1 : 1;			// The values differed
  }

  /* Equal values: decide by whether each end is open or closed */
  const uint a_near= a_flag & (NEAR_MIN | NEAR_MAX);
  const uint b_near= b_flag & (NEAR_MIN | NEAR_MAX);
  if (a_near)
  {
    if (a_near == b_near)
      return 0;
    if (!b_near)
      return (a_flag & NEAR_MIN) ? 2 : -2;
    return (a_flag & NEAR_MIN) ? 1 : -1;
  }
  if (b_near)
    return (b_flag & NEAR_MIN) ? -2 : 2;
  return 0;					// The elements were equal
}


/*
  Make a copy of the tree rooted at this element.

  RETURN
    Root of the copy, with use_count set to 0
    0 on out of memory
*/
SEL_ARG *SEL_ARG::clone_tree()
{
  SEL_ARG tmp_link,*next_arg,*root;
  next_arg= &tmp_link;			// Dummy head of the next/prev chain
  /*
    Bail out before touching the chain: if clone() failed, tmp_link.next
    may never have been set, so it must not be dereferenced.
  */
  if (!(root= clone((SEL_ARG *) 0, &next_arg)))
    return 0;					// OOM
  next_arg->next=0;				// Fix last link
  tmp_link.next->prev=0;			// Fix first link
  root->use_count= 0;
  return root;
}

1469

1470
/*
1471
  Find the best index to retrieve first N records in given order
1472 1473 1474 1475 1476 1477 1478 1479

  SYNOPSIS
    get_index_for_order()
      table  Table to be accessed
      order  Required ordering
      limit  Number of records that will be retrieved

  DESCRIPTION
1480 1481 1482 1483
    Find the best index that allows to retrieve first #limit records in the 
    given order cheaper than one would retrieve them using full table scan.

  IMPLEMENTATION
1484
    Run through all table indexes and find the shortest index that allows
1485 1486
    records to be retrieved in given order. We look for the shortest index
    as we will have fewer index pages to read with it.
1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508

    This function is used only by UPDATE/DELETE, so we take into account how
    the UPDATE/DELETE code will work:
     * index can only be scanned in forward direction
     * HA_EXTRA_KEYREAD will not be used
    Perhaps these assumptions could be relaxed

  RETURN
    index number
    MAX_KEY if no such index was found.
*/

uint get_index_for_order(TABLE *table, ORDER *order, ha_rows limit)
{
  uint idx;
  uint match_key= MAX_KEY, match_key_len= MAX_KEY_LENGTH + 1;
  ORDER *ord;

  /* Indexes are only scanned forward here, so no DESC element is allowed */
  for (ord= order; ord; ord= ord->next)
    if (!ord->asc)
      return MAX_KEY;

  for (idx= 0; idx < table->s->keys; idx++)
  {
    if (!(table->keys_in_use_for_query.is_set(idx)))
      continue;
    KEY_PART_INFO *keyinfo= table->key_info[idx].key_part;
    uint n_key_parts= table->key_info[idx].key_parts;
    uint partno= 0;

    /*
      The below check is sufficient considering we now have either BTREE
      indexes (records are returned in order for any index prefix) or HASH
      indexes (records are not returned in order for any index prefix).
    */
    if (!(table->file->index_flags(idx, 0, 1) & HA_READ_ORDER))
      continue;
    for (ord= order; ord; ord= ord->next, partno++)
    {
      /*
        An ordering longer than the index cannot be provided by it. Stop
        here so that keyinfo[] is never read past the parts of this key.
      */
      if (partno >= n_key_parts)
        break;
      /* Compare the current ORDER BY element, not the head of the list */
      Item *item= ord->item[0];
      if (!(item->type() == Item::FIELD_ITEM &&
           ((Item_field*)item)->field->eq(keyinfo[partno].field)))
        break;
    }

    /* ord is 0 here only when every ORDER BY element matched a key part */
    if (!ord && table->key_info[idx].key_length < match_key_len)
    {
      /*
        Ok, the ordering is compatible and this key is shorter than
        previous match (we want shorter keys as we'll have to read fewer
        index pages for the same number of records)
      */
      match_key= idx;
      match_key_len= table->key_info[idx].key_length;
    }
  }

  if (match_key != MAX_KEY)
  {
    /*
      Found an index that allows records to be retrieved in the requested
      order. Now we'll check if using the index is cheaper than doing a table
      scan.
    */
    double full_scan_time= table->file->scan_time();
    double index_scan_time= table->file->read_time(match_key, 1, limit);
    if (index_scan_time > full_scan_time)
      match_key= MAX_KEY;
  }
  return match_key;
}


serg@serg.mylan's avatar
serg@serg.mylan committed
1559
/*
1560
  Table rows retrieval plan. Range optimizer creates QUICK_SELECT_I-derived
1561 1562 1563 1564 1565
  objects from table read plans.
*/
class TABLE_READ_PLAN
{
public:
1566 1567
  /*
    Plan read cost, with or without cost of full row retrieval, depending
1568 1569
    on plan creation parameters.
  */
1570
  double read_cost;
1571
  ha_rows records; /* estimate of #rows to be examined */
serg@serg.mylan's avatar
serg@serg.mylan committed
1572

1573 1574
  /*
    If TRUE, the scan returns rows in rowid order. This is used only for
1575 1576
    scans that can be both ROR and non-ROR.
  */
1577
  bool is_ror;
1578

1579 1580 1581 1582 1583
  /*
    Create quick select for this plan.
    SYNOPSIS
     make_quick()
       param               Parameter from test_quick_select
monty@mysql.com's avatar
monty@mysql.com committed
1584
       retrieve_full_rows  If TRUE, created quick select will do full record
1585 1586
                           retrieval.
       parent_alloc        Memory pool to use, if any.
1587

1588 1589
    NOTES
      retrieve_full_rows is ignored by some implementations.
1590 1591

    RETURN
1592 1593 1594
      created quick select
      NULL on any error.
  */
1595 1596 1597 1598
  virtual QUICK_SELECT_I *make_quick(PARAM *param,
                                     bool retrieve_full_rows,
                                     MEM_ROOT *parent_alloc=NULL) = 0;

1599
  /* Table read plans are allocated on MEM_ROOT and are never deleted */
1600 1601
  static void *operator new(size_t size, MEM_ROOT *mem_root)
  { return (void*) alloc_root(mem_root, (uint) size); }
1602
  static void operator delete(void *ptr,size_t size) { TRASH(ptr, size); }
1603
  static void operator delete(void *ptr, MEM_ROOT *mem_root) { /* Never called */ }
1604 1605
  virtual ~TABLE_READ_PLAN() {}               /* Remove gcc warning */

1606 1607 1608 1609 1610 1611 1612
};

class TRP_ROR_INTERSECT;
class TRP_ROR_UNION;
class TRP_INDEX_MERGE;


1613
/*
1614
  Plan for a QUICK_RANGE_SELECT scan.
1615 1616 1617
  TRP_RANGE::make_quick ignores retrieve_full_rows parameter because
  QUICK_RANGE_SELECT doesn't distinguish between 'index only' scans and full
  record retrieval scans.
serg@serg.mylan's avatar
serg@serg.mylan committed
1618
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1619

1620
class TRP_RANGE : public TABLE_READ_PLAN
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1621
{
1622
public:
1623 1624
  SEL_ARG *key; /* set of intervals to be used in "range" method retrieval */
  uint     key_idx; /* key number in PARAM::key */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1625

1626
  TRP_RANGE(SEL_ARG *key_arg, uint idx_arg)
1627 1628
   : key(key_arg), key_idx(idx_arg)
  {}
1629
  virtual ~TRP_RANGE() {}                     /* Remove gcc warning */
1630

1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc)
  {
    DBUG_ENTER("TRP_RANGE::make_quick");
    QUICK_RANGE_SELECT *quick;
    if ((quick= get_quick_select(param, key_idx, key, parent_alloc)))
    {
      quick->records= records;
      quick->read_time= read_cost;
    }
    DBUG_RETURN(quick);
  }
};
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1644 1645


1646 1647
/* Plan for QUICK_ROR_INTERSECT_SELECT scan. */

1648 1649 1650
class TRP_ROR_INTERSECT : public TABLE_READ_PLAN
{
public:
1651 1652
  TRP_ROR_INTERSECT() {}                      /* Remove gcc warning */
  virtual ~TRP_ROR_INTERSECT() {}             /* Remove gcc warning */
1653
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
1654
                             MEM_ROOT *parent_alloc);
1655

1656
  /* Array of pointers to ROR range scans used in this intersection */
1657
  struct st_ror_scan_info **first_scan;
1658 1659
  struct st_ror_scan_info **last_scan; /* End of the above array */
  struct st_ror_scan_info *cpk_scan;  /* Clustered PK scan, if there is one */
monty@mysql.com's avatar
monty@mysql.com committed
1660
  bool is_covering; /* TRUE if no row retrieval phase is necessary */
1661
  double index_scan_costs; /* SUM(cost(index_scan)) */
1662 1663
};

1664

1665
/*
1666 1667
  Plan for QUICK_ROR_UNION_SELECT scan.
  QUICK_ROR_UNION_SELECT always retrieves full rows, so retrieve_full_rows
1668
  is ignored by make_quick.
1669
*/
1670

1671 1672 1673
class TRP_ROR_UNION : public TABLE_READ_PLAN
{
public:
1674 1675
  TRP_ROR_UNION() {}                          /* Remove gcc warning */
  virtual ~TRP_ROR_UNION() {}                 /* Remove gcc warning */
1676
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
1677
                             MEM_ROOT *parent_alloc);
1678 1679
  TABLE_READ_PLAN **first_ror; /* array of ptrs to plans for merged scans */
  TABLE_READ_PLAN **last_ror;  /* end of the above array */
1680 1681
};

1682 1683 1684 1685

/*
  Plan for QUICK_INDEX_MERGE_SELECT scan.
  QUICK_INDEX_MERGE_SELECT always retrieves full rows, so retrieve_full_rows
1686
  is ignored by make_quick.
1687 1688
*/

1689 1690 1691
class TRP_INDEX_MERGE : public TABLE_READ_PLAN
{
public:
1692 1693
  TRP_INDEX_MERGE() {}                        /* Remove gcc warning */
  virtual ~TRP_INDEX_MERGE() {}               /* Remove gcc warning */
1694
  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
1695
                             MEM_ROOT *parent_alloc);
1696 1697
  TRP_RANGE **range_scans; /* array of ptrs to plans of merged scans */
  TRP_RANGE **range_scans_end; /* end of the array */
1698 1699 1700
};


1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723
/*
  Plan for a QUICK_GROUP_MIN_MAX_SELECT scan. 
*/

class TRP_GROUP_MIN_MAX : public TABLE_READ_PLAN
{
private:
  bool have_min, have_max;
  KEY_PART_INFO *min_max_arg_part;
  uint group_prefix_len;
  uint used_key_parts;
  uint group_key_parts;
  KEY *index_info;
  uint index;
  uint key_infix_len;
  byte key_infix[MAX_KEY_LENGTH];
  SEL_TREE *range_tree; /* Represents all range predicates in the query. */
  SEL_ARG  *index_tree; /* The SEL_ARG sub-tree corresponding to index_info. */
  uint param_idx; /* Index of used key in param->key. */
  /* Number of records selected by the ranges in index_tree. */
public:
  ha_rows quick_prefix_records;
public:
1724 1725 1726 1727
  TRP_GROUP_MIN_MAX(bool have_min_arg, bool have_max_arg,
                    KEY_PART_INFO *min_max_arg_part_arg,
                    uint group_prefix_len_arg, uint used_key_parts_arg,
                    uint group_key_parts_arg, KEY *index_info_arg,
1728 1729
                    uint index_arg, uint key_infix_len_arg,
                    byte *key_infix_arg,
1730 1731 1732 1733 1734 1735 1736 1737 1738
                    SEL_TREE *tree_arg, SEL_ARG *index_tree_arg,
                    uint param_idx_arg, ha_rows quick_prefix_records_arg)
  : have_min(have_min_arg), have_max(have_max_arg),
    min_max_arg_part(min_max_arg_part_arg),
    group_prefix_len(group_prefix_len_arg), used_key_parts(used_key_parts_arg),
    group_key_parts(group_key_parts_arg), index_info(index_info_arg),
    index(index_arg), key_infix_len(key_infix_len_arg), range_tree(tree_arg),
    index_tree(index_tree_arg), param_idx(param_idx_arg),
    quick_prefix_records(quick_prefix_records_arg)
1739 1740 1741 1742
    {
      if (key_infix_len)
        memcpy(this->key_infix, key_infix_arg, key_infix_len);
    }
1743
  virtual ~TRP_GROUP_MIN_MAX() {}             /* Remove gcc warning */
1744 1745 1746 1747 1748 1749

  QUICK_SELECT_I *make_quick(PARAM *param, bool retrieve_full_rows,
                             MEM_ROOT *parent_alloc);
};


1750
/*
  Fill param->needed_fields with bitmap of fields used in the query.

  SYNOPSIS
    fill_used_fields_bitmap()
      param Parameter from test_quick_select function.

  DESCRIPTION
    The bitmap buffer is allocated on param->mem_root and its size is
    remembered in param->fields_bitmap_size. Bit (i+1) is set for every
    table field #i whose query_id equals the query_id of param->thd.

  NOTES
    Clustered PK members are not put into the bitmap as they are implicitly
    present in all keys (and it is impossible to avoid reading them).

  RETURN
    0  Ok
    1  Out of memory.
*/

static int fill_used_fields_bitmap(PARAM *param)
{
  TABLE *table= param->table;

  /* One bit per field plus one, as the bits are addressed from 1 */
  param->fields_bitmap_size= bitmap_buffer_size(table->s->fields+1);

  uint32 *bitmap_buf= (uint32*) alloc_root(param->mem_root,
                                           param->fields_bitmap_size);
  if (!bitmap_buf)
    return 1;
  if (bitmap_init(&param->needed_fields, bitmap_buf,
                  param->fields_bitmap_size*8, FALSE))
    return 1;

  bitmap_clear_all(&param->needed_fields);

  const uint n_fields= table->s->fields;
  for (uint fieldnr= 1; fieldnr <= n_fields; fieldnr++)
  {
    if (table->field[fieldnr-1]->query_id == param->thd->query_id)
      bitmap_set_bit(&param->needed_fields, fieldnr);
  }

  const uint pk= table->s->primary_key;
  if (table->file->primary_key_is_clustered() && pk != MAX_KEY)
  {
    /* The table uses clustered PK and it is not internally generated */
    KEY *pk_info= &table->key_info[pk];
    for (uint part= 0; part != pk_info->key_parts; part++)
      bitmap_clear_bit(&param->needed_fields, pk_info->key_part[part].fieldnr);
  }
  return 0;
}


serg@serg.mylan's avatar
serg@serg.mylan committed
1798
/*
1799
  Test if a key can be used in different ranges
serg@serg.mylan's avatar
serg@serg.mylan committed
1800 1801

  SYNOPSIS
1802 1803 1804 1805 1806
    SQL_SELECT::test_quick_select()
      thd               Current thread
      keys_to_use       Keys to use for range retrieval
      prev_tables       Tables assumed to be already read when the scan is
                        performed (but not read at the moment of this call)
1807 1808 1809
      limit             Query limit
      force_quick_range Prefer to use range (instead of full table scan) even
                        if it is more expensive.
1810 1811 1812 1813 1814

  NOTES
    Updates the following in the select parameter:
      needed_reg - Bits for keys with may be used if all prev regs are read
      quick      - Parameter to use when reading records.
1815

1816 1817 1818
    In the table struct the following information is updated:
      quick_keys - Which keys can be used
      quick_rows - How many rows the key matches
serg@serg.mylan's avatar
serg@serg.mylan committed
1819

1820 1821 1822 1823
  TODO
   Check if this function really needs to modify keys_to_use, and change the
   code to pass it by reference if it doesn't.

1824
   In addition to force_quick_range other means can be (an usually are) used
1825 1826
   to make this function prefer range over full table scan. Figure out if
   force_quick_range is really needed.
1827

1828 1829 1830 1831
  RETURN
   -1 if impossible select (i.e. certainly no rows will be selected)
    0 if can't use quick_select
    1 if found usable ranges and quick select has been successfully created.
serg@serg.mylan's avatar
serg@serg.mylan committed
1832
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1833

1834 1835
int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
				  table_map prev_tables,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1836 1837 1838 1839
				  ha_rows limit, bool force_quick_range)
{
  uint idx;
  double scan_time;
1840
  DBUG_ENTER("SQL_SELECT::test_quick_select");
serg@serg.mylan's avatar
serg@serg.mylan committed
1841 1842 1843
  DBUG_PRINT("enter",("keys_to_use: %lu  prev_tables: %lu  const_tables: %lu",
		      keys_to_use.to_ulonglong(), (ulong) prev_tables,
		      (ulong) const_tables));
1844
  DBUG_PRINT("info", ("records=%lu", (ulong)head->file->records));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1845 1846
  delete quick;
  quick=0;
1847 1848 1849
  needed_reg.clear_all();
  quick_keys.clear_all();
  if ((specialflag & SPECIAL_SAFE_MODE) && ! force_quick_range ||
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1850 1851
      !limit)
    DBUG_RETURN(0); /* purecov: inspected */
1852 1853
  if (keys_to_use.is_clear_all())
    DBUG_RETURN(0);
1854
  records= head->file->records;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1855 1856
  if (!records)
    records++;					/* purecov: inspected */
1857 1858
  scan_time= (double) records / TIME_FOR_COMPARE + 1;
  read_time= (double) head->file->scan_time() + scan_time + 1.1;
1859 1860
  if (head->force_index)
    scan_time= read_time= DBL_MAX;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1861
  if (limit < records)
1862
    read_time= (double) records + scan_time + 1; // Force to use index
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1863
  else if (read_time <= 2.0 && !force_quick_range)
1864
    DBUG_RETURN(0);				/* No need for quick select */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1865

1866
  DBUG_PRINT("info",("Time to scan table: %g", read_time));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1867

1868 1869
  keys_to_use.intersect(head->keys_in_use_for_query);
  if (!keys_to_use.is_clear_all())
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1870
  {
1871
    MEM_ROOT alloc;
1872
    SEL_TREE *tree= NULL;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1873
    KEY_PART *key_parts;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1874
    KEY *key_info;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1875
    PARAM param;
serg@serg.mylan's avatar
serg@serg.mylan committed
1876

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1877
    /* set up parameter that is passed to all functions */
1878
    param.thd= thd;
monty@mysql.com's avatar
monty@mysql.com committed
1879
    param.baseflag=head->file->table_flags();
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1880 1881 1882 1883 1884
    param.prev_tables=prev_tables | const_tables;
    param.read_tables=read_tables;
    param.current_table= head->map;
    param.table=head;
    param.keys=0;
1885
    param.mem_root= &alloc;
1886
    param.old_root= thd->mem_root;
1887
    param.needed_reg= &needed_reg;
1888
    param.imerge_cost_buff_size= 0;
1889
    param.using_real_indexes= TRUE;
1890
    param.remove_jump_scans= TRUE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1891

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1892
    thd->no_errors=1;				// Don't warn about NULL
1893
    init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
1894 1895 1896 1897
    if (!(param.key_parts= (KEY_PART*) alloc_root(&alloc,
                                                  sizeof(KEY_PART)*
                                                  head->s->key_parts)) ||
        fill_used_fields_bitmap(&param))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1898
    {
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1899
      thd->no_errors=0;
1900
      free_root(&alloc,MYF(0));			// Return memory & allocator
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1901 1902 1903
      DBUG_RETURN(0);				// Can't use range
    }
    key_parts= param.key_parts;
1904
    thd->mem_root= &alloc;
1905 1906 1907 1908

    /*
      Make an array with description of all key parts of all table keys.
      This is used in get_mm_parts function.
1909
    */
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1910
    key_info= head->key_info;
1911
    for (idx=0 ; idx < head->s->keys ; idx++, key_info++)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1912
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1913
      KEY_PART_INFO *key_part_info;
1914
      if (!keys_to_use.is_set(idx))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1915 1916 1917 1918 1919
	continue;
      if (key_info->flags & HA_FULLTEXT)
	continue;    // ToDo: ft-keys in non-ft ranges, if possible   SerG

      param.key[param.keys]=key_parts;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1920 1921 1922
      key_part_info= key_info->key_part;
      for (uint part=0 ; part < key_info->key_parts ;
	   part++, key_parts++, key_part_info++)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1923
      {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
1924 1925 1926 1927 1928 1929
	key_parts->key=		 param.keys;
	key_parts->part=	 part;
	key_parts->length=       key_part_info->length;
	key_parts->store_length= key_part_info->store_length;
	key_parts->field=	 key_part_info->field;
	key_parts->null_bit=	 key_part_info->null_bit;
1930
        key_parts->image_type =
1931
          (key_info->flags & HA_SPATIAL) ? Field::itMBR : Field::itRAW;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1932 1933 1934 1935 1936
      }
      param.real_keynr[param.keys++]=idx;
    }
    param.key_parts_end=key_parts;

sergefp@mysql.com's avatar
sergefp@mysql.com committed
1937 1938 1939 1940
    /* Calculate cost of full index read for the shortest covering index */
    if (!head->used_keys.is_clear_all())
    {
      int key_for_use= find_shortest_key(head, &head->used_keys);
1941 1942 1943
      double key_read_time= (get_index_only_read_time(&param, records,
                                                     key_for_use) +
                             (double) records / TIME_FOR_COMPARE);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
1944 1945 1946 1947 1948
      DBUG_PRINT("info",  ("'all'+'using index' scan will be using key %d, "
                           "read time %g", key_for_use, key_read_time));
      if (key_read_time < read_time)
        read_time= key_read_time;
    }
1949

1950 1951 1952 1953 1954
    TABLE_READ_PLAN *best_trp= NULL;
    TRP_GROUP_MIN_MAX *group_trp;
    double best_read_time= read_time;

    if (cond)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1955
    {
1956 1957 1958 1959 1960 1961 1962 1963
      if ((tree= get_mm_tree(&param,cond)))
      {
        if (tree->type == SEL_TREE::IMPOSSIBLE)
        {
          records=0L;                      /* Return -1 from this function. */
          read_time= (double) HA_POS_ERROR;
          goto free_mem;
        }
1964 1965 1966 1967 1968 1969
        /*
          If the tree can't be used for range scans, proceed anyway, as we
          can construct a group-min-max quick select
        */
        if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
          tree= NULL;
1970
      }
1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984
    }

    /*
      Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
      Notice that it can be constructed no matter if there is a range tree.
    */
    group_trp= get_best_group_min_max(&param, tree);
    if (group_trp && group_trp->read_cost < best_read_time)
    {
      best_trp= group_trp;
      best_read_time= best_trp->read_cost;
    }

    if (tree)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1985
    {
monty@mysql.com's avatar
monty@mysql.com committed
1986 1987 1988
      /*
        It is possible to use a range-based quick select (but it might be
        slower than 'all' table scan).
1989 1990
      */
      if (tree->merges.is_empty())
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1991
      {
1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
        TRP_RANGE         *range_trp;
        TRP_ROR_INTERSECT *rori_trp;
        bool can_build_covering= FALSE;

        /* Get best 'range' plan and prepare data for making other plans */
        if ((range_trp= get_key_scans_params(&param, tree, FALSE,
                                             best_read_time)))
        {
          best_trp= range_trp;
          best_read_time= best_trp->read_cost;
        }

2004
        /*
2005 2006 2007
          Simultaneous key scans and row deletes on several handler
          objects are not allowed so don't use ROR-intersection for
          table deletes.
2008
        */
2009 2010 2011 2012
        if ((thd->lex->sql_command != SQLCOM_DELETE))
#ifdef NOT_USED
          if ((thd->lex->sql_command != SQLCOM_UPDATE))
#endif
2013
        {
2014
          /*
2015 2016
            Get best non-covering ROR-intersection plan and prepare data for
            building covering ROR-intersection.
2017
          */
2018 2019
          if ((rori_trp= get_best_ror_intersect(&param, tree, best_read_time,
                                                &can_build_covering)))
2020
          {
2021 2022
            best_trp= rori_trp;
            best_read_time= best_trp->read_cost;
2023 2024
            /*
              Try constructing covering ROR-intersect only if it looks possible
2025 2026
              and worth doing.
            */
2027 2028 2029 2030
            if (!rori_trp->is_covering && can_build_covering &&
                (rori_trp= get_best_covering_ror_intersect(&param, tree,
                                                           best_read_time)))
              best_trp= rori_trp;
2031 2032
          }
        }
2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044
      }
      else
      {
        /* Try creating index_merge/ROR-union scan. */
        SEL_IMERGE *imerge;
        TABLE_READ_PLAN *best_conj_trp= NULL, *new_conj_trp;
        LINT_INIT(new_conj_trp); /* no empty index_merge lists possible */

        DBUG_PRINT("info",("No range reads possible,"
                           " trying to construct index_merge"));
        List_iterator_fast<SEL_IMERGE> it(tree->merges);
        while ((imerge= it++))
2045
        {
2046 2047 2048 2049
          new_conj_trp= get_best_disjunct_quick(&param, imerge, best_read_time);
          if (!best_conj_trp || (new_conj_trp && new_conj_trp->read_cost <
                                 best_conj_trp->read_cost))
            best_conj_trp= new_conj_trp;
2050
        }
2051 2052 2053 2054
        if (best_conj_trp)
          best_trp= best_conj_trp;
      }
    }
2055

2056
    thd->mem_root= param.old_root;
2057 2058 2059 2060 2061 2062 2063 2064 2065

    /* If we got a read plan, create a quick select from it. */
    if (best_trp)
    {
      records= best_trp->records;
      if (!(quick= best_trp->make_quick(&param, TRUE)) || quick->init())
      {
        delete quick;
        quick= NULL;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2066 2067
      }
    }
2068 2069

  free_mem:
2070
    free_root(&alloc,MYF(0));			// Return memory & allocator
2071
    thd->mem_root= param.old_root;
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2072
    thd->no_errors=0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2073
  }
2074

2075
  DBUG_EXECUTE("info", print_quick(quick, &needed_reg););
2076

bk@work.mysql.com's avatar
bk@work.mysql.com committed
2077 2078 2079 2080 2081 2082 2083
  /*
    Assume that if the user is using 'limit' we will only need to scan
    limit rows if we are using a key
  */
  DBUG_RETURN(records ? test(quick) : -1);
}

2084
/****************************************************************************
2085
 * Partition pruning module
2086 2087 2088
 ****************************************************************************/
#ifdef WITH_PARTITION_STORAGE_ENGINE

2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129
/*
  PartitionPruningModule

  This part of the code does partition pruning. Partition pruning solves the
  following problem: given a query over partitioned tables, find partitions
  that we will not need to access (i.e. partitions that we can assume to be
  empty) when executing the query.
  The set of partitions to prune doesn't depend on which query execution
  plan will be used to execute the query.
  
  HOW IT WORKS
  
  Partition pruning module makes use of RangeAnalysisModule. The following
  examples show how the problem of partition pruning can be reduced to the 
  range analysis problem:
  
  EXAMPLE 1
    Consider a query:
    
      SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
    
    where table t1 is partitioned using PARTITION BY RANGE(t1.a).  An apparent
    way to find the used (i.e. not pruned away) partitions is as follows:
    
    1. analyze the WHERE clause and extract the list of intervals over t1.a
       for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}

    2. for each interval I
       {
         find partitions that have non-empty intersection with I;
         mark them as used;
       }
       
  EXAMPLE 2
    Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
    we need to:

    1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
       The list of intervals we'll obtain will look like this:
       ((t1.a, t1.b) = (1,'foo')),
       ((t1.a, t1.b) = (2,'bar')), 
2130
       ((t1.a, t1.b) > (10,'zz'))
2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159
       
    2. for each interval I 
       {
         if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
         {
           calculate HASH(part_func(t1.a, t1.b));
           find which partition has records with this hash value and mark
             it as used;
         }
         else
         {
           mark all partitions as used; 
           break;
         }
       }

   For both examples the step #1 is exactly what RangeAnalysisModule could
   be used to do, if it was provided with appropriate index description
   (array of KEY_PART structures). 
   In example #1, we need to provide it with description of index(t1.a), 
   in example #2, we need to provide it with description of index(t1.a, t1.b).
   
   These index descriptions are further called "partitioning index
   descriptions". Note that it doesn't matter if such indexes really exist,
   as range analysis module only uses the description.
   
   Putting it all together, partitioning module works as follows:
   
   prune_partitions() {
2160
     call create_partition_index_description();
2161 2162 2163 2164 2165 2166 2167 2168 2169

     call get_mm_tree(); // invoke the RangeAnalysisModule
     
     // analyze the obtained interval list and get used partitions 
     call find_used_partitions();
  }

*/

2170 2171 2172 2173 2174 2175 2176 2177 2178 2179
/* Forward declarations of structures used by the partition pruning code */
struct st_part_prune_param;
struct st_part_opt_info;

/*
  Pointer to a function that receives a partition_info and a uint32.
  This is the type of PART_PRUNE_PARAM::mark_full_partition_used.
*/
typedef void (*mark_full_part_func)(partition_info*, uint32);

/*
  Partition pruning operation context
*/
typedef struct st_part_prune_param
{
2180
  RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
2181

2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227
  /***************************************************************
   Following fields are filled in based solely on partitioning 
   definition and not modified after that:
   **************************************************************/
  partition_info *part_info; /* Copy of table->part_info */
  /* Function to get partition id from partitioning fields only */
  get_part_id_func get_top_partition_id_func;
  /* Function to mark a partition as used (w/all subpartitions if they exist)*/
  mark_full_part_func mark_full_partition_used;
 
  /* Partitioning 'index' description, array of key parts */
  KEY_PART *key;
  
  /*
    Number of fields in partitioning 'index' definition created for
    partitioning (0 if partitioning 'index' doesn't include partitioning
    fields)
  */
  uint part_fields;
  uint subpart_fields; /* Same as above for subpartitioning */
  
  /* 
    Number of the last partitioning field keypart in the index, or -1 if
    partitioning index definition doesn't include partitioning fields.
  */
  int last_part_partno;
  int last_subpart_partno; /* Same as above for supartitioning */

  /*
    is_part_keypart[i] == test(keypart #i in partitioning index is a member
                               used in partitioning)
    Used to maintain current values of cur_part_fields and cur_subpart_fields
  */
  my_bool *is_part_keypart;
  /* Same as above for subpartitioning */
  my_bool *is_subpart_keypart;

  /***************************************************************
   Following fields form find_used_partitions() recursion context:
   **************************************************************/
  SEL_ARG **arg_stack;     /* "Stack" of SEL_ARGs */
  SEL_ARG **arg_stack_end; /* Top of the stack    */
  /* Number of partitioning fields for which we have a SEL_ARG* in arg_stack */
  uint cur_part_fields;
  /* Same as cur_part_fields, but for subpartitioning */
  uint cur_subpart_fields;
2228

2229 2230 2231 2232 2233
  /* Iterator to be used to obtain the "current" set of used partitions */
  PARTITION_ITERATOR part_iter;

  /* Initialized bitmap of no_subparts size */
  MY_BITMAP subparts_bitmap;
2234 2235
} PART_PRUNE_PARAM;

2236
static bool create_partition_index_description(PART_PRUNE_PARAM *prune_par);
2237
static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
2238 2239 2240 2241
static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
                                       SEL_IMERGE *imerge);
static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges);
2242 2243 2244 2245 2246 2247 2248 2249
static void mark_all_partitions_as_used(partition_info *part_info);
static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, 
                                        uint32 num);

#ifndef DBUG_OFF
static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end);
static void dbug_print_field(Field *field);
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part);
2250
static void dbug_print_singlepoint_range(SEL_ARG **start, uint num);
2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298
#endif


/*
  Perform partition pruning for a given table and condition.

  SYNOPSIS
    prune_partitions()
      thd           Thread handle
      table         Table to perform partition pruning for
      pprune_cond   Condition to use for partition pruning

  DESCRIPTION
    This function assumes that all partitions are marked as unused when it
    is invoked. The function analyzes the condition, finds partitions that
    need to be used to retrieve the records that match the condition, and
    marks them as used by setting appropriate bit in part_info->used_partitions
    In the worst case all partitions are marked as used.

    All partitions are marked as used when:
      - there is no pruning condition,
      - the partitioning index description cannot be created,
      - range analysis returns no tree, or a tree that is neither KEY nor
        KEY_SMALLER,
      - the first key of the tree is absent, or the find_used_partitions*
        call returns -1.

    Range analysis structures are allocated on a local MEM_ROOT.
    thd->mem_root and thd->no_errors are changed for the duration of the
    analysis and restored before returning.

  NOTE
    This function returns promptly if called for non-partitioned table.

  RETURN
    TRUE   We've inferred that no partitions need to be used (i.e. no table
           records will satisfy pprune_cond)
    FALSE  Otherwise
*/

bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
{
  bool retval= FALSE;
  partition_info *part_info = table->part_info;
  DBUG_ENTER("prune_partitions");

  if (!part_info)
    DBUG_RETURN(FALSE); /* not a partitioned table */

  if (!pprune_cond)
  {
    mark_all_partitions_as_used(part_info);
    DBUG_RETURN(FALSE);
  }

  PART_PRUNE_PARAM prune_param;
  MEM_ROOT alloc;
  RANGE_OPT_PARAM  *range_par= &prune_param.range_param;

  prune_param.part_info= part_info;
  prune_param.part_iter.has_null_value= FALSE;

  init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
  range_par->mem_root= &alloc;
  range_par->old_root= thd->mem_root;

  if (create_partition_index_description(&prune_param))
  {
    /* thd->mem_root and thd->no_errors have not been touched yet */
    mark_all_partitions_as_used(part_info);
    free_root(&alloc,MYF(0));           // Return memory & allocator
    DBUG_RETURN(FALSE);
  }

  range_par->thd= thd;
  range_par->table= table;
  /* range_par->cond doesn't need initialization */
  range_par->prev_tables= range_par->read_tables= 0;
  range_par->current_table= table->map;

  range_par->keys= 1; // one index
  range_par->using_real_indexes= FALSE;
  range_par->remove_jump_scans= FALSE;
  range_par->real_keynr[0]= 0;

  thd->no_errors=1;                             // Don't warn about NULL
  thd->mem_root=&alloc;

  bitmap_clear_all(&part_info->used_partitions);

  prune_param.key= prune_param.range_param.key_parts;
  SEL_TREE *tree;
  int res;

  tree= get_mm_tree(range_par, pprune_cond);
  if (!tree)
    goto all_used;

  if (tree->type == SEL_TREE::IMPOSSIBLE)
  {
    retval= TRUE;
    goto end;
  }

  if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
    goto all_used;

  if (tree->merges.is_empty())
  {
    /* Range analysis has produced a single list of intervals. */
    prune_param.arg_stack_end= prune_param.arg_stack;
    prune_param.cur_part_fields= 0;
    prune_param.cur_subpart_fields= 0;
    init_all_partitions_iterator(part_info, &prune_param.part_iter);
    if (!tree->keys[0] || (-1 == (res= find_used_partitions(&prune_param,
                                                            tree->keys[0]))))
      goto all_used;
  }
  else
  {
    if (tree->merges.elements == 1)
    {
      /*
        Range analysis has produced a "merge" of several intervals lists, a
        SEL_TREE that represents an expression in form
          sel_imerge = (tree1 OR tree2 OR ... OR treeN)
        that cannot be reduced to one tree. This can only happen when
        partitioning index has several keyparts and the condition is OR of
        conditions that refer to different key parts. For example, we'll get
        here for "partitioning_field=const1 OR subpartitioning_field=const2"
      */
      if (-1 == (res= find_used_partitions_imerge(&prune_param,
                                                  tree->merges.head())))
        goto all_used;
    }
    else
    {
      /*
        Range analysis has produced a list of several imerges, i.e. a
        structure that represents a condition in form
        imerge_list= (sel_imerge1 AND sel_imerge2 AND ... AND sel_imergeN)
        This is produced for complicated WHERE clauses that range analyzer
        can't really analyze properly.
      */
      if (-1 == (res= find_used_partitions_imerge_list(&prune_param,
                                                       tree->merges)))
        goto all_used;
    }
  }

  /*
    res == 0 => no used partitions => retval=TRUE
    res == 1 => some used partitions => retval=FALSE
    res == -1 - we jump over this line to all_used:
  */
  retval= test(!res);
  goto end;

all_used:
  retval= FALSE; // some partitions are used
  mark_all_partitions_as_used(prune_param.part_info);
end:
  /* Common exit: undo the changes made to thd and release the memory */
  thd->no_errors=0;
  thd->mem_root= range_par->old_root;
  free_root(&alloc,MYF(0));                     // Return memory & allocator
  DBUG_RETURN(retval);
}


/*
2409
  Store field key image to table record
2410 2411

  SYNOPSIS
2412 2413 2414 2415 2416 2417 2418 2419 2420 2421
    store_key_image_to_rec()
      field  Field which key image should be stored
      ptr    Field value in key format
      len    Length of the value, in bytes

  DESCRIPTION
    Copy the field value from its key image to the table record. The source
    is the value in key image format, occupying len bytes in buffer pointed
    by ptr. The destination is table record, in "field value in table record"
    format.
2422 2423
*/

2424
void store_key_image_to_rec(Field *field, char *ptr, uint len)
2425 2426 2427 2428 2429 2430 2431 2432 2433
{
  /* Do the same as print_key() does */ 
  if (field->real_maybe_null())
  {
    if (*ptr)
    {
      field->set_null();
      return;
    }
2434
    field->set_notnull();
2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446
    ptr++;
  }    
  field->set_key_image(ptr, len); 
}


/*
  For SEL_ARG* array, store sel_arg->min values into table record buffer

  SYNOPSIS
    store_selargs_to_rec()
      ppar   Partition pruning context
2447
      start  Array of SEL_ARG* for which the minimum values should be stored
2448
      num    Number of elements in the array
2449 2450 2451 2452

  DESCRIPTION
    For each SEL_ARG* interval in the specified array, store the left edge
    field value (sel_arg->min, key image format) into the table record.
2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471
*/

static void store_selargs_to_rec(PART_PRUNE_PARAM *ppar, SEL_ARG **start,
                                 int num)
{
  KEY_PART *key_parts= ppar->range_param.key_parts;
  SEL_ARG **const stop= start + num;

  /*
    Walk the array and copy the left edge (min_value) of every interval into
    the record buffer of the field the interval is defined on. The length of
    the value is taken from the key part description of that field.
  */
  while (start != stop)
  {
    SEL_ARG *arg= *start++;
    uint value_len= key_parts[arg->part].length;
    store_key_image_to_rec(arg->field, arg->min_value, value_len);
  }
}


/* Mark a partition as used in the case when there are no subpartitions */
static void mark_full_partition_used_no_parts(partition_info* part_info,
                                              uint32 part_id)
{
  DBUG_ENTER("mark_full_partition_used_no_parts");
  DBUG_PRINT("enter", ("Mark partition %u as used", part_id));
  /* Without subpartitions the bit number is the partition id itself */
  bitmap_set_bit(&part_info->used_partitions, part_id);
  DBUG_VOID_RETURN;
}


/* Mark a partition as used in the case when there are subpartitions */
static void mark_full_partition_used_with_parts(partition_info *part_info,
                                                uint32 part_id)
{
  /*
    The subpartitions of partition #part_id occupy the bit range
    [part_id * no_subparts, (part_id + 1) * no_subparts) in used_partitions.
  */
  uint32 start= part_id * part_info->no_subparts;
  uint32 end=   start + part_info->no_subparts;
  DBUG_ENTER("mark_full_partition_used_with_parts");

  for (; start != end; start++)
  {
    DBUG_PRINT("info", ("1:Mark subpartition %u as used", start));
    bitmap_set_bit(&part_info->used_partitions, start);
  }
  DBUG_VOID_RETURN;
}

2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505
/*
  Find the set of used partitions for List<SEL_IMERGE>
  SYNOPSIS
    find_used_partitions_imerge_list()
      ppar    Partition pruning context.
      merges  List of SEL_IMERGE objects (ANDed together) to perform pruning for.
      
  DESCRIPTION
    List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...". 
    The set of used partitions is an intersection of used partitions sets
    for imerge_{i}.
2506
    We accumulate this intersection in a separate bitmap.
2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522
 
  RETURN 
    See find_used_partitions()
*/

static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
                                            List<SEL_IMERGE> &merges)
{
  MY_BITMAP all_merges;
  uint bitmap_bytes;
  uint32 *bitmap_buf;
  uint n_bits= ppar->part_info->used_partitions.n_bits;
  bitmap_bytes= bitmap_buffer_size(n_bits);
  if (!(bitmap_buf= (uint32*)alloc_root(ppar->range_param.mem_root,
                                        bitmap_bytes)))
  {
    /*
      Fallback, process just the first SEL_IMERGE. This can leave us with more
      partitions marked as used than actually needed.
    */
    return find_used_partitions_imerge(ppar, merges.head());
  }
  /* Start the accumulator with all bits set, then intersect into it */
  bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
  bitmap_set_prefix(&all_merges, n_bits);

  List_iterator<SEL_IMERGE> it(merges);
  SEL_IMERGE *imerge;
  while ((imerge=it++))
  {
    /* This call marks the partitions used by this imerge in used_partitions */
    int res= find_used_partitions_imerge(ppar, imerge);
    if (!res)
    {
      /* no used partitions on one ANDed imerge => no used partitions at all */
      return 0;
    }

    /*
      res == -1 means "all partitions are used" for this imerge, which does
      not narrow the intersection, so the accumulator is left unchanged.
    */
    if (res != -1)
      bitmap_intersect(&all_merges, &ppar->part_info->used_partitions);

    if (bitmap_is_clear_all(&all_merges))
      return 0;

    /* Give the next imerge a clean bitmap to mark its partitions in */
    bitmap_clear_all(&ppar->part_info->used_partitions);
  }
  /* Publish the accumulated intersection as the final set of partitions */
  memcpy(ppar->part_info->used_partitions.bitmap, all_merges.bitmap,
         bitmap_bytes);
  return 1;
}


/*
  Find the set of used partitions for SEL_IMERGE structure
  SYNOPSIS
    find_used_partitions_imerge()
      ppar    Partition pruning context.
      imerge  SEL_IMERGE to perform pruning for.
      
  DESCRIPTION
    SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
    trivial - just mark used partitions for each tree and bail out early
    if for some tree_{i} all partitions are used.
 
  RETURN 
    See find_used_partitions().
*/

2573
static
2574
int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
2575
{
2576 2577 2578 2579 2580 2581
  int res= 0;
  for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
  {
    ppar->arg_stack_end= ppar->arg_stack;
    ppar->cur_part_fields= 0;
    ppar->cur_subpart_fields= 0;
2582
    init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
2583 2584 2585 2586
    if (-1 == (res |= find_used_partitions(ppar, (*ptree)->keys[0])))
      return -1;
  }
  return res;
2587 2588 2589 2590
}


/*
2591
  Collect partitioning ranges for the SEL_ARG tree and mark partitions as used
2592 2593 2594 2595

  SYNOPSIS
    find_used_partitions()
      ppar      Partition pruning context.
2596
      key_tree  SEL_ARG range tree to perform pruning for
2597 2598 2599

  DESCRIPTION
    This function 
2600 2601
      * recursively walks the SEL_ARG* tree collecting partitioning "intervals"
      * finds the partitions one needs to use to get rows in these intervals
2602
      * marks these partitions as used.
2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619
    The next section describes the process in greater detail.
 
  IMPLEMENTATION
    TYPES OF RESTRICTIONS THAT WE CAN OBTAIN PARTITIONS FOR    
    We can find out which [sub]partitions to use if we obtain restrictions on 
    [sub]partitioning fields in the following form:
    1.  "partition_field1=const1 AND ... AND partition_fieldN=constN"
    1.1  Same as (1) but for subpartition fields

    If partitioning supports interval analysis (i.e. partitioning is a
    function of a single table field, and partition_info::
    get_part_iter_for_interval != NULL), then we can also use condition in
    this form:
    2.  "const1 <=? partition_field <=? const2"
    2.1  Same as (2) but for subpartition_field

    INFERRING THE RESTRICTIONS FROM SEL_ARG TREE
2620
    
2621
    The below is an example of what SEL_ARG tree may represent:
2622
    
2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650
    (start)
     |                           $
     |   Partitioning keyparts   $  subpartitioning keyparts
     |                           $
     |     ...          ...      $
     |      |            |       $
     | +---------+  +---------+  $  +-----------+  +-----------+
     \-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|
       +---------+  +---------+  $  +-----------+  +-----------+
            |                    $        |             |
            |                    $        |        +-----------+ 
            |                    $        |        | subpar2=c6|
            |                    $        |        +-----------+ 
            |                    $        |
            |                    $  +-----------+  +-----------+
            |                    $  | subpar1=c4|--| subpar2=c8|
            |                    $  +-----------+  +-----------+
            |                    $         
            |                    $
       +---------+               $  +------------+  +------------+
       | par1=c2 |------------------| subpar1=c10|--| subpar2=c12|
       +---------+               $  +------------+  +------------+
            |                    $
           ...                   $

    The up-down connections are connections via SEL_ARG::left and
    SEL_ARG::right. A horizontal connection to the right is the
    SEL_ARG::next_key_part connection.
2651
    
2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687
    find_used_partitions() traverses the entire tree via recursion on
     * SEL_ARG::next_key_part (from left to right on the picture)
     * SEL_ARG::left|right (up/down on the pic). Left-right recursion is
       performed for each depth level.
    
    Recursion descent on SEL_ARG::next_key_part is used to accumulate (in
    ppar->arg_stack) constraints on partitioning and subpartitioning fields.
    For the example in the above picture, one of stack states is:
      in find_used_partitions(key_tree = "subpar2=c5") (***)
      in find_used_partitions(key_tree = "subpar1=c3")
      in find_used_partitions(key_tree = "par2=c2")   (**)
      in find_used_partitions(key_tree = "par1=c1")
      in prune_partitions(...)
    We apply partitioning limits as soon as possible, e.g. when we reach the
    depth (**), we find which partition(s) correspond to "par1=c1 AND par2=c2",
    and save them in ppar->part_iter.
    When we reach the depth (***), we find which subpartition(s) correspond to
    "subpar1=c3 AND subpar2=c5", and then mark appropriate subpartitions in
    appropriate partitions as used.
    
    It is possible that constraints on some partitioning fields are missing.
    For the above example, consider this stack state:
      in find_used_partitions(key_tree = "subpar2=c12") (***)
      in find_used_partitions(key_tree = "subpar1=c10")
      in find_used_partitions(key_tree = "par1=c2")
      in prune_partitions(...)
    Here we don't have constraints for all partitioning fields. Since we've
    never set the ppar->part_iter to contain used set of partitions, we use
    its default "all partitions" value.  We get the subpartition id for
    "subpar1=c10 AND subpar2=c12", and mark that subpartition as used in every
    partition.

    The inverse is also possible: we may get constraints on partitioning
    fields, but not constraints on subpartitioning fields. In that case,
    calls to find_used_partitions() with depth below (**) will return -1,
    and we will mark entire partition as used.
2688

2689 2690
  TODO
    Replace recursion on SEL_ARG::left and SEL_ARG::right with a loop
2691 2692 2693 2694 2695

  RETURN
    1   OK, one or more [sub]partitions are marked as used.
    0   The passed condition doesn't match any partitions
   -1   Couldn't infer any partition pruning "intervals" from the passed 
2696 2697
        SEL_ARG* tree (which means that all partitions should be marked as
        used) Marking partitions as used is the responsibility of the caller.
2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715
*/

static
int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
{
  int res, left_res=0, right_res=0;
  int partno= (int)key_tree->part;
  bool pushed= FALSE;
  bool set_full_part_if_bad_ret= FALSE;

  /* Process the left subtree (other intervals on the same key part) first */
  if (key_tree->left != &null_element)
  {
    if (-1 == (left_res= find_used_partitions(ppar,key_tree->left)))
      return -1;
  }

  if (key_tree->type == SEL_ARG::KEY_RANGE)
  {
    /*
      Keypart #0 is a partitioning field only if partitioning fields were
      included into the partitioning index description (part_fields != 0).
      Otherwise keypart #0 is the first subpartitioning field and its
      interval must not be passed to partition interval analysis.
    */
    if (partno == 0 && ppar->part_fields &&
        (NULL != ppar->part_info->get_part_iter_for_interval))
    {
      /*
        Partitioning is done by RANGE|INTERVAL(monotonic_expr(fieldX)), and
        we got "const1 CMP fieldX CMP const2" interval <-- psergey-todo: change
      */
      DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                    ppar->range_param.
                                                    key_parts););
      res= ppar->part_info->
           get_part_iter_for_interval(ppar->part_info,
                                      FALSE,
                                      key_tree->min_value,
                                      key_tree->max_value,
                                      key_tree->min_flag | key_tree->max_flag,
                                      &ppar->part_iter);
      if (!res)
        goto go_right; /* res=0 --> no satisfying partitions */
      if (res == -1)
      {
        //get a full range iterator
        init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
      }
      /*
        Save our intent to mark full partition as used if we will not be able
        to obtain further limits on subpartitions
      */
      set_full_part_if_bad_ret= TRUE;
      goto process_next_key_part;
    }

    if (partno == ppar->last_subpart_partno &&
        (NULL != ppar->part_info->get_subpart_iter_for_interval))
    {
      PARTITION_ITERATOR subpart_iter;
      DBUG_EXECUTE("info", dbug_print_segment_range(key_tree,
                                                    ppar->range_param.
                                                    key_parts););
      res= ppar->part_info->
           get_subpart_iter_for_interval(ppar->part_info,
                                         TRUE,
                                         key_tree->min_value,
                                         key_tree->max_value,
                                         key_tree->min_flag | key_tree->max_flag,
                                         &subpart_iter);
      DBUG_ASSERT(res); /* We can't get "no satisfying subpartitions" */
      if (res == -1)
        return -1; /* all subpartitions satisfy */

      /* Collect the set of matching subpartition ids into subparts_bitmap */
      uint32 subpart_id;
      bitmap_clear_all(&ppar->subparts_bitmap);
      while ((subpart_id= subpart_iter.get_next(&subpart_iter)) != NOT_A_PARTITION_ID)
        bitmap_set_bit(&ppar->subparts_bitmap, subpart_id);

      /*
        Mark each collected subpartition as used in every partition that
        ppar->part_iter yields.
      */
      uint32 part_id;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
              NOT_A_PARTITION_ID)
      {
        for (uint i= 0; i < ppar->part_info->no_subparts; i++)
          if (bitmap_is_set(&ppar->subparts_bitmap, i))
            bitmap_set_bit(&ppar->part_info->used_partitions,
                           part_id * ppar->part_info->no_subparts + i);
      }
      goto go_right;
    }

    if (key_tree->is_singlepoint())
    {
      /* Push this "field=const" SEL_ARG onto the stack of constraints */
      pushed= TRUE;
      ppar->cur_part_fields+=    ppar->is_part_keypart[partno];
      ppar->cur_subpart_fields+= ppar->is_subpart_keypart[partno];
      *(ppar->arg_stack_end++) = key_tree;

      if (partno == ppar->last_part_partno &&
          ppar->cur_part_fields == ppar->part_fields)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all partitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack, ppar->part_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack,
                                                       ppar->part_fields););
        uint32 part_id;
        longlong func_value;
        /* Find in which partition the {const1, ...,constN} tuple goes */
        if (ppar->get_top_partition_id_func(ppar->part_info, &part_id,
                                            &func_value))
        {
          res= 0; /* No satisfying partitions */
          goto pop_and_go_right;
        }
        /* Remember the limit we got - single partition #part_id */
        init_single_partition_iterator(part_id, &ppar->part_iter);

        /*
          If there are no subpartitions/we fail to get any limit for them,
          then we'll mark full partition as used.
        */
        set_full_part_if_bad_ret= TRUE;
        goto process_next_key_part;
      }

      if (partno == ppar->last_subpart_partno &&
          ppar->cur_subpart_fields == ppar->subpart_fields)
      {
        /*
          Ok, we've got "fieldN<=>constN"-type SEL_ARGs for all subpartitioning
          fields. Save all constN constants into table record buffer.
        */
        store_selargs_to_rec(ppar, ppar->arg_stack_end - ppar->subpart_fields,
                             ppar->subpart_fields);
        DBUG_EXECUTE("info", dbug_print_singlepoint_range(ppar->arg_stack_end-
                                                       ppar->subpart_fields,
                                                       ppar->subpart_fields););
        /* Find the subpartition (it's HASH/KEY so we always have one) */
        partition_info *part_info= ppar->part_info;
        uint32 subpart_id= part_info->get_subpartition_id(part_info);

        /*
          Mark this subpartition as used in every partition that
          ppar->part_iter yields.
        */
        uint32 part_id;
        while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) !=
                NOT_A_PARTITION_ID)
        {
          bitmap_set_bit(&part_info->used_partitions,
                         part_id * part_info->no_subparts + subpart_id);
        }
        res= 1; /* Some partitions were marked as used */
        goto pop_and_go_right;
      }
    }
    else
    {
      /*
        Can't handle condition on current key part. If we're that deep that
        we're processing subpartititoning's key parts, this means we'll not be
        able to infer any suitable condition, so bail out.
      */
      if (partno >= ppar->last_part_partno)
        return -1;
    }
  }

process_next_key_part:
  /* Descend to the next key part; no next key part means "no further limit" */
  if (key_tree->next_key_part)
    res= find_used_partitions(ppar, key_tree->next_key_part);
  else
    res= -1;

  if (set_full_part_if_bad_ret)
  {
    if (res == -1)
    {
      /* Got "full range" for subpartitioning fields */
      uint32 part_id;
      bool found= FALSE;
      while ((part_id= ppar->part_iter.get_next(&ppar->part_iter)) != NOT_A_PARTITION_ID)
      {
        ppar->mark_full_partition_used(ppar->part_info, part_id);
        found= TRUE;
      }
      res= test(found);
    }
    /*
      Restore the "used partitions iterator" to the default setting that
      specifies iteration over all partitions.
    */
    init_all_partitions_iterator(ppar->part_info, &ppar->part_iter);
  }

  if (pushed)
  {
pop_and_go_right:
    /* Pop this key part info off the "stack" */
    ppar->arg_stack_end--;
    ppar->cur_part_fields-=    ppar->is_part_keypart[partno];
    ppar->cur_subpart_fields-= ppar->is_subpart_keypart[partno];
  }

  if (res == -1)
    return -1;
go_right:
  /* Finally process the right subtree of the same key part */
  if (key_tree->right != &null_element)
  {
    if (-1 == (right_res= find_used_partitions(ppar,key_tree->right)))
      return -1;
  }
  /* 1 if this node or either of its subtrees marked some partitions */
  return (left_res || right_res || res);
}
 

/* Set every bit in the table's "used partitions" bitmap */
static void mark_all_partitions_as_used(partition_info *part_info)
{
  MY_BITMAP *used_map= &part_info->used_partitions;
  bitmap_set_all(used_map);
}


/*
  Check if field types allow to construct partitioning index description
 
  SYNOPSIS
    fields_ok_for_partition_index()
      pfield  NULL-terminated array of pointers to fields.

  DESCRIPTION
    For an array of fields, check if we can use all of the fields to create
    partitioning index description.
    
    We can't process GEOMETRY fields - for these fields singlepoint intervals
    can't be generated, and non-singlepoint ones are "special" kinds of
    intervals to which our processing logic can't be applied.

    It is not known if we could process ENUM fields, so they are disabled to be
    on the safe side.

  RETURN 
    TRUE   Yes, fields can be used in partitioning index
    FALSE  Otherwise
*/

static bool fields_ok_for_partition_index(Field **pfield)
{
  /* No field array at all: nothing to build an index description from */
  if (!pfield)
    return FALSE;

  /* The array is NULL-terminated; reject it on the first unsupported type */
  while (*pfield)
  {
    switch ((*pfield)->real_type()) {
    case FIELD_TYPE_ENUM:
    case FIELD_TYPE_GEOMETRY:
      return FALSE;
    default:
      break;
    }
    pfield++;
  }
  return TRUE;
}


/*
  Create partition index description and fill related info in the context
  struct

  SYNOPSIS
2956
    create_partition_index_description()
2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972
      prune_par  INOUT Partition pruning context

  DESCRIPTION
    Create partition index description. Partition index description is:

      part_index(used_fields_list(part_expr), used_fields_list(subpart_expr))

    If partitioning/sub-partitioning uses ENUM or GEOMETRY fields, then the
    corresponding fields_list(...) is not included into index description
    and we don't perform partition pruning for partitions/subpartitions.

  RETURN
    TRUE   Out of memory or can't do partition pruning at all
    FALSE  OK
*/

2973
static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
{
  RANGE_OPT_PARAM *range_par= &(ppar->range_param);
  partition_info *part_info= ppar->part_info;
  uint used_part_fields, used_subpart_fields;

  /* A field list is used only if all of its fields have supported types */
  used_part_fields= fields_ok_for_partition_index(part_info->part_field_array) ?
                      part_info->no_part_fields : 0;
  used_subpart_fields=
    fields_ok_for_partition_index(part_info->subpart_field_array)?
      part_info->no_subpart_fields : 0;

  uint total_parts= used_part_fields + used_subpart_fields;

  /*
    Key parts are numbered with partitioning fields first, followed by
    subpartitioning fields. last_*_partno is -1 if the respective field list
    is not used.
  */
  ppar->part_fields=      used_part_fields;
  ppar->last_part_partno= (int)used_part_fields - 1;

  ppar->subpart_fields= used_subpart_fields;
  ppar->last_subpart_partno=
    used_subpart_fields?(int)(used_part_fields + used_subpart_fields - 1): -1;

  if (part_info->is_sub_partitioned())
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_with_parts;
    ppar->get_top_partition_id_func= part_info->get_part_partition_id;
  }
  else
  {
    ppar->mark_full_partition_used=  mark_full_partition_used_no_parts;
    ppar->get_top_partition_id_func= part_info->get_partition_id;
  }

  /* Allocate the key part array and the per-keypart helper arrays */
  KEY_PART *key_part;
  MEM_ROOT *alloc= range_par->mem_root;
  if (!total_parts ||
      !(key_part= (KEY_PART*)alloc_root(alloc, sizeof(KEY_PART)*
                                               total_parts)) ||
      !(ppar->arg_stack= (SEL_ARG**)alloc_root(alloc, sizeof(SEL_ARG*)*
                                                      total_parts)) ||
      !(ppar->is_part_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)) ||
      !(ppar->is_subpart_keypart= (my_bool*)alloc_root(alloc, sizeof(my_bool)*
                                                           total_parts)))
    return TRUE;

  if (ppar->subpart_fields)
  {
    /* Bitmap of subpartition ids, one bit per subpartition */
    uint32 *buf;
    uint32 bufsize= bitmap_buffer_size(ppar->part_info->no_subparts);
    if (!(buf= (uint32*)alloc_root(alloc, bufsize)))
      return TRUE;
    bitmap_init(&ppar->subparts_bitmap, buf, ppar->part_info->no_subparts, FALSE);
  }
  range_par->key_parts= key_part;
  Field **field= (ppar->part_fields)? part_info->part_field_array :
                                           part_info->subpart_field_array;
  /*
    If partitioning fields are not used, iteration starts directly in the
    subpartitioning field array, so the very first key part already is a
    subpartitioning key part.
  */
  bool in_subpart_fields= (ppar->part_fields == 0);
  for (uint part= 0; part < total_parts; part++, key_part++)
  {
    key_part->key=          0;
    key_part->part=	    part;
    key_part->length=       (*field)->pack_length_in_rec();
    /*
      psergey-todo: check yet again if this is correct for tricky field types,
      e.g. see "Fix a fatal error in decimal key handling" in open_binary_frm()
    */
    key_part->store_length= (*field)->pack_length();
    if ((*field)->real_maybe_null())
      key_part->store_length+= HA_KEY_NULL_LENGTH;
    if ((*field)->type() == FIELD_TYPE_BLOB ||
        (*field)->real_type() == MYSQL_TYPE_VARCHAR)
      key_part->store_length+= HA_KEY_BLOB_LENGTH;

    key_part->field=        (*field);
    key_part->image_type =  Field::itRAW;
    /* We don't set key_parts->null_bit as it will not be used */

    ppar->is_part_keypart[part]= !in_subpart_fields;
    ppar->is_subpart_keypart[part]= in_subpart_fields;

    /* End of the current field array: continue with subpartitioning fields */
    if (!*(++field))
    {
      field= part_info->subpart_field_array;
      in_subpart_fields= TRUE;
    }
  }
  range_par->key_parts_end= key_part;

  DBUG_EXECUTE("info", print_partitioning_index(range_par->key_parts,
                                                range_par->key_parts_end););
  return FALSE;
}


#ifndef DBUG_OFF

/* Print the list of partitioning index field names into debug trace. */
static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
{
  DBUG_ENTER("print_partitioning_index");
  DBUG_LOCK_FILE;
  fprintf(DBUG_FILE, "partitioning INDEX(");
  for (KEY_PART *p=parts; p != parts_end; p++)
  {
    /* Separate names with ", "; no separator before the first one */
    fprintf(DBUG_FILE, "%s%s", p==parts?"":", ", p->field->field_name);
  }
  fputs(");\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}

/* Print field value into debug trace, in NULL-aware way. */
static void dbug_print_field(Field *field)
{
  if (field->is_real_null())
    fprintf(DBUG_FILE, "NULL");
  else
  {
    /* Let val_str() use a stack buffer for the string form of the value */
    char buf[256];
    String str(buf, sizeof(buf), &my_charset_bin);
    str.length(0);
    String *pstr;
    pstr= field->val_str(&str);
    fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe());
  }
}


/* Print a "c1 < keypartX < c2" - type interval into debug trace. */
static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
{
  DBUG_ENTER("dbug_print_segment_range");
  DBUG_LOCK_FILE;
  /* Left edge: printed only if the interval has a lower bound */
  if (!(arg->min_flag & NO_MIN_RANGE))
  {
    /* Copy the bound into the field's record buffer so it can be printed */
    store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length);
    dbug_print_field(part->field);
    if (arg->min_flag & NEAR_MIN)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
  }

  fprintf(DBUG_FILE, "%s", part->field->field_name);

  /* Right edge: printed only if the interval has an upper bound */
  if (!(arg->max_flag & NO_MAX_RANGE))
  {
    if (arg->max_flag & NEAR_MAX)
      fputs(" < ", DBUG_FILE);
    else
      fputs(" <= ", DBUG_FILE);
    store_key_image_to_rec(part->field, (char*)(arg->max_value), part->length);
    dbug_print_field(part->field);
  }
  fputs("\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}


/*
  Print a singlepoint multi-keypart range interval to debug trace
 
  SYNOPSIS
3136
    dbug_print_singlepoint_range()
3137 3138 3139 3140 3141 3142 3143 3144
      start  Array of SEL_ARG* ptrs representing conditions on key parts
      num    Number of elements in the array.

  DESCRIPTION
    This function prints a "keypartN=constN AND ... AND keypartK=constK"-type 
    interval to debug trace.
*/

3145
static void dbug_print_singlepoint_range(SEL_ARG **start, uint num)
{
  DBUG_ENTER("dbug_print_singlepoint_range");
  DBUG_LOCK_FILE;
  SEL_ARG **end= start + num;

  for (SEL_ARG **arg= start; arg != end; arg++)
  {
    Field *field= (*arg)->field;
    fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name);
    /*
      The value printed is the one currently stored in the field's record
      buffer; the SEL_ARG's own value is not read here.
    */
    dbug_print_field(field);
  }
  fputs("\n", DBUG_FILE);
  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}
#endif

/****************************************************************************
 * Partition pruning code ends
 ****************************************************************************/
#endif

3168

3169
/*
3170 3171 3172 3173
  Get cost of 'sweep' full records retrieval.
  SYNOPSIS
    get_sweep_read_cost()
      param            Parameter from test_quick_select
3174
      records          # of records to be retrieved
3175
  RETURN
3176
    cost of sweep
3177
*/
3178

3179
double get_sweep_read_cost(const PARAM *param, ha_rows records)
3180
{
3181
  double result;
3182
  DBUG_ENTER("get_sweep_read_cost");
3183 3184
  if (param->table->file->primary_key_is_clustered())
  {
3185
    result= param->table->file->read_time(param->table->s->primary_key,
3186
                                          records, records);
3187 3188
  }
  else
3189
  {
3190
    double n_blocks=
3191
      ceil(ulonglong2double(param->table->file->data_file_length) / IO_SIZE);
3192 3193 3194 3195
    double busy_blocks=
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(records)));
    if (busy_blocks < 1.0)
      busy_blocks= 1.0;
3196
    DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
3197
                       busy_blocks));
3198
    /*
3199
      Disabled: Bail out if # of blocks to read is bigger than # of blocks in
3200 3201 3202 3203 3204 3205 3206 3207
      table data file.
    if (max_cost != DBL_MAX  && (busy_blocks+index_reads_cost) >= n_blocks)
      return 1;
    */
    JOIN *join= param->thd->lex->select_lex.join;
    if (!join || join->tables == 1)
    {
      /* No join, assume reading is done in one 'sweep' */
3208
      result= busy_blocks*(DISK_SEEK_BASE_COST +
3209 3210 3211 3212
                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
    }
    else
    {
3213
      /*
3214 3215 3216
        Possibly this is a join with source table being non-last table, so
        assume that disk seeks are random here.
      */
3217
      result= busy_blocks;
3218 3219
    }
  }
3220
  DBUG_PRINT("info",("returning cost=%g", result));
3221
  DBUG_RETURN(result);
3222
}
3223 3224


3225 3226 3227 3228
/*
  Get best plan for a SEL_IMERGE disjunctive expression.
  SYNOPSIS
    get_best_disjunct_quick()
3229 3230
      param     Parameter from check_quick_select function
      imerge    Expression to use
3231
      read_time Don't create scans with cost > read_time
3232

3233
  NOTES
3234
    index_merge cost is calculated as follows:
3235
    index_merge_cost =
3236 3237 3238 3239 3240
      cost(index_reads) +         (see #1)
      cost(rowid_to_row_scan) +   (see #2)
      cost(unique_use)            (see #3)

    1. cost(index_reads) =SUM_i(cost(index_read_i))
3241 3242
       For non-CPK scans,
         cost(index_read_i) = {cost of ordinary 'index only' scan}
3243 3244 3245 3246 3247
       For CPK scan,
         cost(index_read_i) = {cost of non-'index only' scan}

    2. cost(rowid_to_row_scan)
      If table PK is clustered then
3248
        cost(rowid_to_row_scan) =
3249
          {cost of ordinary clustered PK scan with n_ranges=n_rows}
3250 3251

      Otherwise, we use the following model to calculate costs:
3252
      We need to retrieve n_rows rows from file that occupies n_blocks blocks.
3253
      We assume that offsets of rows we need are independent variates with
3254
      uniform distribution in [0..max_file_offset] range.
3255

3256 3257
      We'll denote block as "busy" if it contains row(s) we need to retrieve
      and "empty" if doesn't contain rows we need.
3258

3259
      Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
3260
      applies to any block in file). Let x_i be a variate taking value 1 if
3261
      block #i is empty and 0 otherwise.
3262

3263 3264
      Then E(x_i) = (1 - 1/n_blocks)^n_rows;

3265 3266
      E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
        = n_blocks * ((1 - 1/n_blocks)^n_rows) =
3267 3268 3269 3270
       ~= n_blocks * exp(-n_rows/n_blocks).

      E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
       ~= n_blocks * (1 - exp(-n_rows/n_blocks)).
3271

3272 3273
      Average size of "hole" between neighbor non-empty blocks is
           E(hole_size) = n_blocks/E(n_busy_blocks).
3274

3275 3276 3277 3278 3279 3280
      The total cost of reading all needed blocks in one "sweep" is:

      E(n_busy_blocks)*
       (DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).

    3. Cost of Unique use is calculated in Unique::get_use_cost function.
3281 3282 3283 3284 3285

  ROR-union cost is calculated in the same way index_merge, but instead of
  Unique a priority queue is used.

  RETURN
3286 3287
    Created read plan
    NULL - Out of memory or no read scan could be built.
3288
*/
3289

3290 3291
static
TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
3292
                                         double read_time)
3293 3294 3295 3296 3297 3298 3299
{
  SEL_TREE **ptree;
  TRP_INDEX_MERGE *imerge_trp= NULL;
  uint n_child_scans= imerge->trees_next - imerge->trees;
  TRP_RANGE **range_scans;
  TRP_RANGE **cur_child;
  TRP_RANGE **cpk_scan= NULL;
monty@mysql.com's avatar
monty@mysql.com committed
3300
  bool imerge_too_expensive= FALSE;
3301 3302 3303 3304
  double imerge_cost= 0.0;
  ha_rows cpk_scan_records= 0;
  ha_rows non_cpk_scan_records= 0;
  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
monty@mysql.com's avatar
monty@mysql.com committed
3305 3306
  bool all_scans_ror_able= TRUE;
  bool all_scans_rors= TRUE;
3307 3308 3309 3310 3311 3312 3313 3314 3315
  uint unique_calc_buff_size;
  TABLE_READ_PLAN **roru_read_plans;
  TABLE_READ_PLAN **cur_roru_plan;
  double roru_index_costs;
  ha_rows roru_total_records;
  double roru_intersect_part= 1.0;
  DBUG_ENTER("get_best_disjunct_quick");
  DBUG_PRINT("info", ("Full table scan cost =%g", read_time));

3316
  if (!(range_scans= (TRP_RANGE**)alloc_root(param->mem_root,
3317 3318 3319
                                             sizeof(TRP_RANGE*)*
                                             n_child_scans)))
    DBUG_RETURN(NULL);
3320
  /*
3321 3322 3323
    Collect best 'range' scan for each of disjuncts, and, while doing so,
    analyze possibility of ROR scans. Also calculate some values needed by
    other parts of the code.
3324
  */
3325
  for (ptree= imerge->trees, cur_child= range_scans;
3326
       ptree != imerge->trees_next;
3327
       ptree++, cur_child++)
3328
  {
3329 3330
    DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
                                        "tree in SEL_IMERGE"););
monty@mysql.com's avatar
monty@mysql.com committed
3331
    if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, read_time)))
3332 3333
    {
      /*
3334
        One of index scans in this index_merge is more expensive than entire
3335 3336 3337
        table read for another available option. The entire index_merge (and
        any possible ROR-union) will be more expensive then, too. We continue
        here only to update SQL_SELECT members.
3338
      */
monty@mysql.com's avatar
monty@mysql.com committed
3339
      imerge_too_expensive= TRUE;
3340 3341 3342
    }
    if (imerge_too_expensive)
      continue;
3343

3344 3345 3346
    imerge_cost += (*cur_child)->read_cost;
    all_scans_ror_able &= ((*ptree)->n_ror_scans > 0);
    all_scans_rors &= (*cur_child)->is_ror;
3347
    if (pk_is_clustered &&
3348 3349
        param->real_keynr[(*cur_child)->key_idx] ==
        param->table->s->primary_key)
3350
    {
3351 3352
      cpk_scan= cur_child;
      cpk_scan_records= (*cur_child)->records;
3353 3354
    }
    else
3355
      non_cpk_scan_records += (*cur_child)->records;
3356
  }
3357

3358
  DBUG_PRINT("info", ("index_merge scans cost=%g", imerge_cost));
3359
  if (imerge_too_expensive || (imerge_cost > read_time) ||
3360 3361
      (non_cpk_scan_records+cpk_scan_records >= param->table->file->records) &&
      read_time != DBL_MAX)
3362
  {
3363 3364
    /*
      Bail out if it is obvious that both index_merge and ROR-union will be
3365
      more expensive
3366
    */
3367 3368
    DBUG_PRINT("info", ("Sum of index_merge scans is more expensive than "
                        "full table scan, bailing out"));
3369
    DBUG_RETURN(NULL);
3370
  }
3371
  if (all_scans_rors)
3372
  {
3373 3374
    roru_read_plans= (TABLE_READ_PLAN**)range_scans;
    goto skip_to_ror_scan;
3375
  }
3376 3377
  if (cpk_scan)
  {
3378 3379
    /*
      Add one ROWID comparison for each row retrieved on non-CPK scan.  (it
3380 3381 3382
      is done in QUICK_RANGE_SELECT::row_in_ranges)
     */
    imerge_cost += non_cpk_scan_records / TIME_FOR_COMPARE_ROWID;
3383 3384 3385
  }

  /* Calculate cost(rowid_to_row_scan) */
3386
  imerge_cost += get_sweep_read_cost(param, non_cpk_scan_records);
3387
  DBUG_PRINT("info",("index_merge cost with rowid-to-row scan: %g",
3388
                     imerge_cost));
3389 3390
  if (imerge_cost > read_time)
    goto build_ror_index_merge;
3391 3392

  /* Add Unique operations cost */
3393 3394
  unique_calc_buff_size=
    Unique::get_cost_calc_buff_size(non_cpk_scan_records,
3395 3396 3397 3398 3399 3400
                                    param->table->file->ref_length,
                                    param->thd->variables.sortbuff_size);
  if (param->imerge_cost_buff_size < unique_calc_buff_size)
  {
    if (!(param->imerge_cost_buff= (uint*)alloc_root(param->mem_root,
                                                     unique_calc_buff_size)))
3401
      DBUG_RETURN(NULL);
3402 3403 3404
    param->imerge_cost_buff_size= unique_calc_buff_size;
  }

3405
  imerge_cost +=
3406
    Unique::get_use_cost(param->imerge_cost_buff, non_cpk_scan_records,
3407 3408
                         param->table->file->ref_length,
                         param->thd->variables.sortbuff_size);
3409
  DBUG_PRINT("info",("index_merge total cost: %g (wanted: less then %g)",
3410 3411 3412 3413 3414 3415 3416
                     imerge_cost, read_time));
  if (imerge_cost < read_time)
  {
    if ((imerge_trp= new (param->mem_root)TRP_INDEX_MERGE))
    {
      imerge_trp->read_cost= imerge_cost;
      imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
3417
      imerge_trp->records= min(imerge_trp->records,
3418 3419 3420 3421 3422 3423
                               param->table->file->records);
      imerge_trp->range_scans= range_scans;
      imerge_trp->range_scans_end= range_scans + n_child_scans;
      read_time= imerge_cost;
    }
  }
3424

3425
build_ror_index_merge:
3426 3427
  if (!all_scans_ror_able || param->thd->lex->sql_command == SQLCOM_DELETE)
    DBUG_RETURN(imerge_trp);
3428

3429 3430
  /* Ok, it is possible to build a ROR-union, try it. */
  bool dummy;
3431
  if (!(roru_read_plans=
3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444
          (TABLE_READ_PLAN**)alloc_root(param->mem_root,
                                        sizeof(TABLE_READ_PLAN*)*
                                        n_child_scans)))
    DBUG_RETURN(imerge_trp);
skip_to_ror_scan:
  roru_index_costs= 0.0;
  roru_total_records= 0;
  cur_roru_plan= roru_read_plans;

  /* Find 'best' ROR scan for each of trees in disjunction */
  for (ptree= imerge->trees, cur_child= range_scans;
       ptree != imerge->trees_next;
       ptree++, cur_child++, cur_roru_plan++)
3445
  {
3446 3447
    /*
      Assume the best ROR scan is the one that has cheapest full-row-retrieval
3448 3449
      scan cost.
      Also accumulate index_only scan costs as we'll need them to calculate
3450 3451 3452 3453 3454 3455 3456
      overall index_intersection cost.
    */
    double cost;
    if ((*cur_child)->is_ror)
    {
      /* Ok, we have index_only cost, now get full rows scan cost */
      cost= param->table->file->
3457
              read_time(param->real_keynr[(*cur_child)->key_idx], 1,
3458 3459 3460 3461 3462 3463 3464
                        (*cur_child)->records) +
              rows2double((*cur_child)->records) / TIME_FOR_COMPARE;
    }
    else
      cost= read_time;

    TABLE_READ_PLAN *prev_plan= *cur_child;
3465
    if (!(*cur_roru_plan= get_best_ror_intersect(param, *ptree, cost,
3466 3467 3468 3469 3470 3471 3472 3473 3474
                                                 &dummy)))
    {
      if (prev_plan->is_ror)
        *cur_roru_plan= prev_plan;
      else
        DBUG_RETURN(imerge_trp);
      roru_index_costs += (*cur_roru_plan)->read_cost;
    }
    else
3475 3476
      roru_index_costs +=
        ((TRP_ROR_INTERSECT*)(*cur_roru_plan))->index_scan_costs;
3477
    roru_total_records += (*cur_roru_plan)->records;
3478
    roru_intersect_part *= (*cur_roru_plan)->records /
3479
                           param->table->file->records;
3480
  }
3481

3482 3483
  /*
    rows to retrieve=
3484
      SUM(rows_in_scan_i) - table_rows * PROD(rows_in_scan_i / table_rows).
3485
    This is valid because index_merge construction guarantees that conditions
3486 3487 3488
    in disjunction do not share key parts.
  */
  roru_total_records -= (ha_rows)(roru_intersect_part*
3489 3490 3491
                                  param->table->file->records);
  /* ok, got a ROR read plan for each of the disjuncts
    Calculate cost:
3492 3493 3494 3495 3496 3497
    cost(index_union_scan(scan_1, ... scan_n)) =
      SUM_i(cost_of_index_only_scan(scan_i)) +
      queue_use_cost(rowid_len, n) +
      cost_of_row_retrieval
    See get_merge_buffers_cost function for queue_use_cost formula derivation.
  */
3498

3499
  double roru_total_cost;
3500 3501 3502
  roru_total_cost= roru_index_costs +
                   rows2double(roru_total_records)*log((double)n_child_scans) /
                   (TIME_FOR_COMPARE_ROWID * M_LN2) +
3503 3504
                   get_sweep_read_cost(param, roru_total_records);

3505
  DBUG_PRINT("info", ("ROR-union: cost %g, %d members", roru_total_cost,
3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519
                      n_child_scans));
  TRP_ROR_UNION* roru;
  if (roru_total_cost < read_time)
  {
    if ((roru= new (param->mem_root) TRP_ROR_UNION))
    {
      roru->first_ror= roru_read_plans;
      roru->last_ror= roru_read_plans + n_child_scans;
      roru->read_cost= roru_total_cost;
      roru->records= roru_total_records;
      DBUG_RETURN(roru);
    }
  }
  DBUG_RETURN(imerge_trp);
3520 3521 3522 3523 3524 3525 3526
}


/*
  Calculate cost of 'index only' scan for given index and number of records.

  SYNOPSIS
3527
    get_index_only_read_time()
3528 3529 3530 3531 3532
      param    parameters structure
      records  #of records to read
      keynr    key to read

  NOTES
3533
    It is assumed that we will read trough the whole key range and that all
3534 3535 3536 3537
    key blocks are half full (normally things are much better). It is also
    assumed that each time we read the next key from the index, the handler
    performs a random seek, thus the cost is proportional to the number of
    blocks read.
3538 3539 3540 3541 3542 3543

  TODO:
    Move this to handler->read_time() by adding a flag 'index-only-read' to
    this call. The reason for doing this is that the current function doesn't
    handle the case when the row is stored in the b-tree (like in innodb
    clustered index)
3544 3545
*/

3546
static double get_index_only_read_time(const PARAM* param, ha_rows records,
3547
                                       int keynr)
3548 3549 3550 3551 3552 3553 3554
{
  double read_time;
  uint keys_per_block= (param->table->file->block_size/2/
			(param->table->key_info[keynr].key_length+
			 param->table->file->ref_length) + 1);
  read_time=((double) (records+keys_per_block-1)/
             (double) keys_per_block);
3555
  return read_time;
3556 3557
}

3558

3559 3560
typedef struct st_ror_scan_info
{
3561 3562 3563 3564 3565
  uint      idx;      /* # of used key in param->keys */
  uint      keynr;    /* # of used key in table */
  ha_rows   records;  /* estimate of # records this scan will return */

  /* Set of intervals over key fields that will be used for row retrieval. */
3566
  SEL_ARG   *sel_arg;
3567 3568

  /* Fields used in the query and covered by this ROR scan. */
3569 3570
  MY_BITMAP covered_fields;
  uint      used_fields_covered; /* # of set bits in covered_fields */
3571
  int       key_rec_length; /* length of key record (including rowid) */
3572 3573

  /*
3574 3575
    Cost of reading all index records with values in sel_arg intervals set
    (assuming there is no need to access full table records)
3576 3577
  */
  double    index_read_cost;
3578 3579 3580
  uint      first_uncovered_field; /* first unused bit in covered_fields */
  uint      key_components; /* # of parts in the key */
} ROR_SCAN_INFO;
3581 3582 3583


/*
  Create ROR_SCAN_INFO* structure with a single ROR scan on index idx using
  sel_arg set of intervals.

  SYNOPSIS
    make_ror_scan()
      param    Parameter from test_quick_select function
      idx      Index of key in param->keys
      sel_arg  Set of intervals for a given key

  NOTES
    The structure and its bitmap buffer are allocated on param->mem_root.
    This function fills idx, keynr, key_rec_length, sel_arg, records,
    covered_fields and index_read_cost; the remaining members
    (used_fields_covered, first_uncovered_field, key_components) are not
    set here.

  RETURN
    NULL - out of memory
    ROR scan structure containing a scan for {idx, sel_arg}
*/

static
ROR_SCAN_INFO *make_ror_scan(const PARAM *param, int idx, SEL_ARG *sel_arg)
{
  uint keynr= param->real_keynr[idx];
  ROR_SCAN_INFO *ror_scan;
  uint32 *bitmap_buf;
  DBUG_ENTER("make_ror_scan");

  ror_scan= (ROR_SCAN_INFO*) alloc_root(param->mem_root,
                                        sizeof(ROR_SCAN_INFO));
  if (!ror_scan)
    DBUG_RETURN(NULL);

  ror_scan->idx= idx;
  ror_scan->keynr= keynr;
  ror_scan->sel_arg= sel_arg;
  ror_scan->records= param->table->quick_rows[keynr];
  /* An index record is the key value followed by the rowid */
  ror_scan->key_rec_length= (param->table->key_info[keynr].key_length +
                             param->table->file->ref_length);

  bitmap_buf= (uint32*) alloc_root(param->mem_root,
                                   param->fields_bitmap_size);
  if (!bitmap_buf)
    DBUG_RETURN(NULL);

  /* fields_bitmap_size is in bytes, bitmap_init() wants the # of bits */
  if (bitmap_init(&ror_scan->covered_fields, bitmap_buf,
                  param->fields_bitmap_size*8, FALSE))
    DBUG_RETURN(NULL);
  bitmap_clear_all(&ror_scan->covered_fields);

  /* Mark every needed field that is a part of this key as covered */
  KEY_PART_INFO *key_part= param->table->key_info[keynr].key_part;
  uint n_key_parts= param->table->key_info[keynr].key_parts;
  for (uint part= 0; part < n_key_parts; part++)
  {
    if (bitmap_is_set(&param->needed_fields, key_part[part].fieldnr))
      bitmap_set_bit(&ror_scan->covered_fields, key_part[part].fieldnr);
  }

  ror_scan->index_read_cost=
    get_index_only_read_time(param, ror_scan->records, ror_scan->keynr);
  DBUG_RETURN(ror_scan);
}


3641
/*
3642 3643 3644 3645 3646 3647 3648
  Compare two ROR_SCAN_INFO** by  E(#records_matched) * key_record_length.
  SYNOPSIS
    cmp_ror_scan_info()
      a ptr to first compared value
      b ptr to second compared value

  RETURN
3649
   -1 a < b
3650 3651
    0 a = b
    1 a > b
3652
*/
3653

3654
static int cmp_ror_scan_info(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
3655 3656 3657 3658 3659 3660 3661
{
  double val1= rows2double((*a)->records) * (*a)->key_rec_length;
  double val2= rows2double((*b)->records) * (*b)->key_rec_length;
  return (val1 < val2)? -1: (val1 == val2)? 0 : 1;
}

/*
3662 3663 3664
  Compare two ROR_SCAN_INFO** by
   (#covered fields in F desc,
    #components asc,
3665
    number of first not covered component asc)
3666 3667 3668 3669 3670 3671 3672

  SYNOPSIS
    cmp_ror_scan_info_covering()
      a ptr to first compared value
      b ptr to second compared value

  RETURN
3673
   -1 a < b
3674 3675
    0 a = b
    1 a > b
3676
*/
3677

3678
static int cmp_ror_scan_info_covering(ROR_SCAN_INFO** a, ROR_SCAN_INFO** b)
3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694
{
  if ((*a)->used_fields_covered > (*b)->used_fields_covered)
    return -1;
  if ((*a)->used_fields_covered < (*b)->used_fields_covered)
    return 1;
  if ((*a)->key_components < (*b)->key_components)
    return -1;
  if ((*a)->key_components > (*b)->key_components)
    return 1;
  if ((*a)->first_uncovered_field < (*b)->first_uncovered_field)
    return -1;
  if ((*a)->first_uncovered_field > (*b)->first_uncovered_field)
    return 1;
  return 0;
}

3695

3696
/* Auxiliary structure for incremental ROR-intersection creation */
3697
typedef struct
3698 3699 3700
{
  const PARAM *param;
  MY_BITMAP covered_fields; /* union of fields covered by all scans */
3701
  /*
3702
    Fraction of table records that satisfies conditions of all scans.
3703
    This is the number of full records that will be retrieved if a
3704 3705
    non-index_only index intersection will be employed.
  */
3706 3707 3708 3709
  double out_rows;
  /* TRUE if covered_fields is a superset of needed_fields */
  bool is_covering;

3710
  ha_rows index_records; /* sum(#records to look in indexes) */
3711 3712
  double index_scan_costs; /* SUM(cost of 'index-only' scans) */
  double total_cost;
3713
} ROR_INTERSECT_INFO;
3714 3715


3716 3717 3718 3719
/*
  Allocate a ROR_INTERSECT_INFO and initialize it to contain zero scans.

  SYNOPSIS
3720 3721 3722
    ror_intersect_init()
      param         Parameter from test_quick_select

3723 3724 3725 3726 3727 3728
  RETURN
    allocated structure
    NULL on error
*/

static
3729
ROR_INTERSECT_INFO* ror_intersect_init(const PARAM *param)
3730 3731
{
  ROR_INTERSECT_INFO *info;
3732
  uint32* buf;
3733
  if (!(info= (ROR_INTERSECT_INFO*)alloc_root(param->mem_root,
3734 3735 3736
                                              sizeof(ROR_INTERSECT_INFO))))
    return NULL;
  info->param= param;
3737
  if (!(buf= (uint32*)alloc_root(param->mem_root,
monty@mysql.com's avatar
monty@mysql.com committed
3738
                                 param->fields_bitmap_size)))
3739 3740
    return NULL;
  if (bitmap_init(&info->covered_fields, buf, param->fields_bitmap_size*8,
monty@mysql.com's avatar
monty@mysql.com committed
3741
                  FALSE))
3742
    return NULL;
3743
  info->is_covering= FALSE;
3744
  info->index_scan_costs= 0.0;
3745 3746 3747
  info->index_records= 0;
  info->out_rows= param->table->file->records;
  bitmap_clear_all(&info->covered_fields);
3748 3749 3750
  return info;
}

3751 3752 3753 3754
/*
  Copy the state of one ROR-intersection into another.

  SYNOPSIS
    ror_intersect_cpy()
      dst  Destination; its covered_fields bitmap buffer must already exist
      src  Source

  NOTES
    Only the contents of the covered_fields bitmap are copied (as many bytes
    as the source map occupies), the destination keeps its own buffer.
    NOTE(review): this presumes both bitmaps have the same size - confirm
    that all callers obtain both structures from ror_intersect_init().
*/

void ror_intersect_cpy(ROR_INTERSECT_INFO *dst, const ROR_INTERSECT_INFO *src)
{
  /* Plain members */
  dst->param= src->param;
  dst->is_covering= src->is_covering;
  dst->out_rows= src->out_rows;
  dst->index_records= src->index_records;
  dst->index_scan_costs= src->index_scan_costs;
  dst->total_cost= src->total_cost;

  /* Bitmap contents */
  memcpy(dst->covered_fields.bitmap, src->covered_fields.bitmap,
         no_bytes_in_map(&src->covered_fields));
}
3762 3763


3764
/*
3765
  Get selectivity of a ROR scan wrt ROR-intersection.
3766

3767
  SYNOPSIS
3768 3769 3770 3771
    ror_scan_selectivity()
      info  ROR-interection 
      scan  ROR scan
      
3772
  NOTES
3773
    Suppose we have a condition on several keys
3774 3775
    cond=k_11=c_11 AND k_12=c_12 AND ...  // parts of first key
         k_21=c_21 AND k_22=c_22 AND ...  // parts of second key
3776
          ...
3777
         k_n1=c_n1 AND k_n3=c_n3 AND ...  (1) //parts of the key used by *scan
3778

3779 3780
    where k_ij may be the same as any k_pq (i.e. keys may have common parts).

3781
    A full row is retrieved if entire condition holds.
3782 3783

    The recursive procedure for finding P(cond) is as follows:
3784

3785
    First step:
3786
    Pick 1st part of 1st key and break conjunction (1) into two parts:
3787 3788
      cond= (k_11=c_11 AND R)

3789
    Here R may still contain condition(s) equivalent to k_11=c_11.
3790 3791
    Nevertheless, the following holds:

3792
      P(k_11=c_11 AND R) = P(k_11=c_11) * P(R | k_11=c_11).
3793 3794 3795 3796 3797

    Mark k_11 as fixed field (and satisfied condition) F, save P(F),
    save R to be cond and proceed to recursion step.

    Recursion step:
3798
    We have a set of fixed fields/satisfied conditions) F, probability P(F),
3799 3800 3801
    and remaining conjunction R
    Pick next key part on current key and its condition "k_ij=c_ij".
    We will add "k_ij=c_ij" into F and update P(F).
3802
    Lets denote k_ij as t,  R = t AND R1, where R1 may still contain t. Then
3803

3804
     P((t AND R1)|F) = P(t|F) * P(R1|t|F) = P(t|F) * P(R1|(t AND F)) (2)
3805 3806 3807 3808 3809 3810 3811

    (where '|' mean conditional probability, not "or")

    Consider the first multiplier in (2). One of the following holds:
    a) F contains condition on field used in t (i.e. t AND F = F).
      Then P(t|F) = 1

3812 3813
    b) F doesn't contain condition on field used in t. Then F and t are
     considered independent.
3814

3815
     P(t|F) = P(t|(fields_before_t_in_key AND other_fields)) =
3816 3817
          = P(t|fields_before_t_in_key).

3818 3819
     P(t|fields_before_t_in_key) = #records(fields_before_t_in_key) /
                                   #records(fields_before_t_in_key, t)
3820 3821

    The second multiplier is calculated by applying this step recursively.
3822

3823 3824 3825 3826 3827
  IMPLEMENTATION
    This function calculates the result of application of the "recursion step"
    described above for all fixed key members of a single key, accumulating set
    of covered fields, selectivity, etc.

3828
    The calculation is conducted as follows:
3829
    Lets denote #records(keypart1, ... keypartK) as n_k. We need to calculate
3830

3831 3832
     n_{k1}      n_{k_2}
    --------- * ---------  * .... (3)
3833
     n_{k1-1}    n_{k2_1}
3834

3835 3836 3837 3838
    where k1,k2,... are key parts which fields were not yet marked as fixed
    ( this is result of application of option b) of the recursion step for
      parts of a single key).
    Since it is reasonable to expect that most of the fields are not marked
3839
    as fixed, we calculate (3) as
3840 3841 3842

                                  n_{i1}      n_{i_2}
    (3) = n_{max_key_part}  / (   --------- * ---------  * ....  )
3843 3844 3845 3846
                                  n_{i1-1}    n_{i2_1}

    where i1,i2, .. are key parts that were already marked as fixed.

3847 3848
    In order to minimize number of expensive records_in_range calls we group
    and reduce adjacent fractions.
3849

3850
  RETURN
3851 3852
    Selectivity of given ROR scan.
    
3853 3854
*/

3855 3856
static double ror_scan_selectivity(const ROR_INTERSECT_INFO *info, 
                                   const ROR_SCAN_INFO *scan)
3857 3858
{
  double selectivity_mult= 1.0;
3859
  KEY_PART_INFO *key_part= info->param->table->key_info[scan->keynr].key_part;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
3860
  byte key_val[MAX_KEY_LENGTH+MAX_FIELD_WIDTH]; /* key values tuple */
3861
  char *key_ptr= (char*) key_val;
3862 3863
  SEL_ARG *sel_arg, *tuple_arg= NULL;
  bool cur_covered;
3864 3865
  bool prev_covered= test(bitmap_is_set(&info->covered_fields,
                                        key_part->fieldnr));
sergefp@mysql.com's avatar
sergefp@mysql.com committed
3866 3867 3868 3869 3870 3871
  key_range min_range;
  key_range max_range;
  min_range.key= (byte*) key_val;
  min_range.flag= HA_READ_KEY_EXACT;
  max_range.key= (byte*) key_val;
  max_range.flag= HA_READ_AFTER_KEY;
3872 3873
  ha_rows prev_records= info->param->table->file->records;
  DBUG_ENTER("ror_intersect_selectivity");
3874 3875 3876

  for (sel_arg= scan->sel_arg; sel_arg;
       sel_arg= sel_arg->next_key_part)
3877
  {
3878
    DBUG_PRINT("info",("sel_arg step"));
3879
    cur_covered= test(bitmap_is_set(&info->covered_fields,
3880
                                    key_part[sel_arg->part].fieldnr));
3881
    if (cur_covered != prev_covered)
3882
    {
3883
      /* create (part1val, ..., part{n-1}val) tuple. */
3884 3885
      ha_rows records;
      if (!tuple_arg)
3886
      {
3887 3888
        tuple_arg= scan->sel_arg;
        /* Here we use the length of the first key part */
3889
        tuple_arg->store_min(key_part->store_length, &key_ptr, 0);
3890 3891 3892 3893
      }
      while (tuple_arg->next_key_part != sel_arg)
      {
        tuple_arg= tuple_arg->next_key_part;
3894
        tuple_arg->store_min(key_part[tuple_arg->part].store_length, &key_ptr, 0);
3895
      }
3896
      min_range.length= max_range.length= ((char*) key_ptr - (char*) key_val);
3897 3898
      records= (info->param->table->file->
                records_in_range(scan->keynr, &min_range, &max_range));
3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909
      if (cur_covered)
      {
        /* uncovered -> covered */
        double tmp= rows2double(records)/rows2double(prev_records);
        DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
        selectivity_mult *= tmp;
        prev_records= HA_POS_ERROR;
      }
      else
      {
        /* covered -> uncovered */
3910
        prev_records= records;
3911
      }
3912
    }
3913 3914 3915 3916
    prev_covered= cur_covered;
  }
  if (!prev_covered)
  {
3917
    double tmp= rows2double(info->param->table->quick_rows[scan->keynr]) /
3918 3919
                rows2double(prev_records);
    DBUG_PRINT("info", ("Selectivity multiplier: %g", tmp));
3920
    selectivity_mult *= tmp;
3921
  }
3922 3923 3924
  DBUG_PRINT("info", ("Returning multiplier: %g", selectivity_mult));
  DBUG_RETURN(selectivity_mult);
}
3925

3926

3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963
/*
  Check if adding a ROR scan to a ROR-intersection reduces the cost of the
  ROR-intersection and, if yes, update the parameters of the
  ROR-intersection, including its cost.

  SYNOPSIS
    ror_intersect_add()
      param        Parameter from test_quick_select
      info         ROR-intersection structure to add the scan to.
      ror_scan     ROR scan info to add.
      is_cpk_scan  If TRUE, add the scan as CPK scan (this can be inferred
                   from other parameters and is passed separately only to
                   avoid duplicating the inference code)

  NOTES
    Adding a ROR scan to ROR-intersect "makes sense" iff the cost of ROR-
    intersection decreases. The cost of ROR-intersection is calculated as
    follows:

    cost= SUM_i(key_scan_cost_i) + cost_of_full_rows_retrieval

    When we add a scan the first increases and the second decreases.

    cost_of_full_rows_retrieval=
      (union of indexes used covers all needed fields) ?
        0 :
        cost_of_sweep_read(E(rows_to_retrieve), rows_in_table)

    E(rows_to_retrieve) = #rows_in_table * ror_scan_selectivity(null, scan1) *
                           ror_scan_selectivity({scan1}, scan2) * ... *
                           ror_scan_selectivity({scan1,...}, scanN). 
  RETURN
    TRUE   ROR scan added to ROR-intersection, cost updated.
    FALSE  It doesn't make sense to add this ROR scan to this ROR-intersection.
*/

static bool ror_intersect_add(ROR_INTERSECT_INFO *info,
3964
                              ROR_SCAN_INFO* ror_scan, bool is_cpk_scan)
3965 3966 3967 3968 3969 3970 3971 3972 3973 3974
{
  double selectivity_mult= 1.0;

  DBUG_ENTER("ror_intersect_add");
  DBUG_PRINT("info", ("Current out_rows= %g", info->out_rows));
  DBUG_PRINT("info", ("Adding scan on %s",
                      info->param->table->key_info[ror_scan->keynr].name));
  DBUG_PRINT("info", ("is_cpk_scan=%d",is_cpk_scan));

  selectivity_mult = ror_scan_selectivity(info, ror_scan);
3975 3976 3977
  if (selectivity_mult == 1.0)
  {
    /* Don't add this scan if it doesn't improve selectivity. */
3978
    DBUG_PRINT("info", ("The scan doesn't improve selectivity."));
3979
    DBUG_RETURN(FALSE);
3980
  }
3981 3982 3983 3984
  
  info->out_rows *= selectivity_mult;
  DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
  
3985
  if (is_cpk_scan)
3986
  {
3987 3988 3989 3990 3991 3992
    /*
      CPK scan is used to filter out rows. We apply filtering for 
      each record of every scan. Assuming 1/TIME_FOR_COMPARE_ROWID
      per check this gives us:
    */
    info->index_scan_costs += rows2double(info->index_records) / 
3993 3994 3995 3996
                              TIME_FOR_COMPARE_ROWID;
  }
  else
  {
3997
    info->index_records += info->param->table->quick_rows[ror_scan->keynr];
3998 3999
    info->index_scan_costs += ror_scan->index_read_cost;
    bitmap_union(&info->covered_fields, &ror_scan->covered_fields);
4000 4001 4002 4003 4004 4005
    if (!info->is_covering && bitmap_is_subset(&info->param->needed_fields,
                                               &info->covered_fields))
    {
      DBUG_PRINT("info", ("ROR-intersect is covering now"));
      info->is_covering= TRUE;
    }
4006
  }
4007

4008
  info->total_cost= info->index_scan_costs;
4009
  DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
4010 4011
  if (!info->is_covering)
  {
4012 4013 4014
    info->total_cost += 
      get_sweep_read_cost(info->param, double2rows(info->out_rows));
    DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost));
4015
  }
4016
  DBUG_PRINT("info", ("New out_rows= %g", info->out_rows));
4017
  DBUG_PRINT("info", ("New cost= %g, %scovering", info->total_cost,
4018
                      info->is_covering?"" : "non-"));
4019
  DBUG_RETURN(TRUE);
4020 4021
}

4022

4023 4024
/*
  Get best ROR-intersection plan using non-covering ROR-intersection search
4025 4026 4027 4028
  algorithm. The returned plan may be covering.

  SYNOPSIS
    get_best_ror_intersect()
4029 4030 4031
      param            Parameter from test_quick_select function.
      tree             Transformed restriction condition to be used to look
                       for ROR scans.
4032
      read_time        Do not return read plans with cost > read_time.
4033
      are_all_covering [out] set to TRUE if union of all scans covers all
4034 4035
                       fields needed by the query (and it is possible to build
                       a covering ROR-intersection)
4036

4037
  NOTES
4038 4039 4040 4041 4042
    get_key_scans_params must be called before this function can be called.
    
    When this function is called by ROR-union construction algorithm it
    assumes it is building an uncovered ROR-intersection (and thus # of full
    records to be retrieved is wrong here). This is a hack.
4043

4044
  IMPLEMENTATION
4045
    The approximate best non-covering plan search algorithm is as follows:
4046

4047 4048 4049 4050
    find_min_ror_intersection_scan()
    {
      R= select all ROR scans;
      order R by (E(#records_matched) * key_record_length).
4051

4052 4053 4054 4055 4056 4057
      S= first(R); -- set of scans that will be used for ROR-intersection
      R= R-first(S);
      min_cost= cost(S);
      min_scan= make_scan(S);
      while (R is not empty)
      {
4058 4059
        firstR= R - first(R);
        if (!selectivity(S + firstR < selectivity(S)))
4060
          continue;
4061
          
4062 4063 4064 4065 4066 4067 4068 4069 4070
        S= S + first(R);
        if (cost(S) < min_cost)
        {
          min_cost= cost(S);
          min_scan= make_scan(S);
        }
      }
      return min_scan;
    }
4071

4072
    See ror_intersect_add function for ROR intersection costs.
4073

4074
    Special handling for Clustered PK scans
4075 4076
    Clustered PK contains all table fields, so using it as a regular scan in
    index intersection doesn't make sense: a range scan on CPK will be less
4077 4078
    expensive in this case.
    Clustered PK scan has special handling in ROR-intersection: it is not used
4079
    to retrieve rows, instead its condition is used to filter row references
4080
    we get from scans on other keys.
4081 4082

  RETURN
4083
    ROR-intersection table read plan
4084
    NULL if out of memory or no suitable plan found.
4085 4086
*/

4087 4088 4089 4090 4091 4092
static
TRP_ROR_INTERSECT *get_best_ror_intersect(const PARAM *param, SEL_TREE *tree,
                                          double read_time,
                                          bool *are_all_covering)
{
  uint idx;
4093
  double min_cost= DBL_MAX;
4094
  DBUG_ENTER("get_best_ror_intersect");
4095

4096
  if ((tree->n_ror_scans < 2) || !param->table->file->records)
4097
    DBUG_RETURN(NULL);
4098 4099

  /*
4100 4101
    Step1: Collect ROR-able SEL_ARGs and create ROR_SCAN_INFO for each of 
    them. Also find and save clustered PK scan if there is one.
4102
  */
4103
  ROR_SCAN_INFO **cur_ror_scan;
4104
  ROR_SCAN_INFO *cpk_scan= NULL;
4105
  uint cpk_no;
monty@mysql.com's avatar
monty@mysql.com committed
4106
  bool cpk_scan_used= FALSE;
4107

4108 4109 4110 4111
  if (!(tree->ror_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     param->keys)))
    return NULL;
4112 4113
  cpk_no= ((param->table->file->primary_key_is_clustered()) ?
           param->table->s->primary_key : MAX_KEY);
4114

4115
  for (idx= 0, cur_ror_scan= tree->ror_scans; idx < param->keys; idx++)
4116
  {
4117
    ROR_SCAN_INFO *scan;
4118
    if (!tree->ror_scans_map.is_set(idx))
4119
      continue;
4120
    if (!(scan= make_ror_scan(param, idx, tree->keys[idx])))
4121
      return NULL;
4122
    if (param->real_keynr[idx] == cpk_no)
4123
    {
4124 4125
      cpk_scan= scan;
      tree->n_ror_scans--;
4126 4127
    }
    else
4128
      *(cur_ror_scan++)= scan;
4129
  }
4130

4131
  tree->ror_scans_end= cur_ror_scan;
4132 4133
  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "original",
                                          tree->ror_scans,
4134 4135
                                          tree->ror_scans_end););
  /*
4136
    Ok, [ror_scans, ror_scans_end) is array of ptrs to initialized
4137 4138
    ROR_SCAN_INFO's.
    Step 2: Get best ROR-intersection using an approximate algorithm.
4139 4140 4141
  */
  qsort(tree->ror_scans, tree->n_ror_scans, sizeof(ROR_SCAN_INFO*),
        (qsort_cmp)cmp_ror_scan_info);
4142 4143
  DBUG_EXECUTE("info",print_ror_scans_arr(param->table, "ordered",
                                          tree->ror_scans,
4144
                                          tree->ror_scans_end););
4145

4146 4147 4148 4149 4150 4151 4152 4153 4154
  ROR_SCAN_INFO **intersect_scans; /* ROR scans used in index intersection */
  ROR_SCAN_INFO **intersect_scans_end;
  if (!(intersect_scans= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     tree->n_ror_scans)))
    return NULL;
  intersect_scans_end= intersect_scans;

  /* Create and incrementally update ROR intersection. */
4155 4156 4157
  ROR_INTERSECT_INFO *intersect, *intersect_best;
  if (!(intersect= ror_intersect_init(param)) || 
      !(intersect_best= ror_intersect_init(param)))
4158
    return NULL;
4159

4160
  /* [intersect_scans,intersect_scans_best) will hold the best intersection */
4161
  ROR_SCAN_INFO **intersect_scans_best;
4162
  cur_ror_scan= tree->ror_scans;
4163
  intersect_scans_best= intersect_scans;
4164
  while (cur_ror_scan != tree->ror_scans_end && !intersect->is_covering)
4165
  {
4166
    /* S= S + first(R);  R= R - first(R); */
4167
    if (!ror_intersect_add(intersect, *cur_ror_scan, FALSE))
4168 4169 4170 4171 4172 4173
    {
      cur_ror_scan++;
      continue;
    }
    
    *(intersect_scans_end++)= *(cur_ror_scan++);
4174

4175
    if (intersect->total_cost < min_cost)
4176
    {
4177
      /* Local minimum found, save it */
4178
      ror_intersect_cpy(intersect_best, intersect);
4179
      intersect_scans_best= intersect_scans_end;
4180
      min_cost = intersect->total_cost;
4181 4182
    }
  }
4183

4184 4185 4186 4187 4188 4189
  if (intersect_scans_best == intersect_scans)
  {
    DBUG_PRINT("info", ("None of scans increase selectivity"));
    DBUG_RETURN(NULL);
  }
    
4190 4191 4192 4193
  DBUG_EXECUTE("info",print_ror_scans_arr(param->table,
                                          "best ROR-intersection",
                                          intersect_scans,
                                          intersect_scans_best););
4194

4195
  *are_all_covering= intersect->is_covering;
4196
  uint best_num= intersect_scans_best - intersect_scans;
4197 4198
  ror_intersect_cpy(intersect, intersect_best);

4199 4200
  /*
    Ok, found the best ROR-intersection of non-CPK key scans.
4201 4202
    Check if we should add a CPK scan. If the obtained ROR-intersection is 
    covering, it doesn't make sense to add CPK scan.
4203 4204
  */
  if (cpk_scan && !intersect->is_covering)
4205
  {
4206
    if (ror_intersect_add(intersect, cpk_scan, TRUE) && 
4207
        (intersect->total_cost < min_cost))
4208
    {
monty@mysql.com's avatar
monty@mysql.com committed
4209
      cpk_scan_used= TRUE;
4210
      intersect_best= intersect; //just set pointer here
4211 4212
    }
  }
4213

4214
  /* Ok, return ROR-intersect plan if we have found one */
4215
  TRP_ROR_INTERSECT *trp= NULL;
4216
  if (min_cost < read_time && (cpk_scan_used || best_num > 1))
4217
  {
4218 4219
    if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
      DBUG_RETURN(trp);
4220 4221
    if (!(trp->first_scan=
           (ROR_SCAN_INFO**)alloc_root(param->mem_root,
4222 4223 4224 4225
                                       sizeof(ROR_SCAN_INFO*)*best_num)))
      DBUG_RETURN(NULL);
    memcpy(trp->first_scan, intersect_scans, best_num*sizeof(ROR_SCAN_INFO*));
    trp->last_scan=  trp->first_scan + best_num;
4226 4227 4228 4229 4230 4231
    trp->is_covering= intersect_best->is_covering;
    trp->read_cost= intersect_best->total_cost;
    /* Prevent divisons by zero */
    ha_rows best_rows = double2rows(intersect_best->out_rows);
    if (!best_rows)
      best_rows= 1;
4232
    trp->records= best_rows;
4233 4234 4235 4236 4237
    trp->index_scan_costs= intersect_best->index_scan_costs;
    trp->cpk_scan= cpk_scan_used? cpk_scan: NULL;
    DBUG_PRINT("info", ("Returning non-covering ROR-intersect plan:"
                        "cost %g, records %lu",
                        trp->read_cost, (ulong) trp->records));
4238
  }
4239
  DBUG_RETURN(trp);
4240 4241 4242 4243
}


/*
4244
  Get best covering ROR-intersection.
4245
  SYNOPSIS
4246
    get_best_covering_ror_intersect()
4247 4248 4249
      param     Parameter from test_quick_select function.
      tree      SEL_TREE with sets of intervals for different keys.
      read_time Don't return table read plans with cost > read_time.
4250

4251 4252
  RETURN
    Best covering ROR-intersection plan
4253
    NULL if no plan found.
4254 4255

  NOTES
4256
    get_best_ror_intersect must be called for a tree before calling this
4257
    function for it.
4258
    This function invalidates tree->ror_scans member values.
4259

4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272
  The following approximate algorithm is used:
    I=set of all covering indexes
    F=set of all fields to cover
    S={}

    do {
      Order I by (#covered fields in F desc,
                  #components asc,
                  number of first not covered component asc);
      F=F-covered by first(I);
      S=S+first(I);
      I=I-first(I);
    } while F is not empty.
4273 4274
*/

4275
static
4276 4277
TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
                                                   SEL_TREE *tree,
4278
                                                   double read_time)
4279
{
4280
  ROR_SCAN_INFO **ror_scan_mark;
4281
  ROR_SCAN_INFO **ror_scans_end= tree->ror_scans_end;
4282 4283 4284 4285
  DBUG_ENTER("get_best_covering_ror_intersect");
  uint nbits= param->fields_bitmap_size*8;

  for (ROR_SCAN_INFO **scan= tree->ror_scans; scan != ror_scans_end; ++scan)
4286
    (*scan)->key_components=
4287
      param->table->key_info[(*scan)->keynr].key_parts;
4288

4289 4290
  /*
    Run covering-ROR-search algorithm.
4291
    Assume set I is [ror_scan .. ror_scans_end)
4292
  */
4293

4294 4295
  /*I=set of all covering indexes */
  ror_scan_mark= tree->ror_scans;
4296

mronstrom@mysql.com's avatar
mronstrom@mysql.com committed
4297
  uint32 int_buf[MAX_KEY/32+1];
4298
  MY_BITMAP covered_fields;
4299
  if (bitmap_init(&covered_fields, int_buf, nbits, FALSE))
4300 4301 4302 4303 4304
    DBUG_RETURN(0);
  bitmap_clear_all(&covered_fields);

  double total_cost= 0.0f;
  ha_rows records=0;
4305 4306
  bool all_covered;

4307 4308 4309 4310 4311 4312
  DBUG_PRINT("info", ("Building covering ROR-intersection"));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "building covering ROR-I",
                                           ror_scan_mark, ror_scans_end););
  do {
    /*
4313
      Update changed sorting info:
4314
        #covered fields,
4315
	number of first not covered component
4316 4317 4318 4319 4320
      Calculate and save these values for each of remaining scans.
    */
    for (ROR_SCAN_INFO **scan= ror_scan_mark; scan != ror_scans_end; ++scan)
    {
      bitmap_subtract(&(*scan)->covered_fields, &covered_fields);
4321
      (*scan)->used_fields_covered=
4322
        bitmap_bits_set(&(*scan)->covered_fields);
4323
      (*scan)->first_uncovered_field=
4324 4325 4326 4327 4328 4329 4330 4331 4332
        bitmap_get_first(&(*scan)->covered_fields);
    }

    qsort(ror_scan_mark, ror_scans_end-ror_scan_mark, sizeof(ROR_SCAN_INFO*),
          (qsort_cmp)cmp_ror_scan_info_covering);

    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "remaining scans",
                                             ror_scan_mark, ror_scans_end););
4333

4334 4335 4336
    /* I=I-first(I) */
    total_cost += (*ror_scan_mark)->index_read_cost;
    records += (*ror_scan_mark)->records;
4337
    DBUG_PRINT("info", ("Adding scan on %s",
4338 4339 4340 4341 4342 4343
                        param->table->key_info[(*ror_scan_mark)->keynr].name));
    if (total_cost > read_time)
      DBUG_RETURN(NULL);
    /* F=F-covered by first(I) */
    bitmap_union(&covered_fields, &(*ror_scan_mark)->covered_fields);
    all_covered= bitmap_is_subset(&param->needed_fields, &covered_fields);
4344 4345 4346 4347
  } while ((++ror_scan_mark < ror_scans_end) && !all_covered);
  
  if (!all_covered || (ror_scan_mark - tree->ror_scans) == 1)
    DBUG_RETURN(NULL);
4348 4349 4350 4351 4352 4353 4354 4355 4356

  /*
    Ok, [tree->ror_scans .. ror_scan) holds covering index_intersection with
    cost total_cost.
  */
  DBUG_PRINT("info", ("Covering ROR-intersect scans cost: %g", total_cost));
  DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                           "creating covering ROR-intersect",
                                           tree->ror_scans, ror_scan_mark););
4357

4358
  /* Add priority queue use cost. */
4359 4360
  total_cost += rows2double(records)*
                log((double)(ror_scan_mark - tree->ror_scans)) /
4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376
                (TIME_FOR_COMPARE_ROWID * M_LN2);
  DBUG_PRINT("info", ("Covering ROR-intersect full cost: %g", total_cost));

  if (total_cost > read_time)
    DBUG_RETURN(NULL);

  TRP_ROR_INTERSECT *trp;
  if (!(trp= new (param->mem_root) TRP_ROR_INTERSECT))
    DBUG_RETURN(trp);
  uint best_num= (ror_scan_mark - tree->ror_scans);
  if (!(trp->first_scan= (ROR_SCAN_INFO**)alloc_root(param->mem_root,
                                                     sizeof(ROR_SCAN_INFO*)*
                                                     best_num)))
    DBUG_RETURN(NULL);
  memcpy(trp->first_scan, ror_scan_mark, best_num*sizeof(ROR_SCAN_INFO*));
  trp->last_scan=  trp->first_scan + best_num;
monty@mysql.com's avatar
monty@mysql.com committed
4377
  trp->is_covering= TRUE;
4378 4379
  trp->read_cost= total_cost;
  trp->records= records;
4380
  trp->cpk_scan= NULL;
4381

4382 4383 4384
  DBUG_PRINT("info",
             ("Returning covering ROR-intersect plan: cost %g, records %lu",
              trp->read_cost, (ulong) trp->records));
4385
  DBUG_RETURN(trp);
4386 4387 4388
}


4389
/*
4390
  Get best "range" table read plan for given SEL_TREE.
4391
  Also update PARAM members and store ROR scans info in the SEL_TREE.
4392
  SYNOPSIS
4393
    get_key_scans_params
4394
      param        parameters from test_quick_select
4395
      tree         make range select for this SEL_TREE
monty@mysql.com's avatar
monty@mysql.com committed
4396
      index_read_must_be_used if TRUE, assume 'index only' option will be set
4397
                             (except for clustered PK indexes)
4398 4399
      read_time    don't create read plans with cost > read_time.
  RETURN
4400
    Best range read plan
4401
    NULL if no plan found or error occurred
4402 4403
*/

4404
static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
4405
                                       bool index_read_must_be_used,
4406
                                       double read_time)
4407 4408
{
  int idx;
4409 4410 4411
  SEL_ARG **key,**end, **key_to_read= NULL;
  ha_rows best_records;
  TRP_RANGE* read_plan= NULL;
4412
  bool pk_is_clustered= param->table->file->primary_key_is_clustered();
4413 4414
  DBUG_ENTER("get_key_scans_params");
  LINT_INIT(best_records); /* protected by key_to_read */
4415
  /*
4416 4417
    Note that there may be trees that have type SEL_TREE::KEY but contain no
    key reads at all, e.g. tree for expression "key1 is not null" where key1
4418
    is defined as "not null".
4419 4420
  */
  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->keys_map,
4421 4422 4423 4424
                                      "tree scans"););
  tree->ror_scans_map.clear_all();
  tree->n_ror_scans= 0;
  for (idx= 0,key=tree->keys, end=key+param->keys;
4425 4426 4427 4428 4429 4430 4431
       key != end ;
       key++,idx++)
  {
    ha_rows found_records;
    double found_read_time;
    if (*key)
    {
4432
      uint keynr= param->real_keynr[idx];
4433 4434
      if ((*key)->type == SEL_ARG::MAYBE_KEY ||
          (*key)->maybe_flag)
4435
        param->needed_reg->set_bit(keynr);
4436

monty@mysql.com's avatar
monty@mysql.com committed
4437 4438
      bool read_index_only= index_read_must_be_used ? TRUE :
                            (bool) param->table->used_keys.is_set(keynr);
4439

4440 4441 4442 4443 4444 4445
      found_records= check_quick_select(param, idx, *key);
      if (param->is_ror_scan)
      {
        tree->n_ror_scans++;
        tree->ror_scans_map.set_bit(idx);
      }
4446
      double cpu_cost= (double) found_records / TIME_FOR_COMPARE;
4447
      if (found_records != HA_POS_ERROR && found_records > 2 &&
sergefp@mysql.com's avatar
sergefp@mysql.com committed
4448
          read_index_only &&
monty@mysql.com's avatar
monty@mysql.com committed
4449
          (param->table->file->index_flags(keynr, param->max_key_part,1) &
monty@mysql.com's avatar
monty@mysql.com committed
4450
           HA_KEYREAD_ONLY) &&
4451
          !(pk_is_clustered && keynr == param->table->s->primary_key))
4452 4453 4454 4455 4456
      {
        /*
          We can resolve this by only reading through this key. 
          0.01 is added to avoid races between range and 'index' scan.
        */
4457
        found_read_time= get_index_only_read_time(param,found_records,keynr) +
4458 4459
                         cpu_cost + 0.01;
      }
4460
      else
4461
      {
4462
        /*
4463 4464 4465
          cost(read_through_index) = cost(disk_io) + cost(row_in_range_checks)
          The row_in_range check is in QUICK_RANGE_SELECT::cmp_next function.
        */
4466 4467 4468
	found_read_time= param->table->file->read_time(keynr,
                                                       param->range_count,
                                                       found_records) +
4469 4470
			 cpu_cost + 0.01;
      }
4471 4472 4473
      DBUG_PRINT("info",("key %s: found_read_time: %g (cur. read_time: %g)",
                         param->table->key_info[keynr].name, found_read_time,
                         read_time));
4474

4475 4476
      if (read_time > found_read_time && found_records != HA_POS_ERROR
          /*|| read_time == DBL_MAX*/ )
4477
      {
4478
        read_time=    found_read_time;
4479
        best_records= found_records;
4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495
        key_to_read=  key;
      }

    }
  }

  DBUG_EXECUTE("info", print_sel_tree(param, tree, &tree->ror_scans_map,
                                      "ROR scans"););
  if (key_to_read)
  {
    idx= key_to_read - tree->keys;
    if ((read_plan= new (param->mem_root) TRP_RANGE(*key_to_read, idx)))
    {
      read_plan->records= best_records;
      read_plan->is_ror= tree->ror_scans_map.is_set(idx);
      read_plan->read_cost= read_time;
4496 4497 4498 4499
      DBUG_PRINT("info",
                 ("Returning range plan for key %s, cost %g, records %lu",
                  param->table->key_info[param->real_keynr[idx]].name,
                  read_plan->read_cost, (ulong) read_plan->records));
4500 4501 4502 4503 4504 4505 4506 4507 4508
    }
  }
  else
    DBUG_PRINT("info", ("No 'range' table read plan found"));

  DBUG_RETURN(read_plan);
}


4509
/*
  Build a QUICK_INDEX_MERGE_SELECT from this plan: one quick select is
  created for every range scan in [range_scans, range_scans_end) and pushed
  into the merged select.

  retrieve_full_rows and parent_alloc are not used: index_merge always
  retrieves full rows, and the child selects are created against the merged
  select's own 'alloc'.

  RETURN
    The merged quick select, or NULL if any allocation/creation step failed
    (everything created so far is deleted in that case).
*/
QUICK_SELECT_I *TRP_INDEX_MERGE::make_quick(PARAM *param,
                                            bool retrieve_full_rows,
                                            MEM_ROOT *parent_alloc)
{
  QUICK_INDEX_MERGE_SELECT *merged=
    new QUICK_INDEX_MERGE_SELECT(param->thd, param->table);
  if (!merged)
    return NULL;

  merged->records= records;
  merged->read_time= read_cost;

  TRP_RANGE **cur_scan= range_scans;
  while (cur_scan != range_scans_end)
  {
    QUICK_RANGE_SELECT *child= (QUICK_RANGE_SELECT*)
      ((*cur_scan)->make_quick(param, FALSE, &merged->alloc));
    if (!child || merged->push_quick_back(child))
    {
      /* 'child' is not owned by 'merged' here, so free both explicitly. */
      delete child;
      delete merged;
      return NULL;
    }
    cur_scan++;
  }
  return merged;
}

4536
/*
  Build a QUICK_ROR_INTERSECT_SELECT from this plan.

  SYNOPSIS
    TRP_ROR_INTERSECT::make_quick()
      param               Parameter from test_quick_select
      retrieve_full_rows  If TRUE, the created select is asked to retrieve
                          full rows unless the intersection is covering.
      parent_alloc        If not NULL, MEM_ROOT used for the child quick
                          selects; otherwise the new select's own 'alloc'.

  NOTES
    The loop advances the first_scan member itself, so after this call
    first_scan == last_scan (unless creation failed part-way).

  RETURN
    The created quick select, NULL on failure.
*/
QUICK_SELECT_I *TRP_ROR_INTERSECT::make_quick(PARAM *param,
                                              bool retrieve_full_rows,
                                              MEM_ROOT *parent_alloc)
{
  QUICK_ROR_INTERSECT_SELECT *quick_intrsect;
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("TRP_ROR_INTERSECT::make_quick");
  MEM_ROOT *alloc;

  if ((quick_intrsect=
         new QUICK_ROR_INTERSECT_SELECT(param->thd, param->table,
                                        retrieve_full_rows? (!is_covering):FALSE,
                                        parent_alloc)))
  {
    DBUG_EXECUTE("info", print_ror_scans_arr(param->table,
                                             "creating ROR-intersect",
                                             first_scan, last_scan););
    alloc= parent_alloc? parent_alloc: &quick_intrsect->alloc;
    for (; first_scan != last_scan;++first_scan)
    {
      if (!(quick= get_quick_select(param, (*first_scan)->idx,
                                    (*first_scan)->sel_arg, alloc)) ||
          quick_intrsect->push_quick_back(quick))
      {
        /*
          If push_quick_back() failed, 'quick' was created but is not owned
          by quick_intrsect, so it must be freed here (deleting NULL is a
          no-op when get_quick_select() itself failed).
        */
        delete quick;
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
    }
    if (cpk_scan)
    {
      if (!(quick= get_quick_select(param, cpk_scan->idx,
                                    cpk_scan->sel_arg, alloc)))
      {
        delete quick_intrsect;
        DBUG_RETURN(NULL);
      }
      /* The CPK quick select is attached as cpk_quick with no handler. */
      quick->file= NULL;
      quick_intrsect->cpk_quick= quick;
    }
    quick_intrsect->records= records;
    quick_intrsect->read_time= read_cost;
  }
  DBUG_RETURN(quick_intrsect);
}

4581

4582
/*
  Build a QUICK_ROR_UNION_SELECT from this plan: a quick select is created
  for every child plan in [first_ror, last_ror) and pushed into the union.

  retrieve_full_rows and parent_alloc are not used; child selects are
  created against the union select's own 'alloc'.

  RETURN
    The created quick select, NULL on failure (everything created so far is
    deleted in that case).
*/
QUICK_SELECT_I *TRP_ROR_UNION::make_quick(PARAM *param,
                                          bool retrieve_full_rows,
                                          MEM_ROOT *parent_alloc)
{
  QUICK_ROR_UNION_SELECT *quick_roru;
  TABLE_READ_PLAN **scan;
  QUICK_SELECT_I *quick;
  DBUG_ENTER("TRP_ROR_UNION::make_quick");
  /*
    It is impossible to construct a ROR-union that will not retrieve full
    rows, ignore retrieve_full_rows parameter.
  */
  if ((quick_roru= new QUICK_ROR_UNION_SELECT(param->thd, param->table)))
  {
    for (scan= first_ror; scan != last_ror; scan++)
    {
      if (!(quick= (*scan)->make_quick(param, FALSE, &quick_roru->alloc)) ||
          quick_roru->push_quick_back(quick))
      {
        /*
          Do not leak on failure: 'quick' (if created) is not owned by
          quick_roru, and quick_roru itself is abandoned by returning NULL.
        */
        delete quick;
        delete quick_roru;
        DBUG_RETURN(NULL);
      }
    }
    quick_roru->records= records;
    quick_roru->read_time= read_cost;
  }
  DBUG_RETURN(quick_roru);
}

4608

4609
/*
monty@mysql.com's avatar
monty@mysql.com committed
4610
  Build a SEL_TREE for <> or NOT BETWEEN predicate
4611 4612 4613 4614 4615 4616
 
  SYNOPSIS
    get_ne_mm_tree()
      param       PARAM from SQL_SELECT::test_quick_select
      cond_func   item for the predicate
      field       field in the predicate
monty@mysql.com's avatar
monty@mysql.com committed
4617 4618
      lt_value    constant that field should be smaller
      gt_value    constant that field should be greaterr
4619 4620 4621
      cmp_type    compare type for the field

  RETURN 
monty@mysql.com's avatar
monty@mysql.com committed
4622 4623
    #  Pointer to tree built tree
    0  on error
4624 4625
*/

4626
static SEL_TREE *get_ne_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, 
monty@mysql.com's avatar
monty@mysql.com committed
4627 4628
                                Field *field,
                                Item *lt_value, Item *gt_value,
4629 4630
                                Item_result cmp_type)
{
monty@mysql.com's avatar
monty@mysql.com committed
4631
  SEL_TREE *tree;
4632
  tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
monty@mysql.com's avatar
monty@mysql.com committed
4633
                     lt_value, cmp_type);
4634 4635 4636 4637
  if (tree)
  {
    tree= tree_or(param, tree, get_mm_parts(param, cond_func, field,
					    Item_func::GT_FUNC,
monty@mysql.com's avatar
monty@mysql.com committed
4638
					    gt_value, cmp_type));
4639 4640 4641 4642 4643
  }
  return tree;
}
   

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4644 4645 4646 4647 4648 4649 4650 4651 4652 4653
/*
  Build a SEL_TREE for a simple predicate
 
  SYNOPSIS
    get_func_mm_tree()
      param       PARAM from SQL_SELECT::test_quick_select
      cond_func   item for the predicate
      field       field in the predicate
      value       constant in the predicate
      cmp_type    compare type for the field
4654
      inv         TRUE <> NOT cond_func is considered
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4655
                  (makes sense only when cond_func is BETWEEN or IN) 
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4656 4657

  RETURN 
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4658
    Pointer to the tree built tree
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4659 4660
*/

4661
static SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Item_func *cond_func, 
4662
                                  Field *field, Item *value,
4663
                                  Item_result cmp_type, bool inv)
4664 4665 4666 4667
{
  SEL_TREE *tree= 0;
  DBUG_ENTER("get_func_mm_tree");

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4668
  switch (cond_func->functype()) {
4669

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4670
  case Item_func::NE_FUNC:
monty@mysql.com's avatar
monty@mysql.com committed
4671
    tree= get_ne_mm_tree(param, cond_func, field, value, value, cmp_type);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4672
    break;
4673

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4674
  case Item_func::BETWEEN:
4675
    if (inv)
4676
    {
monty@mysql.com's avatar
monty@mysql.com committed
4677 4678
      tree= get_ne_mm_tree(param, cond_func, field, cond_func->arguments()[1],
                           cond_func->arguments()[2], cmp_type);
4679 4680
    }
    else
4681
    {
4682 4683 4684 4685 4686 4687 4688 4689 4690
      tree= get_mm_parts(param, cond_func, field, Item_func::GE_FUNC,
		         cond_func->arguments()[1],cmp_type);
      if (tree)
      {
        tree= tree_and(param, tree, get_mm_parts(param, cond_func, field,
					         Item_func::LE_FUNC,
					         cond_func->arguments()[2],
                                                 cmp_type));
      }
4691
    }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4692
    break;
4693

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4694
  case Item_func::IN_FUNC:
4695 4696
  {
    Item_func_in *func=(Item_func_in*) cond_func;
4697 4698

    if (inv)
4699
    {
4700
      tree= get_ne_mm_tree(param, cond_func, field,
monty@mysql.com's avatar
monty@mysql.com committed
4701 4702
                           func->arguments()[1], func->arguments()[1],
                           cmp_type);
4703
      if (tree)
4704
      {
4705 4706 4707 4708 4709
        Item **arg, **end;
        for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
             arg < end ; arg++)
        {
          tree=  tree_and(param, tree, get_ne_mm_tree(param, cond_func, field, 
monty@mysql.com's avatar
monty@mysql.com committed
4710
                                                      *arg, *arg, cmp_type));
4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727
        }
      }
    }
    else
    {    
      tree= get_mm_parts(param, cond_func, field, Item_func::EQ_FUNC,
                         func->arguments()[1], cmp_type);
      if (tree)
      {
        Item **arg, **end;
        for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
             arg < end ; arg++)
        {
          tree= tree_or(param, tree, get_mm_parts(param, cond_func, field, 
                                                  Item_func::EQ_FUNC,
                                                  *arg, cmp_type));
        }
4728 4729
      }
    }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4730
    break;
4731
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4732
  default: 
4733
  {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4734 4735 4736 4737 4738 4739 4740
    /* 
       Here the function for the following predicates are processed:
       <, <=, =, >=, >, LIKE, IS NULL, IS NOT NULL.
       If the predicate is of the form (value op field) it is handled
       as the equivalent predicate (field rev_op value), e.g.
       2 <= a is handled as a >= 2.
    */
4741 4742 4743
    Item_func::Functype func_type=
      (value != cond_func->arguments()[0]) ? cond_func->functype() :
        ((Item_bool_func2*) cond_func)->rev_functype();
4744
    tree= get_mm_parts(param, cond_func, field, func_type, value, cmp_type);
4745
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4746 4747
  }

4748
  DBUG_RETURN(tree);
4749

4750 4751
}

bk@work.mysql.com's avatar
bk@work.mysql.com committed
4752 4753
	/* make a select tree of all keys in condition */

4754
static SEL_TREE *get_mm_tree(RANGE_OPT_PARAM *param,COND *cond)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4755 4756
{
  SEL_TREE *tree=0;
4757 4758
  SEL_TREE *ftree= 0;
  Item_field *field_item= 0;
4759
  bool inv= FALSE;
4760
  Item *value;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773
  DBUG_ENTER("get_mm_tree");

  if (cond->type() == Item::COND_ITEM)
  {
    List_iterator<Item> li(*((Item_cond*) cond)->argument_list());

    if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
    {
      tree=0;
      Item *item;
      while ((item=li++))
      {
	SEL_TREE *new_tree=get_mm_tree(param,item);
4774
	if (param->thd->is_fatal_error)
4775
	  DBUG_RETURN(0);	// out of memory
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790
	tree=tree_and(param,tree,new_tree);
	if (tree && tree->type == SEL_TREE::IMPOSSIBLE)
	  break;
      }
    }
    else
    {						// COND OR
      tree=get_mm_tree(param,li++);
      if (tree)
      {
	Item *item;
	while ((item=li++))
	{
	  SEL_TREE *new_tree=get_mm_tree(param,item);
	  if (!new_tree)
4791
	    DBUG_RETURN(0);	// out of memory
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802
	  tree=tree_or(param,tree,new_tree);
	  if (!tree || tree->type == SEL_TREE::ALWAYS)
	    break;
	}
      }
    }
    DBUG_RETURN(tree);
  }
  /* Here when simple cond */
  if (cond->const_item())
  {
4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814
    /*
      During the cond->val_int() evaluation we can come across a subselect 
      item which may allocate memory on the thd->mem_root and assumes 
      all the memory allocated has the same life span as the subselect 
      item itself. So we have to restore the thread's mem_root here.
    */
    MEM_ROOT *tmp_root= param->mem_root;
    param->thd->mem_root= param->old_root;
    tree= cond->val_int() ? new(tmp_root) SEL_TREE(SEL_TREE::ALWAYS) :
                            new(tmp_root) SEL_TREE(SEL_TREE::IMPOSSIBLE);
    param->thd->mem_root= tmp_root;
    DBUG_RETURN(tree);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4815
  }
4816

4817 4818 4819
  table_map ref_tables= 0;
  table_map param_comp= ~(param->prev_tables | param->read_tables |
		          param->current_table);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4820 4821
  if (cond->type() != Item::FUNC_ITEM)
  {						// Should be a field
4822
    ref_tables= cond->used_tables();
4823 4824
    if ((ref_tables & param->current_table) ||
	(ref_tables & ~(param->prev_tables | param->read_tables)))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4825 4826 4827
      DBUG_RETURN(0);
    DBUG_RETURN(new SEL_TREE(SEL_TREE::MAYBE));
  }
4828

bk@work.mysql.com's avatar
bk@work.mysql.com committed
4829
  Item_func *cond_func= (Item_func*) cond;
4830 4831 4832
  if (cond_func->functype() == Item_func::BETWEEN ||
      cond_func->functype() == Item_func::IN_FUNC)
    inv= ((Item_func_opt_neg *) cond_func)->negated;
4833
  else if (cond_func->select_optimize() == Item_func::OPTIMIZE_NONE)
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4834
    DBUG_RETURN(0);			       
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4835

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4836 4837
  param->cond= cond;

igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4838 4839
  switch (cond_func->functype()) {
  case Item_func::BETWEEN:
4840
    if (cond_func->arguments()[0]->real_item()->type() != Item::FIELD_ITEM)
4841
      DBUG_RETURN(0);
4842
    field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4843 4844 4845
    value= NULL;
    break;
  case Item_func::IN_FUNC:
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4846 4847
  {
    Item_func_in *func=(Item_func_in*) cond_func;
4848
    if (func->key_item()->real_item()->type() != Item::FIELD_ITEM)
4849
      DBUG_RETURN(0);
4850
    field_item= (Item_field*) (func->key_item()->real_item());
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4851 4852
    value= NULL;
    break;
4853
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4854
  case Item_func::MULT_EQUAL_FUNC:
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4855
  {
4856 4857
    Item_equal *item_equal= (Item_equal *) cond;    
    if (!(value= item_equal->get_const()))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4858 4859 4860 4861
      DBUG_RETURN(0);
    Item_equal_iterator it(*item_equal);
    ref_tables= value->used_tables();
    while ((field_item= it++))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4862
    {
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4863 4864 4865
      Field *field= field_item->field;
      Item_result cmp_type= field->cmp_type();
      if (!((ref_tables | field->table->map) & param_comp))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4866
      {
4867
        tree= get_mm_parts(param, cond, field, Item_func::EQ_FUNC,
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4868 4869
		           value,cmp_type);
        ftree= !ftree ? tree : tree_and(param, ftree, tree);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4870 4871
      }
    }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
4872
    
4873
    DBUG_RETURN(ftree);
4874 4875
  }
  default:
4876
    if (cond_func->arguments()[0]->real_item()->type() == Item::FIELD_ITEM)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4877
    {
4878
      field_item= (Item_field*) (cond_func->arguments()[0]->real_item());
4879
      value= cond_func->arg_count > 1 ? cond_func->arguments()[1] : 0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4880
    }
4881
    else if (cond_func->have_rev_func() &&
4882 4883
             cond_func->arguments()[1]->real_item()->type() ==
                                                            Item::FIELD_ITEM)
4884
    {
4885
      field_item= (Item_field*) (cond_func->arguments()[1]->real_item());
4886 4887 4888 4889
      value= cond_func->arguments()[0];
    }
    else
      DBUG_RETURN(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4890
  }
4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905

  /* 
     If the where condition contains a predicate (ti.field op const),
     then not only SELL_TREE for this predicate is built, but
     the trees for the results of substitution of ti.field for
     each tj.field belonging to the same multiple equality as ti.field
     are built as well.
     E.g. for WHERE t1.a=t2.a AND t2.a > 10 
     a SEL_TREE for t2.a > 10 will be built for quick select from t2
     and   
     a SEL_TREE for t1.a > 10 will be built for quick select from t1.
  */
     
  for (uint i= 0; i < cond_func->arg_count; i++)
  {
4906
    Item *arg= cond_func->arguments()[i]->real_item();
4907 4908 4909 4910 4911 4912
    if (arg != field_item)
      ref_tables|= arg->used_tables();
  }
  Field *field= field_item->field;
  Item_result cmp_type= field->cmp_type();
  if (!((ref_tables | field->table->map) & param_comp))
4913
    ftree= get_func_mm_tree(param, cond_func, field, value, cmp_type, inv);
4914 4915 4916 4917 4918 4919
  Item_equal *item_equal= field_item->item_equal;
  if (item_equal)
  {
    Item_equal_iterator it(*item_equal);
    Item_field *item;
    while ((item= it++))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4920
    {
4921 4922 4923 4924
      Field *f= item->field;
      if (field->eq(f))
        continue;
      if (!((ref_tables | f->table->map) & param_comp))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4925
      {
4926
        tree= get_func_mm_tree(param, cond_func, f, value, cmp_type, inv);
4927
        ftree= !ftree ? tree : tree_and(param, ftree, tree);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4928 4929 4930
      }
    }
  }
4931
  DBUG_RETURN(ftree);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4932 4933 4934 4935
}


/*
  Build a SEL_TREE for "field <type> value" covering every key part of
  param->key_parts that is built on the given field.

  RETURN
    0      The field belongs to another table, the value depends on tables
           outside param->prev_tables | param->read_tables, no key part
           uses the field, or out of memory
    other  The tree; its type is IMPOSSIBLE if one of the leaves is
*/

static SEL_TREE *
get_mm_parts(RANGE_OPT_PARAM *param, COND *cond_func, Field *field,
	     Item_func::Functype type,
	     Item *value, Item_result cmp_type)
{
  DBUG_ENTER("get_mm_parts");
  if (field->table != param->table)
    DBUG_RETURN(0);

  SEL_TREE *tree= 0;
  KEY_PART *part_end= param->key_parts_end;
  if (value &&
      value->used_tables() & ~(param->prev_tables | param->read_tables))
    DBUG_RETURN(0);

  for (KEY_PART *part= param->key_parts; part != part_end; part++)
  {
    if (!field->eq(part->field))
      continue;

    /* The tree is created when the first matching key part is found */
    if (!tree)
    {
      tree= new SEL_TREE();
      if (!tree)
        DBUG_RETURN(0);                         // OOM
    }

    SEL_ARG *leaf= 0;
    if (value && (value->used_tables() & ~param->read_tables))
    {
      /* This key may be used later */
      leaf= new SEL_ARG(SEL_ARG::MAYBE_KEY);
      if (!leaf)
        DBUG_RETURN(0);                         // OOM
    }
    else
    {
      leaf= get_mm_leaf(param, cond_func, part->field, part, type, value);
      if (!leaf)
        continue;
      if (leaf->type == SEL_ARG::IMPOSSIBLE)
      {
        tree->type= SEL_TREE::IMPOSSIBLE;
        DBUG_RETURN(tree);
      }
    }

    leaf->part= (uchar) part->part;
    tree->keys[part->key]= sel_add(tree->keys[part->key], leaf);
    tree->keys_map.set_bit(part->key);
  }

  DBUG_RETURN(tree);
}


static SEL_ARG *
4986
get_mm_leaf(RANGE_OPT_PARAM *param, COND *conf_func, Field *field, KEY_PART *key_part,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4987 4988
	    Item_func::Functype type,Item *value)
{
4989
  uint maybe_null=(uint) field->real_maybe_null();
4990
  bool optimize_range;
4991 4992
  SEL_ARG *tree= 0;
  MEM_ROOT *alloc= param->mem_root;
4993
  char *str;
evgen@moonbone.local's avatar
evgen@moonbone.local committed
4994
  ulong orig_sql_mode;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4995 4996
  DBUG_ENTER("get_mm_leaf");

4997 4998
  /*
    We need to restore the runtime mem_root of the thread in this
konstantin@mysql.com's avatar
konstantin@mysql.com committed
4999
    function because it evaluates the value of its argument, while
5000 5001 5002 5003 5004 5005
    the argument can be any, e.g. a subselect. The subselect
    items, in turn, assume that all the memory allocated during
    the evaluation has the same life span as the item itself.
    TODO: opt_range.cc should not reset thd->mem_root at all.
  */
  param->thd->mem_root= param->old_root;
5006 5007
  if (!value)					// IS NULL or IS NOT NULL
  {
5008
    if (field->table->maybe_null)		// Can't use a key on this
5009
      goto end;
5010
    if (!maybe_null)				// Not null field
5011 5012 5013 5014 5015 5016 5017
    {
      if (type == Item_func::ISNULL_FUNC)
        tree= &null_element;
      goto end;
    }
    if (!(tree= new (alloc) SEL_ARG(field,is_null_string,is_null_string)))
      goto end;                                 // out of memory
5018 5019 5020 5021 5022
    if (type == Item_func::ISNOTNULL_FUNC)
    {
      tree->min_flag=NEAR_MIN;		    /* IS NOT NULL ->  X > NULL */
      tree->max_flag=NO_MAX_RANGE;
    }
5023
    goto end;
5024 5025 5026
  }

  /*
5027 5028 5029 5030 5031 5032 5033 5034 5035 5036
    1. Usually we can't use an index if the column collation
       differ from the operation collation.

    2. However, we can reuse a case insensitive index for
       the binary searches:

       WHERE latin1_swedish_ci_column = 'a' COLLATE lati1_bin;

       WHERE latin1_swedish_ci_colimn = BINARY 'a '

5037 5038 5039 5040
  */
  if (field->result_type() == STRING_RESULT &&
      value->result_type() == STRING_RESULT &&
      key_part->image_type == Field::itRAW &&
5041 5042
      ((Field_str*)field)->charset() != conf_func->compare_collation() &&
      !(conf_func->compare_collation()->state & MY_CS_BINSORT))
5043
    goto end;
5044

5045 5046 5047 5048 5049
  if (param->using_real_indexes)
    optimize_range= field->optimize_range(param->real_keynr[key_part->key],
                                          key_part->part);
  else
    optimize_range= TRUE;
5050

bk@work.mysql.com's avatar
bk@work.mysql.com committed
5051 5052 5053 5054
  if (type == Item_func::LIKE_FUNC)
  {
    bool like_error;
    char buff1[MAX_FIELD_WIDTH],*min_str,*max_str;
5055
    String tmp(buff1,sizeof(buff1),value->collation.collation),*res;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5056
    uint length,offset,min_length,max_length;
5057
    uint field_length= field->pack_length()+maybe_null;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5058

5059
    if (!optimize_range)
5060
      goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5061
    if (!(res= value->val_str(&tmp)))
5062 5063 5064 5065
    {
      tree= &null_element;
      goto end;
    }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5066

5067 5068 5069 5070 5071
    /*
      TODO:
      Check if this was a function. This should have be optimized away
      in the sql_select.cc
    */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5072 5073 5074 5075 5076 5077
    if (res != &tmp)
    {
      tmp.copy(*res);				// Get own copy
      res= &tmp;
    }
    if (field->cmp_type() != STRING_RESULT)
5078
      goto end;                                 // Can only optimize strings
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5079 5080

    offset=maybe_null;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5081 5082 5083
    length=key_part->store_length;

    if (length != key_part->length  + maybe_null)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5084
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5085 5086 5087
      /* key packed with length prefix */
      offset+= HA_KEY_BLOB_LENGTH;
      field_length= length - HA_KEY_BLOB_LENGTH;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5088 5089 5090
    }
    else
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5091 5092 5093 5094 5095 5096 5097 5098
      if (unlikely(length < field_length))
      {
	/*
	  This can only happen in a table created with UNIREG where one key
	  overlaps many fields
	*/
	length= field_length;
      }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5099
      else
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5100
	field_length= length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5101 5102
    }
    length+=offset;
5103 5104
    if (!(min_str= (char*) alloc_root(alloc, length*2)))
      goto end;
5105

bk@work.mysql.com's avatar
bk@work.mysql.com committed
5106 5107 5108
    max_str=min_str+length;
    if (maybe_null)
      max_str[0]= min_str[0]=0;
5109

5110
    field_length-= maybe_null;
5111
    like_error= my_like_range(field->charset(),
monty@mysql.com's avatar
monty@mysql.com committed
5112
			      res->ptr(), res->length(),
monty@mysql.com's avatar
monty@mysql.com committed
5113 5114
			      ((Item_func_like*)(param->cond))->escape,
			      wild_one, wild_many,
5115
			      field_length,
monty@mysql.com's avatar
monty@mysql.com committed
5116 5117
			      min_str+offset, max_str+offset,
			      &min_length, &max_length);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5118
    if (like_error)				// Can't optimize with LIKE
5119
      goto end;
monty@mysql.com's avatar
monty@mysql.com committed
5120

5121
    if (offset != maybe_null)			// BLOB or VARCHAR
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5122 5123 5124 5125
    {
      int2store(min_str+maybe_null,min_length);
      int2store(max_str+maybe_null,max_length);
    }
5126 5127
    tree= new (alloc) SEL_ARG(field, min_str, max_str);
    goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5128 5129
  }

5130
  if (!optimize_range &&
5131
      type != Item_func::EQ_FUNC &&
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5132
      type != Item_func::EQUAL_FUNC)
5133
    goto end;                                   // Can't optimize this
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5134

5135 5136 5137 5138
  /*
    We can't always use indexes when comparing a string index to a number
    cmp_type() is checked to allow compare of dates to numbers
  */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5139 5140 5141
  if (field->result_type() == STRING_RESULT &&
      value->result_type() != STRING_RESULT &&
      field->cmp_type() != value->result_type())
5142
    goto end;
5143
  /* For comparison purposes allow invalid dates like 2000-01-32 */
evgen@moonbone.local's avatar
evgen@moonbone.local committed
5144
  orig_sql_mode= field->table->in_use->variables.sql_mode;
5145 5146 5147 5148
  if (value->real_item()->type() == Item::STRING_ITEM &&
      (field->type() == FIELD_TYPE_DATE ||
       field->type() == FIELD_TYPE_DATETIME))
    field->table->in_use->variables.sql_mode|= MODE_INVALID_DATES;
5149
  if (value->save_in_field_no_warnings(field, 1) < 0)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5150
  {
5151
    field->table->in_use->variables.sql_mode= orig_sql_mode;
5152
    /* This happens when we try to insert a NULL field in a not null column */
5153 5154
    tree= &null_element;                        // cmp with NULL is never TRUE
    goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5155
  }
5156
  field->table->in_use->variables.sql_mode= orig_sql_mode;
5157
  str= (char*) alloc_root(alloc, key_part->store_length+1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5158
  if (!str)
5159
    goto end;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5160
  if (maybe_null)
5161
    *str= (char) field->is_real_null();		// Set to 1 if null
5162
  field->get_key_image(str+maybe_null, key_part->length, key_part->image_type);
5163 5164
  if (!(tree= new (alloc) SEL_ARG(field, str, str)))
    goto end;                                   // out of memory
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5165

timour@mysql.com's avatar
timour@mysql.com committed
5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176
  /*
    Check if we are comparing an UNSIGNED integer with a negative constant.
    In this case we know that:
    (a) (unsigned_int [< | <=] negative_constant) == FALSE
    (b) (unsigned_int [> | >=] negative_constant) == TRUE
    In case (a) the condition is false for all values, and in case (b) it
    is true for all values, so we can avoid unnecessary retrieval and condition
    testing, and we also get correct comparison of unsinged integers with
    negative integers (which otherwise fails because at query execution time
    negative integers are cast to unsigned if compared with unsigned).
   */
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
5177 5178
  if (field->result_type() == INT_RESULT &&
      value->result_type() == INT_RESULT &&
timour@mysql.com's avatar
timour@mysql.com committed
5179 5180 5181 5182 5183 5184 5185 5186
      ((Field_num*)field)->unsigned_flag && !((Item_int*)value)->unsigned_flag)
  {
    longlong item_val= value->val_int();
    if (item_val < 0)
    {
      if (type == Item_func::LT_FUNC || type == Item_func::LE_FUNC)
      {
        tree->type= SEL_ARG::IMPOSSIBLE;
5187
        goto end;
timour@mysql.com's avatar
timour@mysql.com committed
5188 5189
      }
      if (type == Item_func::GT_FUNC || type == Item_func::GE_FUNC)
5190 5191 5192 5193
      {
        tree= 0;
        goto end;
      }
timour@mysql.com's avatar
timour@mysql.com committed
5194 5195
    }
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217

  switch (type) {
  case Item_func::LT_FUNC:
    if (field_is_equal_to_item(field,value))
      tree->max_flag=NEAR_MAX;
    /* fall through */
  case Item_func::LE_FUNC:
    if (!maybe_null)
      tree->min_flag=NO_MIN_RANGE;		/* From start */
    else
    {						// > NULL
      tree->min_value=is_null_string;
      tree->min_flag=NEAR_MIN;
    }
    break;
  case Item_func::GT_FUNC:
    if (field_is_equal_to_item(field,value))
      tree->min_flag=NEAR_MIN;
    /* fall through */
  case Item_func::GE_FUNC:
    tree->max_flag=NO_MAX_RANGE;
    break;
5218
  case Item_func::SP_EQUALS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5219 5220 5221
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_EQUAL;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5222
  case Item_func::SP_DISJOINT_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5223 5224 5225
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_DISJOINT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5226
  case Item_func::SP_INTERSECTS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5227 5228 5229
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5230
  case Item_func::SP_TOUCHES_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5231 5232 5233
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5234 5235

  case Item_func::SP_CROSSES_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5236 5237 5238
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5239
  case Item_func::SP_WITHIN_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5240 5241 5242
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_WITHIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5243 5244

  case Item_func::SP_CONTAINS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5245 5246 5247
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_CONTAIN;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5248
  case Item_func::SP_OVERLAPS_FUNC:
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
5249 5250 5251
    tree->min_flag=GEOM_FLAG | HA_READ_MBR_INTERSECT;// NEAR_MIN;//512;
    tree->max_flag=NO_MAX_RANGE;
    break;
5252

bk@work.mysql.com's avatar
bk@work.mysql.com committed
5253 5254 5255
  default:
    break;
  }
5256 5257 5258

end:
  param->thd->mem_root= alloc;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5259 5260 5261 5262 5263 5264 5265 5266 5267
  DBUG_RETURN(tree);
}


/******************************************************************************
** Tree manipulation functions
** If tree is 0 it means that the condition can't be tested. It refers
** to a non existent table or to a field in current table which isn't a key.
** The different tree flags:
monty@mysql.com's avatar
monty@mysql.com committed
5268 5269
** IMPOSSIBLE:	 Condition is never TRUE
** ALWAYS:	 Condition is always TRUE
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5270 5271 5272 5273 5274 5275
** MAYBE:	 Condition may exist when tables are read
** MAYBE_KEY:	 Condition refers to a key that may be used in join loop
** KEY_RANGE:	 Condition uses a key
******************************************************************************/

/*
  Add a new key test to a key when scanning through all keys.

  The two chains are linked through next_key_part. They are merged into one
  chain by repeatedly taking the head with the smaller 'part'; when the
  parts are equal the element of key2 is taken first. If one of the chains
  is empty the other one is returned as is.

  This will never be called for same key parts.
*/

static SEL_ARG *
sel_add(SEL_ARG *key1,SEL_ARG *key2)
{
  SEL_ARG *head= 0;
  SEL_ARG **tail= &head;                        // Where to link the next one

  while (key1 && key2)
  {
    /* Pick the chain whose current element goes first */
    SEL_ARG **source= (key1->part < key2->part) ? &key1 : &key2;
    SEL_ARG *taken= *source;

    *tail= taken;
    tail= &taken->next_key_part;
    *source= taken->next_key_part;
  }
  /* At most one chain has elements left; append them unchanged */
  *tail= key1 ? key1 : key2;
  return head;
}

/* Flags telling key_and() which of its two arguments is not a simple key */
#define CLONE_KEY1_MAYBE 1
#define CLONE_KEY2_MAYBE 2
/*
  Exchange the CLONE_KEY1_MAYBE and CLONE_KEY2_MAYBE bits of A.
  The argument and the whole expansion are parenthesized so that the macro
  can be used with any argument expression and inside any larger expression.
*/
#define swap_clone_flag(A) ((((A) & 1) << 1) | (((A) & 2) >> 1))


/*
  Build the tree for "cond_of_tree1 AND cond_of_tree2".

  A missing (0) tree, or an ALWAYS tree, leaves the other tree as the
  result; an IMPOSSIBLE tree is the result by itself. When one tree is
  MAYBE the other one is returned with its type set to KEY_SMALLER.
  Otherwise the keys are ANDed key by key into tree1, which is returned.
*/

static SEL_TREE *
tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_and");
  if (!tree1)
    DBUG_RETURN(tree2);
  if (!tree2)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree1->type == SEL_TREE::MAYBE)
  {
    if (tree2->type == SEL_TREE::KEY)
      tree2->type= SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree2);
  }
  if (tree2->type == SEL_TREE::MAYBE)
  {
    tree1->type= SEL_TREE::KEY_SMALLER;
    DBUG_RETURN(tree1);
  }

  key_map anded_keys;
  anded_keys.clear_all();

  /* Join the trees key per key */
  for (uint key_no= 0; key_no < param->keys; key_no++)
  {
    SEL_ARG **slot1= tree1->keys + key_no;
    SEL_ARG **slot2= tree2->keys + key_no;
    if (!*slot1 && !*slot2)
      continue;                                 // Key used by neither tree

    uint clone_flag= 0;
    if (*slot1 && !(*slot1)->simple_key())
      clone_flag|= CLONE_KEY1_MAYBE;
    if (*slot2 && !(*slot2)->simple_key())
      clone_flag|= CLONE_KEY2_MAYBE;

    *slot1= key_and(*slot1, *slot2, clone_flag);
    if (*slot1 && (*slot1)->type == SEL_ARG::IMPOSSIBLE)
    {
      /* One impossible key makes the whole conjunction impossible */
      tree1->type= SEL_TREE::IMPOSSIBLE;
      DBUG_RETURN(tree1);
    }
    anded_keys.set_bit(key_no);
#ifdef EXTRA_DEBUG
    if (*slot1)
      (*slot1)->test_use_count(*slot1);
#endif
  }
  tree1->keys_map= anded_keys;

  if (anded_keys.is_clear_all())
  {
    /* ok, both trees are index_merge trees */
    imerge_list_and_list(&tree1->merges, &tree2->merges);
  }
  else
  {
    /* dispose index_merge if there is a "range" option */
    tree1->merges.empty();
  }
  DBUG_RETURN(tree1);
}


5380
/*
5381 5382
  Check if two SEL_TREES can be combined into one (i.e. a single key range
  read can be constructed for "cond_of_tree1 OR cond_of_tree2" ) without
5383
  using index_merge.
5384 5385
*/

5386 5387
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, 
                           RANGE_OPT_PARAM* param)
5388
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5389
  key_map common_keys= tree1->keys_map;
5390
  DBUG_ENTER("sel_trees_can_be_ored");
5391
  common_keys.intersect(tree2->keys_map);
5392

sergefp@mysql.com's avatar
sergefp@mysql.com committed
5393
  if (common_keys.is_clear_all())
monty@mysql.com's avatar
monty@mysql.com committed
5394
    DBUG_RETURN(FALSE);
5395 5396

  /* trees have a common key, check if they refer to same key part */
5397
  SEL_ARG **key1,**key2;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5398
  for (uint key_no=0; key_no < param->keys; key_no++)
5399
  {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5400
    if (common_keys.is_set(key_no))
5401 5402 5403 5404 5405
    {
      key1= tree1->keys + key_no;
      key2= tree2->keys + key_no;
      if ((*key1)->part == (*key2)->part)
      {
monty@mysql.com's avatar
monty@mysql.com committed
5406
        DBUG_RETURN(TRUE);
5407 5408 5409
      }
    }
  }
monty@mysql.com's avatar
monty@mysql.com committed
5410
  DBUG_RETURN(FALSE);
5411
}
bk@work.mysql.com's avatar
bk@work.mysql.com committed
5412

5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476

/*
  Remove the trees that are not suitable for record retrieval.
  SYNOPSIS
    param  Range analysis parameter
    tree   Tree to be processed, tree->type is KEY or KEY_SMALLER
 
  DESCRIPTION
    This function walks through tree->keys[] and removes the SEL_ARG* trees
    that are not "maybe" trees (*) and cannot be used to construct quick range
    selects.
    (*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
          these types here as well.

    A SEL_ARG* tree cannot be used to construct quick select if it has
    tree->part != 0. (e.g. it could represent "keypart2 < const").

    WHY THIS FUNCTION IS NEEDED
    
    Normally we allow construction of SEL_TREE objects that have SEL_ARG
    trees that do not allow quick range select construction. For example for
    " keypart1=1 AND keypart2=2 " the execution will proceed as follows:
    tree1= SEL_TREE { SEL_ARG{keypart1=1} }
    tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
                                               from this
    call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
                                   tree.
    
    There is an exception though: when we construct index_merge SEL_TREE,
    any SEL_ARG* tree that cannot be used to construct quick range select can
    be removed, because current range analysis code doesn't provide any way
    that tree could be later combined with another tree.
    Consider an example: we should not construct
    st1 = SEL_TREE { 
      merges = SEL_IMERGE { 
                            SEL_TREE(t.key1part1 = 1), 
                            SEL_TREE(t.key2part2 = 2)   -- (*)
                          } 
                   };
    because 
     - (*) cannot be used to construct quick range select, 
     - There is no execution path that would cause (*) to be converted to 
       a tree that could be used.

    The latter is easy to verify: first, notice that the only way to convert
    (*) into a usable tree is to call tree_and(something, (*)).

    Second look at what tree_and/tree_or function would do when passed a
    SEL_TREE that has the structure like st1 tree has, and conclude that 
    tree_and(something, (*)) will not be called.

  RETURN
    0  Ok, some suitable trees left
    1  No tree->keys[] left.
*/

static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
  /* Stays TRUE until a key tree that starts at key part 0 is found */
  bool nothing_left= TRUE;

  for (uint key_no= 0; key_no < param->keys; key_no++)
  {
    SEL_ARG *key_tree= tree->keys[key_no];
    if (!key_tree)
      continue;
    if (key_tree->part == 0)
    {
      nothing_left= FALSE;                      // This one is kept
      continue;
    }
    /* Does not start at the first key part: drop it from the tree */
    tree->keys[key_no]= NULL;
    tree->keys_map.clear_bit(key_no);
  }
  return nothing_left;
}


bk@work.mysql.com's avatar
bk@work.mysql.com committed
5489
/*
  Build a SEL_TREE for the disjunction (OR) of two SEL_TREEs.

  SYNOPSIS
    tree_or()
      param  Range analysis parameter (param->keys, param->remove_jump_scans
             are read here)
      tree1  First tree; may be modified and returned as the result
      tree2  Second tree; may be modified and returned as the result

  RETURN
    Resulting tree. 0 is returned if either argument is 0, if no per-key
    tree is left after OR-ing key by key, or if building the index merge
    structure fails.
*/

static SEL_TREE *
tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{
  DBUG_ENTER("tree_or");
  if (!tree1 || !tree2)
    DBUG_RETURN(0);
  if (tree1->type == SEL_TREE::IMPOSSIBLE || tree2->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree2);
  if (tree2->type == SEL_TREE::IMPOSSIBLE || tree1->type == SEL_TREE::ALWAYS)
    DBUG_RETURN(tree1);
  if (tree1->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree1);                         // Can't use this
  if (tree2->type == SEL_TREE::MAYBE)
    DBUG_RETURN(tree2);

  SEL_TREE *result= 0;
  key_map  result_keys;
  result_keys.clear_all();
  if (sel_trees_can_be_ored(tree1, tree2, param))
  {
    /* Join the trees key per key */
    SEL_ARG **key1,**key2,**end;
    for (key1= tree1->keys,key2= tree2->keys,end= key1+param->keys ;
         key1 != end ; key1++,key2++)
    {
      *key1=key_or(*key1,*key2);
      if (*key1)
      {
        result=tree1;                           // Added to tree1
        result_keys.set_bit(key1 - tree1->keys);
#ifdef EXTRA_DEBUG
        (*key1)->test_use_count(*key1);
#endif
      }
    }
    /* Only keys for which key_or() produced a tree remain marked as used */
    if (result)
      result->keys_map= result_keys;
  }
  else
  {
    /* ok, two trees have KEY type but cannot be used without index merge */
    if (tree1->merges.is_empty() && tree2->merges.is_empty())
    {
      if (param->remove_jump_scans)
      {
        /*
          Note the short-circuit: tree2 is only pruned when tree1 still has
          at least one tree left.
        */
        bool no_trees= remove_nonrange_trees(param, tree1);
        no_trees= no_trees || remove_nonrange_trees(param, tree2);
        if (no_trees)
          DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
      }
      SEL_IMERGE *merge;
      /* both trees are "range" trees, produce new index merge structure */
      if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
          (result->merges.push_back(merge)) ||
          (merge->or_sel_tree(param, tree1)) ||
          (merge->or_sel_tree(param, tree2)))
        result= NULL;
      else
        result->type= tree1->type;
    }
    else if (!tree1->merges.is_empty() && !tree2->merges.is_empty())
    {
      /* both trees carry index merges: OR the two merge lists */
      if (imerge_list_or_list(param, &tree1->merges, &tree2->merges))
        result= new SEL_TREE(SEL_TREE::ALWAYS);
      else
        result= tree1;
    }
    else
    {
      /* one tree is index merge tree and another is range tree */
      if (tree1->merges.is_empty())
        swap_variables(SEL_TREE*, tree1, tree2);

      /* From here on tree1 is the index merge tree, tree2 the range tree */
      if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
         DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
      /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
      if (imerge_list_or_tree(param, &tree1->merges, tree2))
        result= new SEL_TREE(SEL_TREE::ALWAYS);
      else
        result= tree1;
    }
  }
  DBUG_RETURN(result);
}


/* And key trees where key1->part < key2 -> part */

/*
  Attach key2 (AND-ed with whatever is already there) as the next key part
  of every interval in key1.

    key1        Tree on the smaller key part; modified in place
    key2        Tree on the larger key part
    clone_flag  Passed through unchanged to key_and()

  Returns the root of the updated key1 tree (with use_count incremented),
  or &null_element if every interval of key1 was deleted as impossible.
*/

static SEL_ARG *
and_all_keys(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
{
  SEL_ARG *next;
  ulong use_count=key1->use_count;

  /* key2 will be referenced once per interval of key1 */
  if (key1->elements != 1)
  {
    key2->use_count+=key1->elements-1;
    key2->increment_use_count((int) key1->elements-1);
  }
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    /* Reset tree and list links so that key1 is a single stand-alone node */
    key1->right= key1->left= &null_element;
    key1->next= key1->prev= 0;
  }
  for (next=key1->first(); next ; next=next->next)
  {
    if (next->next_key_part)
    {
      SEL_ARG *tmp=key_and(next->next_key_part,key2,clone_flag);
      if (tmp && tmp->type == SEL_ARG::IMPOSSIBLE)
      {
        /* This interval can never match: remove it from key1 */
        key1=key1->tree_delete(next);
        continue;
      }
      next->next_key_part=tmp;
      if (use_count)
        next->increment_use_count(use_count);
    }
    else
      next->next_key_part=key2;
  }
  if (!key1)
    return &null_element;                       // Impossible ranges
  key1->use_count++;
  return key1;
}


/*
  Produce a SEL_ARG tree for the conjunction (AND) of two SEL_ARG trees.

    key1, key2  Trees to combine; either may be 0, in which case the other
                one is returned unchanged
    clone_flag  CLONE_KEY1_MAYBE / CLONE_KEY2_MAYBE bits; swapped with
                swap_clone_flag() whenever key1 and key2 are swapped

  Returns the combined tree, &null_element when the intersection is empty
  (or clone_and() fails), or 0 when cloning fails or a GEOM_FLAG range is
  involved.
*/

static SEL_ARG *
key_and(SEL_ARG *key1,SEL_ARG *key2,uint clone_flag)
{
  if (!key1)
    return key2;
  if (!key2)
    return key1;
  if (key1->part != key2->part)
  {
    if (key1->part > key2->part)
    {
      swap_variables(SEL_ARG *, key1, key2);
      clone_flag=swap_clone_flag(clone_flag);
    }
    // key1->part < key2->part
    key1->use_count--;
    if (key1->use_count > 0)                    // Still shared: work on a copy
      if (!(key1= key1->clone_tree()))
        return 0;                               // OOM
    return and_all_keys(key1,key2,clone_flag);
  }

  if (((clone_flag & CLONE_KEY2_MAYBE) &&
       !(clone_flag & CLONE_KEY1_MAYBE) &&
       key2->type != SEL_ARG::MAYBE_KEY) ||
      key1->type == SEL_ARG::MAYBE_KEY)
  {                                             // Put simple key in key2
    swap_variables(SEL_ARG *, key1, key2);
    clone_flag=swap_clone_flag(clone_flag);
  }

  /* If one of the key is MAYBE_KEY then the found region may be smaller */
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    if (key1->use_count > 1)
    {
      key1->use_count--;
      if (!(key1=key1->clone_tree()))
        return 0;                               // OOM
      key1->use_count++;
    }
    if (key1->type == SEL_ARG::MAYBE_KEY)
    {                                           // Both are maybe key
      key1->next_key_part=key_and(key1->next_key_part,key2->next_key_part,
                                  clone_flag);
      if (key1->next_key_part &&
          key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
        return key1;
    }
    else
    {
      key1->maybe_smaller();
      if (key2->next_key_part)
      {
        key1->use_count--;                      // Incremented in and_all_keys
        return and_all_keys(key1,key2,clone_flag);
      }
      key2->use_count--;                        // Key2 doesn't have a tree
    }
    return key1;
  }

  /*
    This test used to appear twice in a row; the second copy could never be
    reached with a true condition and has been dropped.
  */
  if ((key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    key1->free_tree();
    key2->free_tree();
    return 0;                                   // Can't optimize this
  }

  key1->use_count--;
  key2->use_count--;
  SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;

  /* Walk both interval lists in parallel, collecting the overlaps */
  while (e1 && e2)
  {
    int cmp=e1->cmp_min_to_min(e2);
    if (cmp < 0)
    {
      if (get_range(&e1,&e2,key1))
        continue;
    }
    else if (get_range(&e2,&e1,key2))
      continue;
    SEL_ARG *next=key_and(e1->next_key_part,e2->next_key_part,clone_flag);
    e1->increment_use_count(1);
    e2->increment_use_count(1);
    if (!next || next->type != SEL_ARG::IMPOSSIBLE)
    {
      SEL_ARG *new_arg= e1->clone_and(e2);
      if (!new_arg)
        return &null_element;                   // End of memory
      new_arg->next_key_part=next;
      if (!new_tree)
      {
        new_tree=new_arg;
      }
      else
        new_tree=new_tree->insert(new_arg);
    }
    if (e1->cmp_max_to_max(e2) < 0)
      e1=e1->next;                              // e1 can't overlapp next e2
    else
      e2=e2->next;
  }
  key1->free_tree();
  key2->free_tree();
  if (!new_tree)
    return &null_element;                       // Impossible range
  return new_tree;
}


/*
  Helper for key_and(): position *e1 on the interval of the tree rooted at
  root1 that may overlap *e2.

  Returns 1 when the caller should restart its loop iteration (either *e1
  ran off the end of the list, or *e2 was advanced because nothing in the
  tree overlaps it), 0 when *e1 and *e2 should be compared further.
*/

static bool
get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
{
  /* first e1->min < e2->min */
  SEL_ARG *candidate= root1->find_range(*e2);
  *e1= candidate;
  if (candidate->cmp_max_to_min(*e2) >= 0)
    return 0;

  /* The found interval ends before *e2 starts: try its successor */
  candidate= candidate->next;
  *e1= candidate;
  if (!candidate)
    return 1;
  if (candidate->cmp_min_to_max(*e2) <= 0)
    return 0;

  /* The successor starts after *e2 ends: move on to the next e2 */
  *e2= (*e2)->next;
  return 1;
}


/*
  Produce a SEL_ARG tree for the disjunction (OR) of two SEL_ARG trees.

    key1, key2  Trees to combine. Their use_count is decremented here; the
                intervals of key2 are merged into key1 (or into a clone of
                it when key1 is still referenced elsewhere).

  Returns the root of the combined tree with use_count incremented, a
  MAYBE_KEY SEL_ARG when the result covers the full range but a maybe_flag
  was set, or 0 when no tree results (an argument is 0, the key parts
  differ, a GEOM_FLAG range is involved, the result is the full range, or
  an allocation failed).
*/

static SEL_ARG *
key_or(SEL_ARG *key1,SEL_ARG *key2)
{
  if (!key1)
  {
    if (key2)
    {
      key2->use_count--;
      key2->free_tree();
    }
    return 0;
  }
  if (!key2)
  {
    key1->use_count--;
    key1->free_tree();
    return 0;
  }
  key1->use_count--;
  key2->use_count--;

  if (key1->part != key2->part ||
      (key1->min_flag | key2->min_flag) & GEOM_FLAG)
  {
    key1->free_tree();
    key2->free_tree();
    return 0;                                   // Can't optimize this
  }

  // If one of the key is MAYBE_KEY then the found region may be bigger
  if (key1->type == SEL_ARG::MAYBE_KEY)
  {
    key2->free_tree();
    key1->use_count++;
    return key1;
  }
  if (key2->type == SEL_ARG::MAYBE_KEY)
  {
    key1->free_tree();
    key2->use_count++;
    return key2;
  }

  if (key1->use_count > 0)
  {
    /*
      key1 is still referenced elsewhere and must not be modified in place.
      Prefer to modify an unshared key2, or else the smaller tree.
    */
    if (key2->use_count == 0 || key1->elements > key2->elements)
    {
      swap_variables(SEL_ARG *,key1,key2);
    }
    /*
      Clone only if the tree we are going to modify is still shared.
      (This used to be '||', which gave up without trying to clone a shared
      tree and cloned an unshared one for no reason.)
    */
    if (key1->use_count > 0 && !(key1=key1->clone_tree()))
      return 0;                                 // OOM
  }

  // Add tree at key2 to tree at key1
  bool key2_shared=key2->use_count != 0;
  key1->maybe_flag|=key2->maybe_flag;

  for (key2=key2->first(); key2; )
  {
    SEL_ARG *tmp=key1->find_range(key2);        // Find key1.min <= key2.min
    int cmp;

    if (!tmp)
    {
      tmp=key1->first();                        // tmp.min > key2.min
      cmp= -1;
    }
    else if ((cmp=tmp->cmp_max_to_min(key2)) < 0)
    {                                           // Found tmp.max < key2.min
      SEL_ARG *next=tmp->next;
      if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
      {
        // Join near ranges like tmp.max < 0 and key2.min >= 0
        SEL_ARG *key2_next=key2->next;
        if (key2_shared)
        {
          if (!(key2=new SEL_ARG(*key2)))
            return 0;                           // out of memory
          key2->increment_use_count(key1->use_count+1);
          key2->next=key2_next;                 // New copy of key2
        }
        key2->copy_min(tmp);
        if (!(key1=key1->tree_delete(tmp)))
        {                                       // Only one key in tree
          key1=key2;
          key1->make_root();
          key2=key2_next;
          break;
        }
      }
      if (!(tmp=next))                          // tmp.min > key2.min
        break;                                  // Copy rest of key2
    }
    if (cmp < 0)
    {                                           // tmp.min > key2.min
      int tmp_cmp;
      if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0) // if tmp.min > key2.max
      {
        if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
        {                                       // ranges are connected
          tmp->copy_min_to_min(key2);
          key1->merge_flags(key2);
          if (tmp->min_flag & NO_MIN_RANGE &&
              tmp->max_flag & NO_MAX_RANGE)
          {
            if (key1->maybe_flag)
              return new SEL_ARG(SEL_ARG::MAYBE_KEY);
            return 0;
          }
          key2->increment_use_count(-1);        // Free not used tree
          key2=key2->next;
          continue;
        }
        else
        {
          SEL_ARG *next=key2->next;             // Keys are not overlapping
          if (key2_shared)
          {
            SEL_ARG *cpy= new SEL_ARG(*key2);   // Must make copy
            if (!cpy)
              return 0;                         // OOM
            key1=key1->insert(cpy);
            key2->increment_use_count(key1->use_count+1);
          }
          else
            key1=key1->insert(key2);            // Will destroy key2_root
          key2=next;
          continue;
        }
      }
    }

    // tmp.max >= key2.min && tmp.min <= key.max  (overlapping ranges)
    if (eq_tree(tmp->next_key_part,key2->next_key_part))
    {
      if (tmp->is_same(key2))
      {
        tmp->merge_flags(key2);                 // Copy maybe flags
        key2->increment_use_count(-1);          // Free not used tree
      }
      else
      {
        /* Swallow every following interval that key2 also overlaps */
        SEL_ARG *last=tmp;
        while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
               eq_tree(last->next->next_key_part,key2->next_key_part))
        {
          SEL_ARG *save=last;
          last=last->next;
          key1=key1->tree_delete(save);
        }
        last->copy_min(tmp);
        if (last->copy_min(key2) || last->copy_max(key2))
        {                                       // Full range
          key1->free_tree();
          for (; key2 ; key2=key2->next)
            key2->increment_use_count(-1);      // Free not used tree
          if (key1->maybe_flag)
            return new SEL_ARG(SEL_ARG::MAYBE_KEY);
          return 0;
        }
      }
      key2=key2->next;
      continue;
    }

    if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
    {                                           // tmp.min <= x < key2.min
      SEL_ARG *new_arg=tmp->clone_first(key2);
      if (!new_arg)
        return 0;                               // OOM
      /*
        NOTE(review): this takes next_key_part from key1 (the tree root),
        not from tmp (the interval being split) - confirm this is intended.
      */
      if ((new_arg->next_key_part= key1->next_key_part))
        new_arg->increment_use_count(key1->use_count+1);
      tmp->copy_min_to_min(key2);
      key1=key1->insert(new_arg);
    }

    // tmp.min >= key2.min && tmp.min <= key2.max
    SEL_ARG key(*key2);                         // Get copy we can modify
    for (;;)
    {
      if (tmp->cmp_min_to_min(&key) > 0)
      {                                         // key.min <= x < tmp.min
        SEL_ARG *new_arg=key.clone_first(tmp);
        if (!new_arg)
          return 0;                             // OOM
        if ((new_arg->next_key_part=key.next_key_part))
          new_arg->increment_use_count(key1->use_count+1);
        key1=key1->insert(new_arg);
      }
      if ((cmp=tmp->cmp_max_to_max(&key)) <= 0)
      {                                         // tmp.min. <= x <= tmp.max
        tmp->maybe_flag|= key.maybe_flag;
        key.increment_use_count(key1->use_count+1);
        tmp->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
        if (!cmp)                               // Key2 is ready
          break;
        key.copy_max_to_min(tmp);
        if (!(tmp=tmp->next))
        {
          SEL_ARG *tmp2= new SEL_ARG(key);
          if (!tmp2)
            return 0;                           // OOM
          key1=key1->insert(tmp2);
          key2=key2->next;
          goto end;
        }
        if (tmp->cmp_min_to_max(&key) > 0)
        {
          SEL_ARG *tmp2= new SEL_ARG(key);
          if (!tmp2)
            return 0;                           // OOM
          key1=key1->insert(tmp2);
          break;
        }
      }
      else
      {
        SEL_ARG *new_arg=tmp->clone_last(&key); // tmp.min <= x <= key.max
        if (!new_arg)
          return 0;                             // OOM
        tmp->copy_max_to_min(&key);
        tmp->increment_use_count(key1->use_count+1);
        /* Increment key count as it may be used for next loop */
        key.increment_use_count(1);
        new_arg->next_key_part=key_or(tmp->next_key_part,key.next_key_part);
        key1=key1->insert(new_arg);
        break;
      }
    }
    key2=key2->next;
  }

end:
  /* Whatever is left of key2 lies beyond the last interval of key1 */
  while (key2)
  {
    SEL_ARG *next=key2->next;
    if (key2_shared)
    {
      SEL_ARG *tmp=new SEL_ARG(*key2);          // Must make copy
      if (!tmp)
        return 0;
      key2->increment_use_count(key1->use_count+1);
      key1=key1->insert(tmp);
    }
    else
      key1=key1->insert(key2);                  // Will destroy key2_root
    key2=next;
  }
  key1->use_count++;
  return key1;
}


/* Compare if two trees are equal */

static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
{
  /* Same node (or both 0) is trivially equal */
  if (a == b)
    return 1;
  if (!a || !b)
    return 0;
  if (!a->is_same(b))
    return 0;

  /* Left subtrees: both present and equal, or both absent */
  const bool a_has_left= (a->left != &null_element);
  const bool b_has_left= (b->left != &null_element);
  if (a_has_left != b_has_left)
    return 0;
  if (a_has_left && !eq_tree(a->left, b->left))
    return 0;

  /* Right subtrees: same rule */
  const bool a_has_right= (a->right != &null_element);
  const bool b_has_right= (b->right != &null_element);
  if (a_has_right != b_has_right)
    return 0;
  if (a_has_right && !eq_tree(a->right, b->right))
    return 0;

  /* Sub range: trees hanging off the next key part must match as well */
  SEL_ARG *a_sub= a->next_key_part;
  SEL_ARG *b_sub= b->next_key_part;
  if (a_sub == b_sub)
    return 1;
  if (!a_sub || !b_sub)
    return 0;
  return eq_tree(a_sub, b_sub);
}


/*
  Insert a new interval into the tree rooted at 'this'.

    key   Node to insert; its parent/left/right/next/prev links are
          overwritten here

  The node is placed by comparing minimum endpoints, linked into the
  next/prev list next to its tree parent, and the tree is then rebalanced.
  Returns the (possibly new) root, which receives use_count, elements+1 and
  maybe_flag from the old root.
*/

SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
  SEL_ARG *element,**par,*last_element;
  LINT_INIT(par); LINT_INIT(last_element);

  /* Descend to the leaf position where key belongs */
  for (element= this; element != &null_element ; )
  {
    last_element=element;
    if (key->cmp_min_to_min(element) > 0)
    {
      par= &element->right; element= element->right;
    }
    else
    {
      par = &element->left; element= element->left;
    }
  }
  *par=key;
  key->parent=last_element;
  /* Link in list */
  if (par == &last_element->left)
  {
    /* key became a left child: it goes just before its parent */
    key->next=last_element;
    if ((key->prev=last_element->prev))
      key->prev->next=key;
    last_element->prev=key;
  }
  else
  {
    /* key became a right child: it goes just after its parent */
    if ((key->next=last_element->next))
      key->next->prev=key;
    key->prev=last_element;
    last_element->next=key;
  }
  key->left=key->right= &null_element;
  SEL_ARG *root=rb_insert(key);                 // rebalance tree
  root->use_count=this->use_count;              // copy root info
  root->elements= this->elements+1;
  root->maybe_flag=this->maybe_flag;
  return root;
}


/*
** Find best key with min <= given key
** Because the call context this should never return 0 to get_range
*/

SEL_ARG *
SEL_ARG::find_range(SEL_ARG *key)
{
  /* Best candidate so far: the last node seen whose min is below key's min */
  SEL_ARG *best= 0;

  for (SEL_ARG *node= this; node != &null_element; )
  {
    const int order= node->cmp_min_to_min(key);
    if (!order)
      return node;                              // Exact match on min
    if (order > 0)
      node= node->left;                         // node.min is too big
    else
    {
      best= node;                               // Usable; look for a closer one
      node= node->right;
    }
  }
  return best;
}


/*
  Remove an element from the tree

  SYNOPSIS
    tree_delete()
    key		Key that is to be deleted from tree (this)

  NOTE
    This also frees all sub trees that are used by the element

  RETURN
    root of new tree (with key deleted), or 0 if the tree became empty
*/

SEL_ARG *
SEL_ARG::tree_delete(SEL_ARG *key)
{
  enum leaf_color remove_color;
  SEL_ARG *root,*nod,**par,*fix_par;
  DBUG_ENTER("tree_delete");

  root=this;
  this->parent= 0;

  /* Unlink from list */
  if (key->prev)
    key->prev->next=key->next;
  if (key->next)
    key->next->prev=key->prev;
  key->increment_use_count(-1);
  /* par is the pointer slot that currently points at key */
  if (!key->parent)
    par= &root;
  else
    par=key->parent_ptr();

  if (key->left == &null_element)
  {
    /* At most a right child: it takes key's place */
    *par=nod=key->right;
    fix_par=key->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= key->color;
  }
  else if (key->right == &null_element)
  {
    /* Only a left child: it takes key's place */
    *par= nod=key->left;
    nod->parent=fix_par=key->parent;
    remove_color= key->color;
  }
  else
  {
    SEL_ARG *tmp=key->next;                     // next bigger key (exist!)
    nod= *tmp->parent_ptr()= tmp->right;        // unlink tmp from tree
    fix_par=tmp->parent;
    if (nod != &null_element)
      nod->parent=fix_par;
    remove_color= tmp->color;

    tmp->parent=key->parent;                    // Move node in place of key
    (tmp->left=key->left)->parent=tmp;
    if ((tmp->right=key->right) != &null_element)
      tmp->right->parent=tmp;
    tmp->color=key->color;
    *par=tmp;
    if (fix_par == key)                         // key->right == key->next
      fix_par=tmp;                              // new parent of nod
  }

  if (root == &null_element)
    DBUG_RETURN(0);                             // Maybe root later
  if (remove_color == BLACK)
    root=rb_delete_fixup(root,nod,fix_par);
  test_rb_tree(root,root->parent);

  root->use_count=this->use_count;              // Fix root counters
  root->elements=this->elements-1;
  root->maybe_flag=this->maybe_flag;
  DBUG_RETURN(root);
}


	/* Functions to fix up the tree after insert and delete */

static void left_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  /* The right child of 'leaf' moves up; 'leaf' becomes its left child */
  SEL_ARG *pivot= leaf->right;
  SEL_ARG *moved_subtree= pivot->left;

  leaf->right= moved_subtree;
  if (moved_subtree != &null_element)
    moved_subtree->parent= leaf;

  SEL_ARG *upper= leaf->parent;
  pivot->parent= upper;
  if (upper)
    *leaf->parent_ptr()= pivot;
  else
    *root= pivot;                               // 'leaf' was the root

  pivot->left= leaf;
  leaf->parent= pivot;
}

static void right_rotate(SEL_ARG **root,SEL_ARG *leaf)
{
  /* The left child of 'leaf' moves up; 'leaf' becomes its right child */
  SEL_ARG *pivot= leaf->left;
  SEL_ARG *moved_subtree= pivot->right;

  leaf->left= moved_subtree;
  if (moved_subtree != &null_element)
    moved_subtree->parent= leaf;

  SEL_ARG *upper= leaf->parent;
  pivot->parent= upper;
  if (upper)
    *leaf->parent_ptr()= pivot;
  else
    *root= pivot;                               // 'leaf' was the root

  pivot->right= leaf;
  leaf->parent= pivot;
}


/*
  Restore the red-black colouring after 'leaf' has been linked into the
  tree rooted at 'this'.

  The new node is coloured RED; while its parent is also RED the conflict
  is resolved either by recolouring (when the parent's sibling 'y' is RED)
  or by one or two rotations followed by a recolouring. The root is always
  left BLACK. Returns the root of the tree, which may have changed because
  of the rotations.
*/

SEL_ARG *
SEL_ARG::rb_insert(SEL_ARG *leaf)
{
  SEL_ARG *y,*par,*par2,*root;
  root= this; root->parent= 0;

  leaf->color=RED;
  while (leaf != root && (par= leaf->parent)->color == RED)
  {					// This can't be root or 1 level under
    /* par2 is the grandparent; y below is the sibling of par */
    if (par == (par2= leaf->parent->parent)->left)
    {
      y= par2->right;
      if (y->color == RED)
      {
	/* Recolour and continue the check from the grandparent */
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	if (leaf == par->right)
	{
	  left_rotate(&root,leaf->parent);
	  par=leaf;			/* leaf is now parent to old leaf */
	}
	par->color=BLACK;
	par2->color=RED;
	right_rotate(&root,par2);
	break;
      }
    }
    else
    {
      /* Mirror image of the branch above: par is the right child of par2 */
      y= par2->left;
      if (y->color == RED)
      {
	par->color=BLACK;
	y->color=BLACK;
	leaf=par2;
	leaf->color=RED;		/* And the loop continues */
      }
      else
      {
	if (leaf == par->left)
	{
	  right_rotate(&root,par);
	  par=leaf;
	}
	par->color=BLACK;
	par2->color=RED;
	left_rotate(&root,par2);
	break;
      }
    }
  }
  root->color=BLACK;
  test_rb_tree(root,root->parent);
  return root;
}


/*
  Restore the red-black colouring after a BLACK node has been unlinked.

    root  Current root of the tree
    key   Node that took the place of the unlinked node; tree_delete() may
          pass &null_element here
    par   Parent of 'key'; passed separately rather than read from key

  In each iteration 'w' is the sibling of x under 'par'. Depending on the
  colours of w and of w's children the loop either recolours and moves up
  to the parent, or rotates, recolours and stops. Returns the root of the
  tree, which may have changed because of the rotations.
*/

SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key,SEL_ARG *par)
{
  SEL_ARG *x,*w;
  root->parent=0;

  x= key;
  while (x != root && x->color == SEL_ARG::BLACK)
  {
    if (x == par->left)
    {
      w=par->right;
      if (w->color == SEL_ARG::RED)
      {
	/* Red sibling: rotate so that x gets a new sibling */
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	left_rotate(&root,par);
	w=par->right;
      }
      if (w->left->color == SEL_ARG::BLACK && w->right->color == SEL_ARG::BLACK)
      {
	/* Both children of the sibling are black: recolour and move up */
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->right->color == SEL_ARG::BLACK)
	{
	  w->left->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  right_rotate(&root,w);
	  w=par->right;
	}
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->right->color=SEL_ARG::BLACK;
	left_rotate(&root,par);
	x=root;
	break;
      }
    }
    else
    {
      /* Mirror image of the branch above: x is the right child of par */
      w=par->left;
      if (w->color == SEL_ARG::RED)
      {
	w->color=SEL_ARG::BLACK;
	par->color=SEL_ARG::RED;
	right_rotate(&root,par);
	w=par->left;
      }
      if (w->right->color == SEL_ARG::BLACK && w->left->color == SEL_ARG::BLACK)
      {
	w->color=SEL_ARG::RED;
	x=par;
      }
      else
      {
	if (w->left->color == SEL_ARG::BLACK)
	{
	  w->right->color=SEL_ARG::BLACK;
	  w->color=SEL_ARG::RED;
	  left_rotate(&root,w);
	  w=par->left;
	}
	w->color=par->color;
	par->color=SEL_ARG::BLACK;
	w->left->color=SEL_ARG::BLACK;
	right_rotate(&root,par);
	x=root;
	break;
      }
    }
    /* x was moved up to the old parent; continue one level higher */
    par=x->parent;
  }
  x->color=SEL_ARG::BLACK;
  return root;
}


6364
	/* Test that the properties for a red-black tree hold */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420

#ifdef EXTRA_DEBUG
/*
  Debug check of the subtree rooted at 'element', whose parent is expected
  to be 'parent'. Returns the number of BLACK nodes on any path down to
  &null_element, or -1 (after logging an error) when a check fails.
*/
int test_rb_tree(SEL_ARG *element,SEL_ARG *parent)
{
  if (element == &null_element)
    return 0;                                   // Found end of tree

  if (element->parent != parent)
  {
    sql_print_error("Wrong tree: Parent doesn't point at parent");
    return -1;
  }

  const bool has_red_child= (element->left->color == SEL_ARG::RED ||
                             element->right->color == SEL_ARG::RED);
  if (element->color == SEL_ARG::RED && has_red_child)
  {
    sql_print_error("Wrong tree: Found two red in a row");
    return -1;
  }

  if (element->left == element->right && element->left != &null_element)
  {                                             // Dummy test
    sql_print_error("Wrong tree: Found right == left");
    return -1;
  }

  const int left_blacks= test_rb_tree(element->left,element);
  const int right_blacks= test_rb_tree(element->right,element);
  if (left_blacks < 0 || right_blacks < 0)
    return -1;                                  // Error, no more warnings

  if (left_blacks != right_blacks)
  {
    sql_print_error("Wrong tree: Incorrect black-count: %d - %d",
                    left_blacks,right_blacks);
    return -1;
  }
  return left_blacks+(element->color == SEL_ARG::BLACK);
}

/*
  Count how many intervals reachable from 'root' have 'key' as their
  next_key_part. Sub-trees are only descended into while their key part
  number is smaller than key->part.
*/
static ulong count_key_part_usage(SEL_ARG *root, SEL_ARG *key)
{
  ulong usages= 0;
  for (SEL_ARG *range= root->first(); range; range= range->next)
  {
    SEL_ARG *sub_tree= range->next_key_part;
    if (!sub_tree)
      continue;
    if (sub_tree == key)
      usages++;
    if (sub_tree->part < key->part)
      usages+= count_key_part_usage(sub_tree,key);
  }
  return usages;
}


/*
  Debug check of the use counters in the tree rooted at 'this'.

    root  Root of the whole tree being checked; passed down unchanged when
          recursing into next_key_part trees

  Logs a message and stops at the first inconsistency found: a root whose
  use_count is not 1, a next_key_part tree that is referenced more often
  than its use_count says, or an 'elements' value that does not match the
  number of intervals in the list.
*/

void SEL_ARG::test_use_count(SEL_ARG *root)
{
  uint e_count=0;
  if (this == root && use_count != 1)
  {
    sql_print_information("Use_count: Wrong count %lu for root",use_count);
    return;
  }
  if (this->type != SEL_ARG::KEY_RANGE)
    return;
  for (SEL_ARG *pos=first(); pos ; pos=pos->next)
  {
    e_count++;
    if (pos->next_key_part)
    {
      ulong count=count_key_part_usage(root,pos->next_key_part);
      if (count > pos->next_key_part->use_count)
      {
        /* Pointers are cast to ulong so that they match the %lx conversion */
        sql_print_information("Use_count: Wrong count for key at 0x%lx, %lu should be %lu",
                              (ulong) pos,pos->next_key_part->use_count,count);
        return;
      }
      pos->next_key_part->test_use_count(root);
    }
  }
  if (e_count != elements)
    sql_print_warning("Wrong use count: %u (should be %u) for tree at 0x%lx",
                      e_count, elements, (ulong) this);
}

#endif


6452 6453 6454 6455 6456 6457 6458 6459 6460 6461
/*
  Calculate an estimate of the number of records that will be retrieved by
  a range scan on the given index using the given SEL_ARG intervals tree.
  SYNOPSIS
    check_quick_select
      param  Parameter from test_quick_select
      idx    Number of index to use in PARAM::key SEL_TREE::key
      tree   Transformed selection condition, tree->key[idx] holds intervals
             tree to be used for scanning.
  NOTES
    param->is_ror_scan is set to reflect if the key scan is a ROR (see
    is_key_scan_ror function for more info)
    param->table->quick_*, param->range_count (and maybe others) are
    updated with data of given key scan, see check_quick_keys for details.

  RETURN
    Estimate # of records to be retrieved.
    HA_POS_ERROR if estimate calculation failed due to table handler problems.

*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6472 6473 6474 6475 6476

static ha_rows
check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
{
  DBUG_ENTER("check_quick_select");

  /* Start pessimistic: the scan is not ROR until shown otherwise. */
  param->is_ror_scan= FALSE;

  if (tree == NULL)
    DBUG_RETURN(HA_POS_ERROR);                  // No tree, index is unusable

  param->max_key_part= 0;
  param->range_count= 0;
  const uint keynr= param->real_keynr[idx];

  if (tree->type == SEL_ARG::IMPOSSIBLE)
    DBUG_RETURN(0L);                            // Condition can never be true
  if (tree->type != SEL_ARG::KEY_RANGE || tree->part != 0)
    DBUG_RETURN(HA_POS_ERROR);                  // Tree is not usable for a scan

  /*
    Only BTREE (or unspecified) indexes are considered for ROR; for any other
    index type is_ror_scan stays FALSE and the clustered-PK test is skipped.
  */
  const enum ha_key_alg key_alg= param->table->key_info[keynr].algorithm;
  const bool ordered_index= (key_alg == HA_KEY_ALG_BTREE ||
                             key_alg == HA_KEY_ALG_UNDEF);

  /*
    A scan on the clustered primary key is a special case:
    check_quick_keys() does not recognize it as ROR, so is_ror_scan is
    cleared here and forced back to TRUE once the estimate has succeeded.
  */
  const bool cpk_scan= (ordered_index &&
                        param->table->s->primary_key == keynr &&
                        param->table->file->primary_key_is_clustered());
  if (ordered_index)
    param->is_ror_scan= !cpk_scan;

  const ha_rows records= check_quick_keys(param, idx, tree,
                                          param->min_key, 0,
                                          param->max_key, 0);
  if (records != HA_POS_ERROR)
  {
    /* Publish the estimate for this key in the table's quick_* members. */
    param->table->quick_keys.set_bit(keynr);
    param->table->quick_rows[keynr]= records;
    param->table->quick_key_parts[keynr]= param->max_key_part+1;

    if (cpk_scan)
      param->is_ror_scan= TRUE;
  }

  /* The handler may declare that scans on this index are never ROR. */
  if (param->table->file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
    param->is_ror_scan= FALSE;

  DBUG_PRINT("exit", ("Records: %lu", (ulong) records));
  DBUG_RETURN(records);
}


6528
/*
6529 6530
  Recursively calculate estimate of # rows that will be retrieved by
  key scan on key idx.
6531 6532
  SYNOPSIS
    check_quick_keys()
6533
      param         Parameter from test_quick select function.
6534
      idx           Number of key to use in PARAM::keys in list of used keys
6535 6536 6537
                    (param->real_keynr[idx] holds the key number in table)
      key_tree      SEL_ARG tree being examined.
      min_key       Buffer with partial min key value tuple
6538
      min_key_flag
6539
      max_key       Buffer with partial max key value tuple
6540 6541
      max_key_flag

6542
  NOTES
6543 6544
    The function does the recursive descent on the tree via SEL_ARG::left,
    SEL_ARG::right, and SEL_ARG::next_key_part edges. The #rows estimates
6545 6546
    are calculated using records_in_range calls at the leaf nodes and then
    summed.
6547

6548 6549
    param->min_key and param->max_key are used to hold prefixes of key value
    tuples.
6550 6551

    The side effects are:
6552

6553 6554
    param->max_key_part is updated to hold the maximum number of key parts used
      in scan minus 1.
6555 6556

    param->range_count is incremented if the function finds a range that
6557
      wasn't counted by the caller.
6558

6559 6560 6561
    param->is_ror_scan is cleared if the function detects that the key scan is
      not a Rowid-Ordered Retrieval scan ( see comments for is_key_scan_ror
      function for description of which key scans are ROR scans)
6562 6563
*/

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6564 6565 6566 6567 6568
static ha_rows
check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
		 char *min_key,uint min_key_flag, char *max_key,
		 uint max_key_flag)
{
monty@mysql.com's avatar
monty@mysql.com committed
6569 6570 6571
  ha_rows records=0, tmp;
  uint tmp_min_flag, tmp_max_flag, keynr, min_key_length, max_key_length;
  char *tmp_min_key, *tmp_max_key;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6572 6573 6574 6575

  param->max_key_part=max(param->max_key_part,key_tree->part);
  if (key_tree->left != &null_element)
  {
6576 6577 6578 6579 6580 6581
    /*
      There are at least two intervals for current key part, i.e. condition
      was converted to something like
        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
      This is not a ROR scan if the key is not Clustered Primary Key.
    */
monty@mysql.com's avatar
monty@mysql.com committed
6582
    param->is_ror_scan= FALSE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6583 6584 6585 6586 6587 6588
    records=check_quick_keys(param,idx,key_tree->left,min_key,min_key_flag,
			     max_key,max_key_flag);
    if (records == HA_POS_ERROR)			// Impossible
      return records;
  }

monty@mysql.com's avatar
monty@mysql.com committed
6589 6590
  tmp_min_key= min_key;
  tmp_max_key= max_key;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6591
  key_tree->store(param->key[idx][key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6592
		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);
monty@mysql.com's avatar
monty@mysql.com committed
6593 6594
  min_key_length= (uint) (tmp_min_key- param->min_key);
  max_key_length= (uint) (tmp_max_key- param->max_key);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6595

6596 6597
  if (param->is_ror_scan)
  {
6598
    /*
6599
      If the index doesn't cover entire key, mark the scan as non-ROR scan.
6600
      Actually we're cutting off some ROR scans here.
6601 6602 6603
    */
    uint16 fieldnr= param->table->key_info[param->real_keynr[idx]].
                    key_part[key_tree->part].fieldnr - 1;
6604
    if (param->table->field[fieldnr]->key_length() !=
6605
        param->key[idx][key_tree->part].length)
monty@mysql.com's avatar
monty@mysql.com committed
6606
      param->is_ror_scan= FALSE;
6607 6608
  }

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621
  if (key_tree->next_key_part &&
      key_tree->next_key_part->part == key_tree->part+1 &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
  {						// const key as prefix
    if (min_key_length == max_key_length &&
	!memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
	!key_tree->min_flag && !key_tree->max_flag)
    {
      tmp=check_quick_keys(param,idx,key_tree->next_key_part,
			   tmp_min_key, min_key_flag | key_tree->min_flag,
			   tmp_max_key, max_key_flag | key_tree->max_flag);
      goto end;					// Ugly, but efficient
    }
6622
    else
6623 6624
    {
      /* The interval for current key part is not c1 <= keyXpartY <= c1 */
monty@mysql.com's avatar
monty@mysql.com committed
6625
      param->is_ror_scan= FALSE;
6626
    }
6627

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645
    tmp_min_flag=key_tree->min_flag;
    tmp_max_flag=key_tree->max_flag;
    if (!tmp_min_flag)
      key_tree->next_key_part->store_min_key(param->key[idx], &tmp_min_key,
					     &tmp_min_flag);
    if (!tmp_max_flag)
      key_tree->next_key_part->store_max_key(param->key[idx], &tmp_max_key,
					     &tmp_max_flag);
    min_key_length= (uint) (tmp_min_key- param->min_key);
    max_key_length= (uint) (tmp_max_key- param->max_key);
  }
  else
  {
    tmp_min_flag=min_key_flag | key_tree->min_flag;
    tmp_max_flag=max_key_flag | key_tree->max_flag;
  }

  keynr=param->real_keynr[idx];
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
6646
  param->range_count++;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6647 6648
  if (!tmp_min_flag && ! tmp_max_flag &&
      (uint) key_tree->part+1 == param->table->key_info[keynr].key_parts &&
6649 6650
      (param->table->key_info[keynr].flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
      HA_NOSAME &&
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6651 6652 6653 6654
      min_key_length == max_key_length &&
      !memcmp(param->min_key,param->max_key,min_key_length))
    tmp=1;					// Max one record
  else
6655
  {
6656 6657
    if (param->is_ror_scan)
    {
6658 6659 6660 6661 6662 6663 6664 6665 6666
      /*
        If we get here, the condition on the key was converted to form
        "(keyXpart1 = c1) AND ... AND (keyXpart{key_tree->part - 1} = cN) AND
          somecond(keyXpart{key_tree->part})"
        Check if
          somecond is "keyXpart{key_tree->part} = const" and
          uncovered "tail" of KeyX parts is either empty or is identical to
          first members of clustered primary key.
      */
6667 6668
      if (!(min_key_length == max_key_length &&
            !memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) &&
6669
            !key_tree->min_flag && !key_tree->max_flag &&
6670
            is_key_scan_ror(param, keynr, key_tree->part + 1)))
monty@mysql.com's avatar
monty@mysql.com committed
6671
        param->is_ror_scan= FALSE;
6672 6673
    }

6674
    if (tmp_min_flag & GEOM_FLAG)
6675
    {
6676 6677 6678 6679 6680 6681 6682 6683
      key_range min_range;
      min_range.key=    (byte*) param->min_key;
      min_range.length= min_key_length;
      /* In this case tmp_min_flag contains the handler-read-function */
      min_range.flag=   (ha_rkey_function) (tmp_min_flag ^ GEOM_FLAG);

      tmp= param->table->file->records_in_range(keynr, &min_range,
                                                (key_range*) 0);
6684 6685 6686
    }
    else
    {
6687 6688 6689 6690 6691 6692
      key_range min_range, max_range;

      min_range.key=    (byte*) param->min_key;
      min_range.length= min_key_length;
      min_range.flag=   (tmp_min_flag & NEAR_MIN ? HA_READ_AFTER_KEY :
                         HA_READ_KEY_EXACT);
monty@mysql.com's avatar
monty@mysql.com committed
6693
      max_range.key=    (byte*) param->max_key;
6694 6695 6696 6697 6698 6699 6700 6701
      max_range.length= max_key_length;
      max_range.flag=   (tmp_max_flag & NEAR_MAX ?
                         HA_READ_BEFORE_KEY : HA_READ_AFTER_KEY);
      tmp=param->table->file->records_in_range(keynr,
                                               (min_key_length ? &min_range :
                                                (key_range*) 0),
                                               (max_key_length ? &max_range :
                                                (key_range*) 0));
6702 6703
    }
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6704 6705 6706 6707 6708 6709
 end:
  if (tmp == HA_POS_ERROR)			// Impossible range
    return tmp;
  records+=tmp;
  if (key_tree->right != &null_element)
  {
6710 6711 6712 6713 6714 6715
    /*
      There are at least two intervals for current key part, i.e. condition
      was converted to something like
        (keyXpartY less/equals c1) OR (keyXpartY more/equals c2).
      This is not a ROR scan if the key is not Clustered Primary Key.
    */
monty@mysql.com's avatar
monty@mysql.com committed
6716
    param->is_ror_scan= FALSE;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6717 6718 6719 6720 6721 6722 6723 6724 6725
    tmp=check_quick_keys(param,idx,key_tree->right,min_key,min_key_flag,
			 max_key,max_key_flag);
    if (tmp == HA_POS_ERROR)
      return tmp;
    records+=tmp;
  }
  return records;
}

6726

6727
/*
6728
  Check if key scan on given index with equality conditions on first n key
6729 6730 6731 6732
  parts is a ROR scan.

  SYNOPSIS
    is_key_scan_ror()
6733
      param  Parameter from test_quick_select
6734 6735 6736 6737
      keynr  Number of key in the table. The key must not be a clustered
             primary key.
      nparts Number of first key parts for which equality conditions
             are present.
6738

6739 6740 6741
  NOTES
    ROR (Rowid Ordered Retrieval) key scan is a key scan that produces
    ordered sequence of rowids (ha_xxx::cmp_ref is the comparison function)
6742

6743 6744 6745
    An index scan is a ROR scan if it is done using a condition in form

        "key1_1=c_1 AND ... AND key1_n=c_n"  (1)
6746

6747 6748
    where the index is defined on (key1_1, ..., key1_N [,a_1, ..., a_n])

6749
    and the table has a clustered Primary Key
6750

6751
    PRIMARY KEY(a_1, ..., a_n, b1, ..., b_k) with first key parts being
6752
    identical to uncovered parts of the key being scanned (2)
6753 6754

    Scans on HASH indexes are not ROR scans,
6755 6756 6757 6758 6759 6760
    any range scan on clustered primary key is ROR scan  (3)

    Check (1) is made in check_quick_keys()
    Check (3) is made in check_quick_select()
    Check (2) is made by this function.

6761
  RETURN
monty@mysql.com's avatar
monty@mysql.com committed
6762 6763
    TRUE  If the scan is ROR-scan
    FALSE otherwise
6764
*/
6765

6766 6767 6768 6769
static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts)
{
  KEY *scanned_key= param->table->key_info + keynr;
  /* [tail, tail_end) are the key parts not bound by equality conditions. */
  KEY_PART_INFO *tail= scanned_key->key_part + nparts;
  KEY_PART_INFO *tail_end= scanned_key->key_part + scanned_key->key_parts;

  /* Every key part has an equality condition: nothing left to compare. */
  if (tail == tail_end)
    return TRUE;

  const uint pk_number= param->table->s->primary_key;
  if (!param->table->file->primary_key_is_clustered() || pk_number == MAX_KEY)
    return FALSE;

  /*
    Walk the uncovered tail and the primary key in lock step; each tail part
    must use the same field and length as the primary key part at the same
    position.
  */
  KEY *pk= param->table->key_info + pk_number;
  KEY_PART_INFO *pk_part= pk->key_part;
  KEY_PART_INFO *pk_part_end= pk_part + pk->key_parts;
  while (tail != tail_end && pk_part != pk_part_end)
  {
    if (tail->field != pk_part->field || tail->length != pk_part->length)
      return FALSE;
    tail++;
    pk_part++;
  }

  /* ROR only if the whole tail was matched before the PK parts ran out. */
  return (tail == tail_end);
}


6794 6795
/*
  Create a QUICK_RANGE_SELECT from given key and SEL_ARG tree for that key.
6796

6797 6798
  SYNOPSIS
    get_quick_select()
6799
      param
6800
      idx          Index of used key in param->key.
6801 6802
      key_tree     SEL_ARG tree for the used key
      parent_alloc If not NULL, use it to allocate memory for
6803
                   quick select data. Otherwise use quick->alloc.
6804
  NOTES
6805
    The caller must call QUICK_SELECT::init for returned quick select
6806

6807
    CAUTION! This function may change thd->mem_root to a MEM_ROOT which will be
6808
    deallocated when the returned quick select is deleted.
6809 6810 6811 6812

  RETURN
    NULL on error
    otherwise created quick select
6813
*/
6814

6815 6816 6817
QUICK_RANGE_SELECT *
get_quick_select(PARAM *param,uint idx,SEL_ARG *key_tree,
                 MEM_ROOT *parent_alloc)
{
  QUICK_RANGE_SELECT *quick;
  DBUG_ENTER("get_quick_select");

  /* Spatial indexes get the GEOM flavour of the range select. */
  if (param->table->key_info[param->real_keynr[idx]].flags & HA_SPATIAL)
    quick=new QUICK_RANGE_SELECT_GEOM(param->thd, param->table,
                                      param->real_keynr[idx],
                                      test(parent_alloc),
                                      parent_alloc);
  else
    quick=new QUICK_RANGE_SELECT(param->thd, param->table,
                                 param->real_keynr[idx],
                                 test(parent_alloc));

  if (quick)
  {
    /*
      Any failure below discards the quick select and makes the function
      return NULL: a constructor error, a failure to build the range list,
      or a failure to copy the KEY_PART array. The last check was missing
      before, so an out-of-memory condition in memdup_root() used to return
      a quick select with key_parts == NULL.
    */
    if (quick->error ||
        get_quick_keys(param,quick,param->key[idx],key_tree,param->min_key,0,
                       param->max_key,0) ||
        !(quick->key_parts= (KEY_PART*)
          memdup_root(parent_alloc? parent_alloc : &quick->alloc,
                      (char*) param->key[idx],
                      sizeof(KEY_PART)*
                      param->table->key_info[param->real_keynr[idx]].key_parts)))
    {
      delete quick;
      quick=0;
    }
  }
  DBUG_RETURN(quick);
}


/*
** Fix this to get all possible sub_ranges
*/
6857 6858
bool
get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871
	       SEL_ARG *key_tree,char *min_key,uint min_key_flag,
	       char *max_key, uint max_key_flag)
{
  QUICK_RANGE *range;
  uint flag;

  if (key_tree->left != &null_element)
  {
    if (get_quick_keys(param,quick,key,key_tree->left,
		       min_key,min_key_flag, max_key, max_key_flag))
      return 1;
  }
  char *tmp_min_key=min_key,*tmp_max_key=max_key;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6872
  key_tree->store(key[key_tree->part].store_length,
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900
		  &tmp_min_key,min_key_flag,&tmp_max_key,max_key_flag);

  if (key_tree->next_key_part &&
      key_tree->next_key_part->part == key_tree->part+1 &&
      key_tree->next_key_part->type == SEL_ARG::KEY_RANGE)
  {						  // const key as prefix
    if (!((tmp_min_key - min_key) != (tmp_max_key - max_key) ||
	  memcmp(min_key,max_key, (uint) (tmp_max_key - max_key)) ||
	  key_tree->min_flag || key_tree->max_flag))
    {
      if (get_quick_keys(param,quick,key,key_tree->next_key_part,
			 tmp_min_key, min_key_flag | key_tree->min_flag,
			 tmp_max_key, max_key_flag | key_tree->max_flag))
	return 1;
      goto end;					// Ugly, but efficient
    }
    {
      uint tmp_min_flag=key_tree->min_flag,tmp_max_flag=key_tree->max_flag;
      if (!tmp_min_flag)
	key_tree->next_key_part->store_min_key(key, &tmp_min_key,
					       &tmp_min_flag);
      if (!tmp_max_flag)
	key_tree->next_key_part->store_max_key(key, &tmp_max_key,
					       &tmp_max_flag);
      flag=tmp_min_flag | tmp_max_flag;
    }
  }
  else
6901 6902 6903 6904
  {
    flag = (key_tree->min_flag & GEOM_FLAG) ?
      key_tree->min_flag : key_tree->min_flag | key_tree->max_flag;
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6905

6906 6907 6908 6909 6910
  /*
    Ensure that some part of min_key and max_key are used.  If not,
    regard this as no lower/upper range
  */
  if ((flag & GEOM_FLAG) == 0)
6911 6912 6913 6914 6915 6916 6917 6918 6919 6920
  {
    if (tmp_min_key != param->min_key)
      flag&= ~NO_MIN_RANGE;
    else
      flag|= NO_MIN_RANGE;
    if (tmp_max_key != param->max_key)
      flag&= ~NO_MAX_RANGE;
    else
      flag|= NO_MAX_RANGE;
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6921 6922 6923 6924 6925 6926 6927 6928
  if (flag == 0)
  {
    uint length= (uint) (tmp_min_key - param->min_key);
    if (length == (uint) (tmp_max_key - param->max_key) &&
	!memcmp(param->min_key,param->max_key,length))
    {
      KEY *table_key=quick->head->key_info+quick->index;
      flag=EQ_RANGE;
6929 6930
      if ((table_key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME &&
	  key->part == table_key->key_parts-1)
6931 6932 6933 6934 6935 6936 6937 6938 6939
      {
	if (!(table_key->flags & HA_NULL_PART_KEY) ||
	    !null_part_in_key(key,
			      param->min_key,
			      (uint) (tmp_min_key - param->min_key)))
	  flag|= UNIQUE_RANGE;
	else
	  flag|= NULL_RANGE;
      }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6940 6941 6942 6943
    }
  }

  /* Get range for retrieving rows in QUICK_SELECT::get_next */
6944
  if (!(range= new QUICK_RANGE((const char *) param->min_key,
6945
			       (uint) (tmp_min_key - param->min_key),
6946
			       (const char *) param->max_key,
6947 6948
			       (uint) (tmp_max_key - param->max_key),
			       flag)))
6949 6950
    return 1;			// out of memory

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6951 6952
  set_if_bigger(quick->max_used_key_length,range->min_length);
  set_if_bigger(quick->max_used_key_length,range->max_length);
6953
  set_if_bigger(quick->used_key_parts, (uint) key_tree->part+1);
6954 6955 6956
  if (insert_dynamic(&quick->ranges, (gptr)&range))
    return 1;

bk@work.mysql.com's avatar
bk@work.mysql.com committed
6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968
 end:
  if (key_tree->right != &null_element)
    return get_quick_keys(param,quick,key,key_tree->right,
			  min_key,min_key_flag,
			  max_key,max_key_flag);
  return 0;
}

/*
  Return 1 if there is only one range and this uses the whole primary key
*/

6969
bool QUICK_RANGE_SELECT::unique_key_range()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6970 6971 6972
{
  if (ranges.elements == 1)
  {
6973 6974
    QUICK_RANGE *tmp= *((QUICK_RANGE**)ranges.buffer);
    if ((tmp->flag & (EQ_RANGE | NULL_RANGE)) == EQ_RANGE)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6975 6976
    {
      KEY *key=head->key_info+index;
6977
      return ((key->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME &&
bk@work.mysql.com's avatar
bk@work.mysql.com committed
6978 6979 6980 6981 6982 6983
	      key->key_length == tmp->min_length);
    }
  }
  return 0;
}

6984

monty@mysql.com's avatar
monty@mysql.com committed
6985
/* Returns TRUE if any part of the key is NULL */
6986 6987 6988

static bool null_part_in_key(KEY_PART *key_part, const char *key, uint length)
{
6989
  for (const char *end=key+length ;
6990
       key < end;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6991
       key+= key_part++->store_length)
6992
  {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6993 6994
    if (key_part->null_bit && *key)
      return 1;
6995 6996 6997 6998
  }
  return 0;
}

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
6999

7000 7001
/* Return the result of check_if_key_used() for this select's index. */
bool QUICK_SELECT_I::check_if_keys_used(List<Item> *fields)
{
  List<Item> &field_list= *fields;
  return check_if_key_used(head, index, field_list);
}

/*
  Return 1 as soon as check_if_key_used() reports a hit for the index of
  any merged range select, 0 if none does.
*/
bool QUICK_INDEX_MERGE_SELECT::check_if_keys_used(List<Item> *fields)
{
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);

  for (QUICK_RANGE_SELECT *scan= scan_it++; scan; scan= scan_it++)
  {
    if (check_if_key_used(head, scan->index, *fields))
      return 1;
  }
  return 0;
}

/*
  Return 1 as soon as check_if_key_used() reports a hit for the index of
  any intersected range select, 0 if none does.
*/
bool QUICK_ROR_INTERSECT_SELECT::check_if_keys_used(List<Item> *fields)
{
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);

  for (QUICK_RANGE_SELECT *scan= scan_it++; scan; scan= scan_it++)
  {
    if (check_if_key_used(head, scan->index, *fields))
      return 1;
  }
  return 0;
}

/*
  Return 1 as soon as any of the united quick selects reports, through its
  own check_if_keys_used(), that it uses one of the fields; 0 otherwise.
*/
bool QUICK_ROR_UNION_SELECT::check_if_keys_used(List<Item> *fields)
{
  List_iterator_fast<QUICK_SELECT_I> child_it(quick_selects);

  for (QUICK_SELECT_I *child= child_it++; child; child= child_it++)
  {
    if (child->check_if_keys_used(fields))
      return 1;
  }
  return 0;
}

monty@mysql.com's avatar
monty@mysql.com committed
7041

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7042 7043
/*
  Create quick select from ref/ref_or_null scan.
7044

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059
  SYNOPSIS
    get_quick_select_for_ref()
      thd      Thread handle
      table    Table to access
      ref      ref[_or_null] scan parameters
      records  Estimate of number of records (needed only to construct 
               quick select)
  NOTES
    This allocates things in a new memory root, as this may be called many
    times during a query.
  
  RETURN 
    Quick select that retrieves the same rows as passed ref scan
    NULL on error.
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7060

7061
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7062
                                             TABLE_REF *ref, ha_rows records)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7063
{
7064 7065
  MEM_ROOT *old_root, *alloc;
  QUICK_RANGE_SELECT *quick;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7066 7067
  KEY *key_info = &table->key_info[ref->key];
  KEY_PART *key_part;
serg@serg.mylan's avatar
serg@serg.mylan committed
7068
  QUICK_RANGE *range;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7069
  uint part;
7070 7071 7072 7073 7074 7075

  old_root= thd->mem_root;
  /* The following call may change thd->mem_root */
  quick= new QUICK_RANGE_SELECT(thd, table, ref->key, 0);
  /* save mem_root set by QUICK_RANGE_SELECT constructor */
  alloc= thd->mem_root;
7076 7077 7078 7079 7080
  /*
    return back default mem_root (thd->mem_root) changed by
    QUICK_RANGE_SELECT constructor
  */
  thd->mem_root= old_root;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7081 7082

  if (!quick)
7083
    return 0;			/* no ranges found */
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7084
  if (quick->init())
monty@mysql.com's avatar
monty@mysql.com committed
7085
    goto err;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7086
  quick->records= records;
7087

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7088
  if (cp_buffer_from_ref(thd,ref) && thd->is_fatal_error ||
7089
      !(range= new(alloc) QUICK_RANGE()))
monty@mysql.com's avatar
monty@mysql.com committed
7090
    goto err;                                   // out of memory
7091

bk@work.mysql.com's avatar
bk@work.mysql.com committed
7092 7093 7094
  range->min_key=range->max_key=(char*) ref->key_buff;
  range->min_length=range->max_length=ref->key_length;
  range->flag= ((ref->key_length == key_info->key_length &&
7095 7096
		 (key_info->flags & (HA_NOSAME | HA_END_SPACE_KEY)) ==
		 HA_NOSAME) ? EQ_RANGE : 0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7097 7098

  if (!(quick->key_parts=key_part=(KEY_PART *)
7099
	alloc_root(&quick->alloc,sizeof(KEY_PART)*ref->key_parts)))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7100 7101 7102 7103 7104 7105
    goto err;

  for (part=0 ; part < ref->key_parts ;part++,key_part++)
  {
    key_part->part=part;
    key_part->field=        key_info->key_part[part].field;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7106 7107
    key_part->length=  	    key_info->key_part[part].length;
    key_part->store_length= key_info->key_part[part].store_length;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7108 7109
    key_part->null_bit=     key_info->key_part[part].null_bit;
  }
pem@mysql.com's avatar
pem@mysql.com committed
7110
  if (insert_dynamic(&quick->ranges,(gptr)&range))
7111 7112
    goto err;

7113
  /*
7114 7115 7116 7117 7118
     Add a NULL range if REF_OR_NULL optimization is used.
     For example:
       if we have "WHERE A=2 OR A IS NULL" we created the (A=2) range above
       and have ref->null_ref_key set. Will create a new NULL range here.
  */
7119 7120 7121 7122 7123
  if (ref->null_ref_key)
  {
    QUICK_RANGE *null_range;

    *ref->null_ref_key= 1;		// Set null byte then create a range
7124 7125 7126 7127 7128
    if (!(null_range= new (alloc) QUICK_RANGE((char*)ref->key_buff,
                                              ref->key_length,
                                              (char*)ref->key_buff,
                                              ref->key_length,
                                              EQ_RANGE)))
7129 7130
      goto err;
    *ref->null_ref_key= 0;		// Clear null byte
pem@mysql.com's avatar
pem@mysql.com committed
7131
    if (insert_dynamic(&quick->ranges,(gptr)&null_range))
7132 7133 7134 7135
      goto err;
  }

  return quick;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7136 7137 7138 7139 7140 7141

err:
  delete quick;
  return 0;
}

7142 7143

/*
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7144 7145 7146 7147 7148 7149
  Perform key scans for all used indexes (except CPK), get rowids and merge 
  them into an ordered non-recurrent sequence of rowids.
  
  The merge/duplicate removal is performed using Unique class. We put all
  rowids into Unique, get the sorted sequence and destroy the Unique.
  
7150
  If table has a clustered primary key that covers all rows (TRUE for bdb
7151
     and innodb currently) and one of the index_merge scans is a scan on PK,
7152
  then
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7153 7154
    rows that will be retrieved by PK scan are not put into Unique and 
    primary key scan is not performed here, it is performed later separately.
7155

7156 7157 7158
  RETURN
    0     OK
    other error
7159
*/
7160

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7161
int QUICK_INDEX_MERGE_SELECT::read_keys_and_merge()
7162
{
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7163 7164
  List_iterator_fast<QUICK_RANGE_SELECT> cur_quick_it(quick_selects);
  QUICK_RANGE_SELECT* cur_quick;
7165
  int result;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7166
  Unique *unique;
7167
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::prepare_unique");
7168

7169
  /* We're going to just read rowids. */
7170 7171
  if (head->file->extra(HA_EXTRA_KEYREAD))
    DBUG_RETURN(1);
7172

7173 7174
  /*
    Make innodb retrieve all PK member fields, so
7175
     * ha_innobase::position (which uses them) call works.
7176
     * We can filter out rows that will be retrieved by clustered PK.
7177
    (This also creates a deficiency - it is possible that we will retrieve
7178
     parts of key that are not used by current query at all.)
7179
  */
7180
  if (head->file->ha_retrieve_all_pk())
7181
    DBUG_RETURN(1);
7182

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7183 7184
  cur_quick_it.rewind();
  cur_quick= cur_quick_it++;
7185
  DBUG_ASSERT(cur_quick != 0);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7186 7187 7188 7189 7190
  
  /*
    We reuse the same instance of handler so we need to call both init and 
    reset here.
  */
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7191
  if (cur_quick->init() || cur_quick->reset())
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7192
    DBUG_RETURN(1);
7193

7194
  unique= new Unique(refpos_order_cmp, (void *)head->file,
7195
                     head->file->ref_length,
7196
                     thd->variables.sortbuff_size);
7197 7198
  if (!unique)
    DBUG_RETURN(1);
monty@mysql.com's avatar
monty@mysql.com committed
7199
  for (;;)
7200
  {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7201
    while ((result= cur_quick->get_next()) == HA_ERR_END_OF_FILE)
7202
    {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7203 7204 7205
      cur_quick->range_end();
      cur_quick= cur_quick_it++;
      if (!cur_quick)
7206
        break;
7207

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7208 7209
      if (cur_quick->file->inited != handler::NONE) 
        cur_quick->file->ha_index_end();
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7210
      if (cur_quick->init() || cur_quick->reset())
7211
        DBUG_RETURN(1);
7212 7213 7214
    }

    if (result)
7215
    {
7216
      if (result != HA_ERR_END_OF_FILE)
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7217 7218
      {
        cur_quick->range_end();
7219
        DBUG_RETURN(result);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7220
      }
7221
      break;
7222
    }
7223

7224 7225
    if (thd->killed)
      DBUG_RETURN(1);
7226

7227
    /* skip row if it will be retrieved by clustered PK scan */
7228 7229
    if (pk_quick_select && pk_quick_select->row_in_ranges())
      continue;
7230

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7231 7232
    cur_quick->file->position(cur_quick->record);
    result= unique->unique_add((char*)cur_quick->file->ref);
7233
    if (result)
7234 7235
      DBUG_RETURN(1);

monty@mysql.com's avatar
monty@mysql.com committed
7236
  }
7237

7238 7239
  /* ok, all row ids are in Unique */
  result= unique->get(head);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7240
  delete unique;
monty@mysql.com's avatar
monty@mysql.com committed
7241
  doing_pk_scan= FALSE;
monty@mysql.com's avatar
monty@mysql.com committed
7242 7243
  /* start table scan */
  init_read_record(&read_record, thd, head, (SQL_SELECT*) 0, 1, 1);
7244 7245
  /* index_merge currently doesn't support "using index" at all */
  head->file->extra(HA_EXTRA_NO_KEYREAD);
7246

7247 7248 7249
  DBUG_RETURN(result);
}

7250

7251 7252 7253
/*
  Get next row for index_merge.
  NOTES
7254 7255 7256 7257
    The rows are read from
      1. rowids stored in Unique.
      2. QUICK_RANGE_SELECT with clustered primary key (if any).
    The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
7258
*/
7259

7260 7261
int QUICK_INDEX_MERGE_SELECT::get_next()
{
7262
  int result;
7263
  DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::get_next");
7264

7265 7266 7267 7268 7269 7270 7271 7272 7273
  if (doing_pk_scan)
    DBUG_RETURN(pk_quick_select->get_next());

  result= read_record.read_record(&read_record);

  if (result == -1)
  {
    result= HA_ERR_END_OF_FILE;
    end_read_record(&read_record);
7274
    /* All rows from Unique have been retrieved, do a clustered PK scan */
monty@mysql.com's avatar
monty@mysql.com committed
7275
    if (pk_quick_select)
7276
    {
monty@mysql.com's avatar
monty@mysql.com committed
7277
      doing_pk_scan= TRUE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7278
      if ((result= pk_quick_select->init()) || (result= pk_quick_select->reset()))
7279 7280 7281 7282 7283 7284
        DBUG_RETURN(result);
      DBUG_RETURN(pk_quick_select->get_next());
    }
  }

  DBUG_RETURN(result);
7285 7286
}

7287 7288

/*
7289
  Retrieve next record.
7290
  SYNOPSIS
7291 7292
     QUICK_ROR_INTERSECT_SELECT::get_next()

7293
  NOTES
7294 7295
    Invariant on enter/exit: all intersected selects have retrieved all index
    records with rowid <= some_rowid_val and no intersected select has
7296 7297 7298 7299
    retrieved any index records with rowid > some_rowid_val.
    We start fresh and loop until we have retrieved the same rowid in each of
    the key scans or we got an error.

7300
    If a Clustered PK scan is present, it is used only to check if row
7301 7302 7303 7304 7305
    satisfies its condition (and never used for row retrieval).

  RETURN
   0     - Ok
   other - Error code if any error occurred.
7306 7307 7308 7309 7310 7311 7312 7313 7314
*/

int QUICK_ROR_INTERSECT_SELECT::get_next()
{
  List_iterator_fast<QUICK_RANGE_SELECT> quick_it(quick_selects);
  QUICK_RANGE_SELECT* quick;
  int error, cmp;
  uint last_rowid_count=0;
  DBUG_ENTER("QUICK_ROR_INTERSECT_SELECT::get_next");

  /*
    Get a rowid from the first quick select and save it as the 'candidate'.
    When a clustered PK scan is present, rows that fall outside its ranges
    are skipped right away.
  */
  quick= quick_it++;
  if (cpk_quick)
  {
    do {
      error= quick->get_next();
    }while (!error && !cpk_quick->row_in_ranges());
  }
  else
    error= quick->get_next();

  if (error)
    DBUG_RETURN(error);

  quick->file->position(quick->record);
  memcpy(last_rowid, quick->file->ref, head->file->ref_length);
  last_rowid_count= 1;

  /*
    Walk the scans round-robin until every one of them has confirmed the
    current candidate rowid (last_rowid_count counts the confirmations).
  */
  while (last_rowid_count < quick_selects.elements)
  {
    if (!(quick= quick_it++))
    {
      quick_it.rewind();
      quick= quick_it++;
    }

    /* Advance this scan until its rowid is >= the candidate. */
    do {
      if ((error= quick->get_next()))
        DBUG_RETURN(error);
      quick->file->position(quick->record);
      cmp= head->file->cmp_ref(quick->file->ref, last_rowid);
    } while (cmp < 0);

    /* Ok, current select 'caught up' and returned ref >= cur_ref */
    if (cmp > 0)
    {
      /* Found a row with ref > cur_ref. Make it a new 'candidate' */
      if (cpk_quick)
      {
        while (!cpk_quick->row_in_ranges())
        {
          if ((error= quick->get_next()))
            DBUG_RETURN(error);
        }
        /*
          The loop above may have moved the scan to a different row.
          Refresh quick->file->ref so that the rowid copied below belongs
          to the row the scan is actually positioned on, not to the row
          that was rejected by the clustered PK filter.
        */
        quick->file->position(quick->record);
      }
      memcpy(last_rowid, quick->file->ref, head->file->ref_length);
      last_rowid_count= 1;
    }
    else
    {
      /* current 'candidate' row confirmed by this select */
      last_rowid_count++;
    }
  }

  /* We get here iff we got the same row ref in all scans. */
  if (need_to_fetch_row)
    error= head->file->rnd_pos(head->record[0], last_rowid);
  DBUG_RETURN(error);
}


7378 7379
/*
  Retrieve next record.
7380 7381
  SYNOPSIS
    QUICK_ROR_UNION_SELECT::get_next()
7382

7383
  NOTES
7384 7385
    Enter/exit invariant:
    For each quick select in the queue a {key,rowid} tuple has been
7386
    retrieved but the corresponding row hasn't been passed to output.
7387

7388
  RETURN
7389 7390
   0     - Ok
   other - Error code if any error occurred.
7391 7392 7393 7394 7395 7396 7397 7398
*/

int QUICK_ROR_UNION_SELECT::get_next()
{
  int error, dup_row;
  QUICK_SELECT_I *quick;
  byte *tmp;
  DBUG_ENTER("QUICK_ROR_UNION_SELECT::get_next");
7399

7400 7401 7402 7403
  do
  {
    if (!queue.elements)
      DBUG_RETURN(HA_ERR_END_OF_FILE);
7404
    /* Ok, we have a queue with >= 1 scans */
7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420

    quick= (QUICK_SELECT_I*)queue_top(&queue);
    memcpy(cur_rowid, quick->last_rowid, rowid_length);

    /* put into queue rowid from the same stream as top element */
    if ((error= quick->get_next()))
    {
      if (error != HA_ERR_END_OF_FILE)
        DBUG_RETURN(error);
      queue_remove(&queue, 0);
    }
    else
    {
      quick->save_last_pos();
      queue_replaced(&queue);
    }
7421

7422 7423 7424
    if (!have_prev_rowid)
    {
      /* No rows have been returned yet */
monty@mysql.com's avatar
monty@mysql.com committed
7425 7426
      dup_row= FALSE;
      have_prev_rowid= TRUE;
7427 7428 7429 7430
    }
    else
      dup_row= !head->file->cmp_ref(cur_rowid, prev_rowid);
  }while (dup_row);
7431

7432 7433 7434 7435 7436 7437 7438 7439
  tmp= cur_rowid;
  cur_rowid= prev_rowid;
  prev_rowid= tmp;

  error= head->file->rnd_pos(quick->record, prev_rowid);
  DBUG_RETURN(error);
}

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7440
int QUICK_RANGE_SELECT::reset()
ingo@mysql.com's avatar
ingo@mysql.com committed
7441 7442 7443
{
  uint  mrange_bufsiz;
  byte  *mrange_buff;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7444 7445 7446
  DBUG_ENTER("QUICK_RANGE_SELECT::reset");
  next=0;
  range= NULL;
7447
  in_range= FALSE;
sergefp@mysql.com's avatar
sergefp@mysql.com committed
7448
  cur_range= (QUICK_RANGE**) ranges.buffer;
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
7449

7450
  if (file->inited == handler::NONE && (error= file->ha_index_init(index,1)))
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
7451
    DBUG_RETURN(error);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
7452
 
ingo@mysql.com's avatar
ingo@mysql.com committed
7453 7454 7455 7456 7457 7458 7459
  /* Do not allocate the buffers twice. */
  if (multi_range_length)
  {
    DBUG_ASSERT(multi_range_length == min(multi_range_count, ranges.elements));
    DBUG_RETURN(0);
  }

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7460 7461
  /* Allocate the ranges array. */
  DBUG_ASSERT(ranges.elements);
ingo@mysql.com's avatar
ingo@mysql.com committed
7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477
  multi_range_length= min(multi_range_count, ranges.elements);
  DBUG_ASSERT(multi_range_length > 0);
  while (multi_range_length && ! (multi_range= (KEY_MULTI_RANGE*)
                                  my_malloc(multi_range_length *
                                            sizeof(KEY_MULTI_RANGE),
                                            MYF(MY_WME))))
  {
    /* Try to shrink the buffers until it is 0. */
    multi_range_length/= 2;
  }
  if (! multi_range)
  {
    multi_range_length= 0;
    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
  }

sergefp@mysql.com's avatar
sergefp@mysql.com committed
7478
  /* Allocate the handler buffer if necessary.  */
ingo@mysql.com's avatar
ingo@mysql.com committed
7479 7480 7481
  if (file->table_flags() & HA_NEED_READ_RANGE_BUFFER)
  {
    mrange_bufsiz= min(multi_range_bufsiz,
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
7482
                       (QUICK_SELECT_I::records + 1)* head->s->reclength);
ingo@mysql.com's avatar
ingo@mysql.com committed
7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504

    while (mrange_bufsiz &&
           ! my_multi_malloc(MYF(MY_WME),
                             &multi_range_buff, sizeof(*multi_range_buff),
                             &mrange_buff, mrange_bufsiz,
                             NullS))
    {
      /* Try to shrink the buffers until both are 0. */
      mrange_bufsiz/= 2;
    }
    if (! multi_range_buff)
    {
      my_free((char*) multi_range, MYF(0));
      multi_range= NULL;
      multi_range_length= 0;
      DBUG_RETURN(HA_ERR_OUT_OF_MEM);
    }

    /* Initialize the handler buffer. */
    multi_range_buff->buffer= mrange_buff;
    multi_range_buff->buffer_end= mrange_buff + mrange_bufsiz;
    multi_range_buff->end_of_used_area= mrange_buff;
7505 7506 7507 7508 7509 7510 7511 7512
#ifdef HAVE_purify
    /*
      We need this until ndb will use the buffer efficiently
      (Now ndb stores  complete row in here, instead of only the used fields
      which gives us valgrind warnings in compare_record[])
    */
    bzero((char*) mrange_buff, mrange_bufsiz);
#endif
ingo@mysql.com's avatar
ingo@mysql.com committed
7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531
  }
  DBUG_RETURN(0);
}


/*
  Get next possible record using quick-struct.

  SYNOPSIS
    QUICK_RANGE_SELECT::get_next()

  NOTES
    Record is read into table->record[0]

  RETURN
    0			Found row
    HA_ERR_END_OF_FILE	No (more) rows in range
    #			Error code
*/
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7532

7533
int QUICK_RANGE_SELECT::get_next()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7534
{
ingo@mysql.com's avatar
ingo@mysql.com committed
7535 7536 7537 7538
  int             result;
  KEY_MULTI_RANGE *mrange;
  key_range       *start_key;
  key_range       *end_key;
7539
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next");
ingo@mysql.com's avatar
ingo@mysql.com committed
7540 7541 7542
  DBUG_ASSERT(multi_range_length && multi_range &&
              (cur_range >= (QUICK_RANGE**) ranges.buffer) &&
              (cur_range <= (QUICK_RANGE**) ranges.buffer + ranges.elements));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7543 7544 7545

  for (;;)
  {
ingo@mysql.com's avatar
ingo@mysql.com committed
7546
    if (in_range)
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7547
    {
ingo@mysql.com's avatar
ingo@mysql.com committed
7548 7549
      /* We did already start to read this key. */
      result= file->read_multi_range_next(&mrange);
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7550
      if (result != HA_ERR_END_OF_FILE)
ingo@mysql.com's avatar
ingo@mysql.com committed
7551 7552
      {
        in_range= ! result;
7553
	DBUG_RETURN(result);
ingo@mysql.com's avatar
ingo@mysql.com committed
7554
      }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7555
    }
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7556

ingo@mysql.com's avatar
ingo@mysql.com committed
7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586
    uint count= min(multi_range_length, ranges.elements -
                    (cur_range - (QUICK_RANGE**) ranges.buffer));
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      in_range= FALSE;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    KEY_MULTI_RANGE *mrange_slot, *mrange_end;
    for (mrange_slot= multi_range, mrange_end= mrange_slot+count;
         mrange_slot < mrange_end;
         mrange_slot++)
    {
      start_key= &mrange_slot->start_key;
      end_key= &mrange_slot->end_key;
      range= *(cur_range++);

      start_key->key=    (const byte*) range->min_key;
      start_key->length= range->min_length;
      start_key->flag=   ((range->flag & NEAR_MIN) ? HA_READ_AFTER_KEY :
                          (range->flag & EQ_RANGE) ?
                          HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT);
      end_key->key=      (const byte*) range->max_key;
      end_key->length=   range->max_length;
      /*
        We use HA_READ_AFTER_KEY here because if we are reading on a key
        prefix. We want to find all keys with this prefix.
      */
      end_key->flag=     (range->flag & NEAR_MAX ? HA_READ_BEFORE_KEY :
                          HA_READ_AFTER_KEY);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7587

ingo@mysql.com's avatar
ingo@mysql.com committed
7588 7589
      mrange_slot->range_flag= range->flag;
    }
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7590

ingo@mysql.com's avatar
ingo@mysql.com committed
7591 7592
    result= file->read_multi_range_first(&mrange, multi_range, count,
                                         sorted, multi_range_buff);
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7593
    if (result != HA_ERR_END_OF_FILE)
ingo@mysql.com's avatar
ingo@mysql.com committed
7594 7595
    {
      in_range= ! result;
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7596
      DBUG_RETURN(result);
ingo@mysql.com's avatar
ingo@mysql.com committed
7597 7598
    }
    in_range= FALSE; /* No matching rows; go to next set of ranges. */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7599 7600 7601
  }
}

7602

7603 7604 7605 7606 7607 7608
/*
  Get the next record with a different prefix.

  SYNOPSIS
    QUICK_RANGE_SELECT::get_next_prefix()
    prefix_length  length of cur_prefix
7609
    cur_prefix     prefix of a key to be searched for
7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641

  DESCRIPTION
    Each subsequent call to the method retrieves the first record that has a
    prefix with length prefix_length different from cur_prefix, such that the
    record with the new prefix is within the ranges described by
    this->ranges. The record found is stored into the buffer pointed by
    this->record.
    The method is useful for GROUP-BY queries with range conditions to
    discover the prefix of the next group that satisfies the range conditions.

  TODO
    This method is a modified copy of QUICK_RANGE_SELECT::get_next(), so both
    methods should be unified into a more general one to reduce code
    duplication.

  RETURN
    0                  on success
    HA_ERR_END_OF_FILE if returned all keys
    other              if some error occurred
*/

int QUICK_RANGE_SELECT::get_next_prefix(uint prefix_length, byte *cur_prefix)
{
  DBUG_ENTER("QUICK_RANGE_SELECT::get_next_prefix");

  for (;;)
  {
    int rc;
    key_range lower, upper;

    if (range)
    {
      /*
        A range is open: jump to the first record whose prefix follows
        cur_prefix and return it if it is still inside the range.
      */
      DBUG_ASSERT(cur_prefix != 0);
      rc= file->index_read(record, cur_prefix, prefix_length,
                           HA_READ_AFTER_KEY);
      if (rc)
        DBUG_RETURN(rc);
      if (file->compare_key(file->end_range) <= 0)
        DBUG_RETURN(rc);
    }

    uint ranges_left= ranges.elements -
                      (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (ranges_left == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    range= *(cur_range++);

    /* Both bounds are limited to at most prefix_length bytes. */
    lower.key=    (const byte*) range->min_key;
    lower.length= min(range->min_length, prefix_length);
    if (range->flag & NEAR_MIN)
      lower.flag= HA_READ_AFTER_KEY;
    else if (range->flag & EQ_RANGE)
      lower.flag= HA_READ_KEY_EXACT;
    else
      lower.flag= HA_READ_KEY_OR_NEXT;

    upper.key=    (const byte*) range->max_key;
    upper.length= min(range->max_length, prefix_length);
    /*
      HA_READ_AFTER_KEY is used for an inclusive upper bound because, when
      reading on a key prefix, we want to find all keys with this prefix.
    */
    if (range->flag & NEAR_MAX)
      upper.flag= HA_READ_BEFORE_KEY;
    else
      upper.flag= HA_READ_AFTER_KEY;

    /* A bound of zero length is passed to the handler as "no bound". */
    rc= file->read_range_first(range->min_length ? &lower : 0,
                               range->max_length ? &upper : 0,
                               test(range->flag & EQ_RANGE),
                               sorted);
    if (range->flag == (UNIQUE_RANGE | EQ_RANGE))
      range= 0;                                 // Stop searching

    if (rc != HA_ERR_END_OF_FILE)
      DBUG_RETURN(rc);
    range= 0;                           // No matching rows; go to next range
  }
}


pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7686
/* Get next for geometrical indexes */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7687

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7688
int QUICK_RANGE_SELECT_GEOM::get_next()
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7689
{
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7690
  DBUG_ENTER("QUICK_RANGE_SELECT_GEOM::get_next");
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7691

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7692
  for (;;)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7693
  {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7694 7695
    int result;
    if (range)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7696
    {
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7697 7698 7699 7700 7701
      // Already read through key
      result= file->index_next_same(record, (byte*) range->min_key,
				    range->min_length);
      if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7702
    }
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7703

ingo@mysql.com's avatar
ingo@mysql.com committed
7704 7705 7706 7707 7708 7709 7710 7711
    uint count= ranges.elements - (cur_range - (QUICK_RANGE**) ranges.buffer);
    if (count == 0)
    {
      /* Ranges have already been used up before. None is left for read. */
      range= 0;
      DBUG_RETURN(HA_ERR_END_OF_FILE);
    }
    range= *(cur_range++);
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7712 7713 7714 7715 7716

    result= file->index_read(record,
			     (byte*) range->min_key,
			     range->min_length,
			     (ha_rkey_function)(range->flag ^ GEOM_FLAG));
7717
    if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7718 7719
      DBUG_RETURN(result);
    range=0;				// Not found, to next range
bk@work.mysql.com's avatar
bk@work.mysql.com committed
7720 7721 7722
  }
}

7723

7724 7725 7726 7727
/*
  Check if current row will be retrieved by this QUICK_RANGE_SELECT

  NOTES
7728 7729
    It is assumed that currently a scan is being done on another index
    which reads all necessary parts of the index that is scanned by this
7730
    quick select.
7731
    The implementation does a binary search on sorted array of disjoint
7732 7733
    ranges, without taking size of range into account.

7734
    This function is used to filter out clustered PK scan rows in
7735 7736
    index_merge quick select.

7737
  RETURN
monty@mysql.com's avatar
monty@mysql.com committed
7738 7739
    TRUE  if current row will be retrieved by this quick select
    FALSE if not
7740 7741 7742 7743 7744 7745 7746 7747 7748 7749
*/

bool QUICK_RANGE_SELECT::row_in_ranges()
{
  QUICK_RANGE *range;
  uint min= 0;
  uint max= ranges.elements - 1;
  uint mid= (max + min)/2;

  while (min != max)
7750
  {
7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763
    if (cmp_next(*(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid)))
    {
      /* current row value > mid->max */
      min= mid + 1;
    }
    else
      max= mid;
    mid= (min + max) / 2;
  }
  range= *(QUICK_RANGE**)dynamic_array_ptr(&ranges, mid);
  return (!cmp_next(range) && !cmp_prev(range));
}

7764
/*
7765 7766 7767 7768 7769 7770 7771
  This is a hack: we inherit from QUICK_SELECT so that we can use the
  get_next() interface, but we have to hold a pointer to the original
  QUICK_SELECT because its data are used all over the place.  What
  should be done is to factor out the data that is needed into a base
  class (QUICK_SELECT), and then have two subclasses (_ASC and _DESC)
  which handle the ranges and implement the get_next() function.  But
  for now, this seems to work right at least.
7772
 */
7773

7774
QUICK_SELECT_DESC::QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q,
                                     uint used_key_parts)
 : QUICK_RANGE_SELECT(*q), rev_it(rev_ranges)
{
  /*
    Fill rev_ranges with the ranges copied from *q. Each range is pushed
    to the front of the list, so the list ends up in reverse array order.
  */
  QUICK_RANGE **const first_range= (QUICK_RANGE**)ranges.buffer;
  uint range_no;
  for (range_no= 0; range_no < ranges.elements; range_no++)
    rev_ranges.push_front(first_range[range_no]);

  /* Remove EQ_RANGE flag for keys that are not using the full key */
  QUICK_RANGE *cur;
  while ((cur= rev_it++))
  {
    if (!(cur->flag & EQ_RANGE))
      continue;
    if (head->key_info[index].key_length != cur->max_length)
      cur->flag&= ~EQ_RANGE;
  }
  rev_it.rewind();

  /*
    This object was copy-constructed from *q, so mark q as not owning the
    shared memory before it is destroyed.
  */
  q->dont_free=1;
  delete q;
}

7797

7798 7799 7800 7801 7802 7803
int QUICK_SELECT_DESC::get_next()
{
  DBUG_ENTER("QUICK_SELECT_DESC::get_next");

  /* The max key is handled as follows:
   *   - if there is NO_MAX_RANGE, start at the end and move backwards
7804 7805
   *   - if it is an EQ_RANGE, which means that max key covers the entire
   *     key, go directly to the key and read through it (sorting backwards is
7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817
   *     same as sorting forwards)
   *   - if it is NEAR_MAX, go to the key or next, step back once, and
   *     move backwards
   *   - otherwise (not NEAR_MAX == include the key), go after the key,
   *     step back once, and move backwards
   */

  for (;;)
  {
    int result;
    if (range)
    {						// Already read through key
7818 7819 7820
      result = ((range->flag & EQ_RANGE)
		? file->index_next_same(record, (byte*) range->min_key,
					range->min_length) :
7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835
		file->index_prev(record));
      if (!result)
      {
	if (cmp_prev(*rev_it.ref()) == 0)
	  DBUG_RETURN(0);
      }
      else if (result != HA_ERR_END_OF_FILE)
	DBUG_RETURN(result);
    }

    if (!(range=rev_it++))
      DBUG_RETURN(HA_ERR_END_OF_FILE);		// All ranges used

    if (range->flag & NO_MAX_RANGE)		// Read last record
    {
7836 7837 7838
      int local_error;
      if ((local_error=file->index_last(record)))
	DBUG_RETURN(local_error);		// Empty table
7839 7840 7841 7842 7843 7844
      if (cmp_prev(range) == 0)
	DBUG_RETURN(0);
      range=0;			// No matching records; go to next range
      continue;
    }

7845
    if (range->flag & EQ_RANGE)
7846 7847 7848 7849 7850 7851
    {
      result = file->index_read(record, (byte*) range->max_key,
				range->max_length, HA_READ_KEY_EXACT);
    }
    else
    {
7852 7853 7854 7855 7856
      DBUG_ASSERT(range->flag & NEAR_MAX || range_reads_after_key(range));
      result=file->index_read(record, (byte*) range->max_key,
			      range->max_length,
			      ((range->flag & NEAR_MAX) ?
			       HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV));
7857 7858 7859
    }
    if (result)
    {
7860
      if (result != HA_ERR_KEY_NOT_FOUND && result != HA_ERR_END_OF_FILE)
7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874
	DBUG_RETURN(result);
      range=0;					// Not found, to next range
      continue;
    }
    if (cmp_prev(range) == 0)
    {
      if (range->flag == (UNIQUE_RANGE | EQ_RANGE))
	range = 0;				// Stop searching
      DBUG_RETURN(0);				// Found key is in range
    }
    range = 0;					// To next range
  }
}

7875

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916
/*
  Compare if found key is over max-value
  Returns 0 if key <= range->max_key
*/

int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
{
  if (range_arg->flag & NO_MAX_RANGE)
    return 0;                                   /* key can't be too large */

  char *pos= range_arg->max_key;
  char *const limit= pos + range_arg->max_length;
  KEY_PART *part= key_parts;

  /* Compare key part by key part against the stored max key image. */
  while (pos < limit)
  {
    uint advance= part->store_length;
    bool compare_value= TRUE;

    if (part->null_bit)
    {
      if (*pos)
      {
        /* Max key part is NULL: a non-NULL field value is above it. */
        if (!part->field->is_null())
          return 1;
        compare_value= FALSE;                   /* both NULL: next part */
      }
      else
      {
        if (part->field->is_null())
          return 0;
        pos++;                                  // Skip null byte
        advance--;
      }
    }

    if (compare_value)
    {
      int cmp= part->field->key_cmp((byte*) pos, part->length);
      if (cmp < 0)
        return 0;
      if (cmp > 0)
        return 1;
    }

    pos+= advance;
    part++;
  }

  /* Exact match: outside the range only if the max bound is exclusive. */
  if (range_arg->flag & NEAR_MAX)
    return 1;
  return 0;
}


7917
/*
7918 7919 7920
  Returns 0 if found key is inside range (found key >= range->min_key).
*/

7921
int QUICK_RANGE_SELECT::cmp_prev(QUICK_RANGE *range_arg)
{
  if (range_arg->flag & NO_MIN_RANGE)
    return 0;                                   /* key can't be too small */

  const int order= key_cmp(key_part_info, (byte*) range_arg->min_key,
                           range_arg->min_length);

  if (order > 0)
    return 0;
  /* A key equal to min_key is inside only when the bound is inclusive. */
  if (order == 0 && !(range_arg->flag & NEAR_MIN))
    return 0;
  return 1;                                     // outside of range
}

7934

7935
/*
monty@mysql.com's avatar
monty@mysql.com committed
7936
 * TRUE if this range will require using HA_READ_AFTER_KEY
7937
   See comment in get_next() about this
7938
 */
7939

7940
bool QUICK_SELECT_DESC::range_reads_after_key(QUICK_RANGE *range_arg)
{
  /* No upper bound, or an exclusive one */
  if (range_arg->flag & (NO_MAX_RANGE | NEAR_MAX))
    return 1;
  /* Not an equality range */
  if (!(range_arg->flag & EQ_RANGE))
    return 1;
  /* Equality range whose max key does not span the full key length */
  return (head->key_info[index].key_length != range_arg->max_length) ? 1 : 0;
}

7947

monty@mysql.com's avatar
monty@mysql.com committed
7948
/* TRUE if we are reading over a key that may have a NULL value */
7949

7950
#ifdef NOT_USED
bool QUICK_SELECT_DESC::test_if_null_range(QUICK_RANGE *range_arg,
                                           uint used_key_parts)
{
  uint pos= 0;
  uint common_length= min(range_arg->min_length, range_arg->max_length);
  KEY_PART *part= key_parts;
  KEY_PART *parts_end= part + used_key_parts;

  /* Skip the leading key parts on which min_key and max_key agree. */
  while (pos < common_length && part != parts_end)
  {
    if (memcmp((char*) range_arg->min_key+pos,
               (char*) range_arg->max_key+pos,
               part->store_length))
    {
      if (part->null_bit && range_arg->min_key[pos])
        return 1;                       // min_key is null and max_key isn't
      // Range doesn't cover NULL. This is ok if there is no more null parts
      break;
    }
    pos+= part->store_length;
    part++;
  }

  /*
    If the next min_range is > NULL, then we can use this, even if
    it's a NULL key
    Example:  SELECT * FROM t1 WHERE a = 2 AND b >0 ORDER BY a DESC,b DESC;
  */
  if (part != parts_end && part->null_bit)
  {
    if (pos >= range_arg->min_length || range_arg->min_key[pos])
      return 1;                                 // Could be null
    part++;
  }

  /*
    If any of the key parts used in the ORDER BY could be NULL, we can't
    use the key to sort the data.
  */
  while (part != parts_end)
  {
    if (part->null_bit)
      return 1;                                 // Covers null part
    part++;
  }
  return 0;
}
#endif
7994 7995


7996 7997 7998 7999 8000 8001 8002 8003 8004
/* Append the name of the index used by this scan to 'str'. */
void QUICK_RANGE_SELECT::add_info_string(String *str)
{
  str->append(head->key_info[index].name);
}

/*
  Append "sort_union(<scan>,<scan>,...)" to 'str'; the clustered PK scan,
  if any, is listed last.
*/
void QUICK_INDEX_MERGE_SELECT::add_info_string(String *str)
{
  QUICK_RANGE_SELECT *scan;
  bool need_comma= FALSE;
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);

  str->append(STRING_WITH_LEN("sort_union("));
  while ((scan= scan_it++))
  {
    if (need_comma)
      str->append(',');
    need_comma= TRUE;
    scan->add_info_string(str);
  }
  if (pk_quick_select)
  {
    str->append(',');
    pk_quick_select->add_info_string(str);
  }
  str->append(')');
}

/*
  Append "intersect(<index>,<index>,...)" to 'str'; the index of the
  clustered PK scan, if any, is listed last.
*/
void QUICK_ROR_INTERSECT_SELECT::add_info_string(String *str)
{
  QUICK_RANGE_SELECT *scan;
  bool need_comma= FALSE;
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);

  str->append(STRING_WITH_LEN("intersect("));
  while ((scan= scan_it++))
  {
    if (need_comma)
      str->append(',');
    need_comma= TRUE;
    str->append(head->key_info[scan->index].name);
  }
  if (cpk_quick)
  {
    str->append(',');
    str->append(head->key_info[cpk_quick->index].name);
  }
  str->append(')');
}

/* Append "union(<scan>,<scan>,...)" to 'str'. */
void QUICK_ROR_UNION_SELECT::add_info_string(String *str)
{
  QUICK_SELECT_I *scan;
  bool need_comma= FALSE;
  List_iterator_fast<QUICK_SELECT_I> scan_it(quick_selects);

  str->append(STRING_WITH_LEN("union("));
  while ((scan= scan_it++))
  {
    if (need_comma)
      str->append(',');
    need_comma= TRUE;
    scan->add_info_string(str);
  }
  str->append(')');
}


8066
void QUICK_RANGE_SELECT::add_keys_and_lengths(String *key_names,
8067
                                              String *used_lengths)
8068 8069 8070 8071 8072 8073 8074 8075 8076
{
  char buf[64];
  uint length;
  KEY *key_info= head->key_info + index;
  key_names->append(key_info->name);
  length= longlong2str(max_used_key_length, buf, 10) - buf;
  used_lengths->append(buf, length);
}

8077 8078
/*
  Append a comma-separated list of index names to 'key_names' and the
  matching list of used key lengths to 'used_lengths', one entry per
  merged scan. The clustered PK scan, if any, is listed last.
*/
void QUICK_INDEX_MERGE_SELECT::add_keys_and_lengths(String *key_names,
                                                    String *used_lengths)
{
  char num_buf[64];
  uint num_len;
  bool need_sep= FALSE;
  QUICK_RANGE_SELECT *scan;
  List_iterator_fast<QUICK_RANGE_SELECT> scan_it(quick_selects);

  while ((scan= scan_it++))
  {
    if (need_sep)
    {
      key_names->append(',');
      used_lengths->append(',');
    }
    need_sep= TRUE;

    key_names->append(head->key_info[scan->index].name);
    num_len= longlong2str(scan->max_used_key_length, num_buf, 10) - num_buf;
    used_lengths->append(num_buf, num_len);
  }

  if (pk_quick_select)
  {
    key_names->append(',');
    key_names->append(head->key_info[pk_quick_select->index].name);
    num_len= longlong2str(pk_quick_select->max_used_key_length,
                          num_buf, 10) - num_buf;
    used_lengths->append(',');
    used_lengths->append(num_buf, num_len);
  }
}

8112 8113
/*
  Append, for every intersected range scan and then for the clustered
  primary key scan (if any), the index name to key_names and the decimal
  value of its max_used_key_length to used_lengths. Entries are separated
  by ','.
*/
void QUICK_ROR_INTERSECT_SELECT::add_keys_and_lengths(String *key_names,
                                                      String *used_lengths)
{
  char buf[64];
  uint length;
  bool first= TRUE;
  QUICK_RANGE_SELECT *quick;
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  while ((quick= it++))
  {
    KEY *key_info= head->key_info + quick->index;
    if (first)
      first= FALSE;
    else
    {
      key_names->append(',');
      used_lengths->append(',');
    }
    key_names->append(key_info->name);
    length= longlong2str(quick->max_used_key_length, buf, 10) - buf;
    used_lengths->append(buf, length);
  }

  if (cpk_quick)
  {
    KEY *key_info= head->key_info + cpk_quick->index;
    /* Emit a separator only if an entry was already written above. */
    if (!first)
    {
      key_names->append(',');
      used_lengths->append(',');
    }
    key_names->append(key_info->name);
    length= longlong2str(cpk_quick->max_used_key_length, buf, 10) - buf;
    used_lengths->append(buf, length);
  }
}

8146 8147
/*
  Let every child quick select append its own key names and key lengths,
  writing a ',' into both strings between consecutive children.
*/
void QUICK_ROR_UNION_SELECT::add_keys_and_lengths(String *key_names,
                                                  String *used_lengths)
{
  bool first= TRUE;
  QUICK_SELECT_I *quick;
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  while ((quick= it++))
  {
    if (first)
      first= FALSE;
    else
    {
      used_lengths->append(',');
      key_names->append(',');
    }
    quick->add_keys_and_lengths(key_names, used_lengths);
  }
}

8165 8166 8167 8168 8169 8170 8171 8172 8173

/*******************************************************************************
* Implementation of QUICK_GROUP_MIN_MAX_SELECT
*******************************************************************************/

static inline uint get_field_keypart(KEY *index, Field *field);
static inline SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree,
                                             PARAM *param, uint *param_idx);
static bool
8174
get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
8175
                       KEY_PART_INFO *first_non_group_part,
8176 8177 8178 8179
                       KEY_PART_INFO *min_max_arg_part,
                       KEY_PART_INFO *last_part, THD *thd,
                       byte *key_infix, uint *key_infix_len,
                       KEY_PART_INFO **first_non_infix_part);
8180
static bool
8181 8182
check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
                               Field::imagetype image_type);
8183

8184 8185 8186 8187 8188 8189
static void
cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
                   uint group_key_parts, SEL_TREE *range_tree,
                   SEL_ARG *index_tree, ha_rows quick_prefix_records,
                   bool have_min, bool have_max,
                   double *read_cost, ha_rows *records);
8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215

/*
  Test if this access method is applicable to a GROUP query with MIN/MAX
  functions, and if so, construct a new TRP object.

  SYNOPSIS
    get_best_group_min_max()
    param    Parameter from test_quick_select
    sel_tree Range tree generated by get_mm_tree

  DESCRIPTION
    Test whether a query can be computed via a QUICK_GROUP_MIN_MAX_SELECT.
    Queries computable via a QUICK_GROUP_MIN_MAX_SELECT must satisfy the
    following conditions:
    A) Table T has at least one compound index I of the form:
       I = <A_1, ...,A_k, [B_1,..., B_m], C, [D_1,...,D_n]>
    B) Query conditions:
    B0. Q is over a single table T.
    B1. The attributes referenced by Q are a subset of the attributes of I.
    B2. All attributes QA in Q can be divided into 4 overlapping groups:
        - SA = {S_1, ..., S_l, [C]} - from the SELECT clause, where C is
          referenced by any number of MIN and/or MAX functions if present.
        - WA = {W_1, ..., W_p} - from the WHERE clause
        - GA = <G_1, ..., G_k> - from the GROUP BY clause (if any)
             = SA              - if Q is a DISTINCT query (based on the
                                 equivalence of DISTINCT and GROUP queries).
        - NGA = QA - (GA union C) = {NG_1, ..., NG_m} - the ones not in
          GROUP BY and not referenced by MIN/MAX functions.
        with the following properties specified below.
    B3. If Q has a GROUP BY WITH ROLLUP clause the access method is not
        applicable.

    SA1. There is at most one attribute in SA referenced by any number of
         MIN and/or MAX functions which, if present, is denoted as C.
    SA2. The position of the C attribute in the index is after the last A_k.
    SA3. The attribute C can be referenced in the WHERE clause only in
         predicates of the forms:
         - (C {< | <= | > | >= | =} const)
         - (const {< | <= | > | >= | =} C)
         - (C between const_i and const_j)
         - C IS NULL
         - C IS NOT NULL
         - C != const
    SA4. If Q has a GROUP BY clause, there are no other aggregate functions
         except MIN and MAX. For queries with DISTINCT, aggregate functions
         are allowed.
    SA5. The select list in DISTINCT queries should not contain expressions.
    GA1. If Q has a GROUP BY clause, then GA is a prefix of I. That is, if
         G_i = A_j => i = j.
    GA2. If Q has a DISTINCT clause, then there is a permutation of SA that
         forms a prefix of I. This permutation is used as the GROUP clause
         when the DISTINCT query is converted to a GROUP query.
    GA3. The attributes in GA may participate in arbitrary predicates, divided
         into two groups:
         - RNG(G_1,...,G_q ; where q <= k) is a range condition over the
           attributes of a prefix of GA
         - PA(G_i1,...G_iq) is an arbitrary predicate over an arbitrary subset
           of GA. Since P is applied to only GROUP attributes it filters some
           groups, and thus can be applied after the grouping.
    GA4. There are no expressions among G_i, just direct column references.
    NGA1.If in the index I there is a gap between the last GROUP attribute G_k,
         and the MIN/MAX attribute C, then NGA must consist of exactly the index
         attributes that constitute the gap. As a result there is a permutation
         of NGA that coincides with the gap in the index <B_1, ..., B_m>.
    NGA2.If BA <> {}, then the WHERE clause must contain a conjunction EQ of
         equality conditions for all NG_i of the form (NG_i = const) or
         (const = NG_i), such that each NG_i is referenced in exactly one
         conjunct. Informally, the predicates provide constants to fill the
         gap in the index.
    WA1. There are no other attributes in the WHERE clause except the ones
         referenced in predicates RNG, PA, PC, EQ defined above. Therefore
         WA is subset of (GA union NGA union C) for GA,NGA,C that pass the above
         tests. By transitivity then it also follows that each WA_i participates
         in the index I (if this was already tested for GA, NGA and C).

    C) Overall query form:
       SELECT EXPR([A_1,...,A_k], [B_1,...,B_m], [MIN(C)], [MAX(C)])
         FROM T
        WHERE [RNG(A_1,...,A_p ; where p <= k)]
         [AND EQ(B_1,...,B_m)]
         [AND PC(C)]
         [AND PA(A_i1,...,A_iq)]
       GROUP BY A_1,...,A_k
       [HAVING PH(A_1, ..., B_1,..., C)]
    where EXPR(...) is an arbitrary expression over some or all SELECT fields,
    or:
       SELECT DISTINCT A_i1,...,A_ik
         FROM T
        WHERE [RNG(A_1,...,A_p ; where p <= k)]
         [AND PA(A_i1,...,A_iq)];

  NOTES
    If the current query satisfies the conditions above, and if
    (mem_root != NULL), then the function constructs and returns a new TRP
    object, that is later used to construct a new QUICK_GROUP_MIN_MAX_SELECT.
    If (mem_root == NULL), then the function only tests whether the current
    query satisfies the conditions above, and, if so, sets
    is_applicable = TRUE.

    Queries with DISTINCT for which index access can be used are transformed
    into equivalent group-by queries of the form:

    SELECT A_1,...,A_k FROM T
     WHERE [RNG(A_1,...,A_p ; where p <= k)]
      [AND PA(A_i1,...,A_iq)]
    GROUP BY A_1,...,A_k;

    The group-by list is a permutation of the select attributes, according
    to their order in the index.

  TODO
  - What happens if the query groups by the MIN/MAX field, and there is no
    other field as in: "select min(a) from t1 group by a" ?
  - We assume that the general correctness of the GROUP-BY query was checked
    before this point. Is this correct, or do we have to check it completely?
  - Lift the limitation in condition (B3), that is, make this access method
    applicable to ROLLUP queries.

  RETURN
    If mem_root != NULL
    - valid TRP_GROUP_MIN_MAX object if this QUICK class can be used for
      the query
    -  NULL o/w.
    If mem_root == NULL
    - NULL
*/

static TRP_GROUP_MIN_MAX *
get_best_group_min_max(PARAM *param, SEL_TREE *tree)
{
  THD *thd= param->thd;
  JOIN *join= thd->lex->select_lex.join;
  TABLE *table= param->table;
  bool have_min= FALSE;              /* TRUE if there is a MIN function. */
  bool have_max= FALSE;              /* TRUE if there is a MAX function. */
  Item_field *min_max_arg_item= NULL;/* The argument of all MIN/MAX functions.*/
  /* The keypart of the MIN/MAX argument in the best index found so far. */
  KEY_PART_INFO *min_max_arg_part= NULL;
  uint group_prefix_len= 0; /* Length (in bytes) of the key prefix. */
  KEY *index_info= NULL;    /* The index chosen for data access. */
  uint index= 0;            /* The id of the chosen index. */
  uint group_key_parts= 0;  /* Number of index key parts in the group prefix. */
  uint used_key_parts= 0;   /* Number of index key parts used for access. */
  byte key_infix[MAX_KEY_LENGTH]; /* Constants from equality predicates.*/
  uint key_infix_len= 0;          /* Length of key_infix. */
  TRP_GROUP_MIN_MAX *read_plan= NULL; /* The eventually constructed TRP. */
  uint key_part_nr;
  ORDER *tmp_group;
  Item *item;
  Item_field *item_field;
  DBUG_ENTER("get_best_group_min_max");

  /* Perform few 'cheap' tests whether this access method is applicable. */
  if (!join || (thd->lex->sql_command != SQLCOM_SELECT))
    DBUG_RETURN(NULL);        /* This is not a select statement. */
  if ((join->tables != 1) ||  /* The query must reference one table. */
      ((!join->group_list) && /* Neither GROUP BY nor a DISTINCT query. */
       (!join->select_distinct)) ||
      (thd->lex->select_lex.olap == ROLLUP_TYPE)) /* Check (B3) for ROLLUP */
    DBUG_RETURN(NULL);
  if (table->s->keys == 0)        /* There are no indexes to use. */
    DBUG_RETURN(NULL);

  /* Analyze the query in more detail. */
  List_iterator<Item> select_items_it(join->fields_list);

  /* Check (SA1,SA4) and store the only MIN/MAX argument - the C attribute.*/
  if (join->make_sum_func_list(join->all_fields, join->fields_list, 1))
    DBUG_RETURN(NULL);
  if (join->sum_funcs[0])
  {
    Item_sum *min_max_item;
    Item_sum **func_ptr= join->sum_funcs;
    while ((min_max_item= *(func_ptr++)))
    {
      if (min_max_item->sum_func() == Item_sum::MIN_FUNC)
        have_min= TRUE;
      else if (min_max_item->sum_func() == Item_sum::MAX_FUNC)
        have_max= TRUE;
      else
        DBUG_RETURN(NULL);

      Item *expr= min_max_item->args[0];    /* The argument of MIN/MAX. */
      if (expr->type() == Item::FIELD_ITEM) /* Is it an attribute? */
      {
        if (! min_max_arg_item)
          min_max_arg_item= (Item_field*) expr;
        else if (! min_max_arg_item->eq(expr, 1))
          DBUG_RETURN(NULL);
      }
      else
        DBUG_RETURN(NULL);
    }
  }

  /* Check (SA5). */
  if (join->select_distinct)
  {
    while ((item= select_items_it++))
    {
      if (item->type() != Item::FIELD_ITEM)
        DBUG_RETURN(NULL);
    }
  }

  /* Check (GA4) - that there are no expressions among the group attributes. */
  for (tmp_group= join->group_list; tmp_group; tmp_group= tmp_group->next)
  {
    if ((*tmp_group->item)->type() != Item::FIELD_ITEM)
      DBUG_RETURN(NULL);
  }

  /*
    Check that table has at least one compound index such that the conditions
    (GA1,GA2) are all TRUE. If there is more than one such index, select the
    cheapest one. Here we set the variables: group_prefix_len and index_info.
  */
  KEY *cur_index_info= table->key_info;
  KEY *cur_index_info_end= cur_index_info + table->s->keys;
  KEY_PART_INFO *cur_part= NULL;
  KEY_PART_INFO *end_part; /* Last part for loops. */
  /* Last index part. */
  KEY_PART_INFO *last_part= NULL;
  KEY_PART_INFO *first_non_group_part= NULL;
  KEY_PART_INFO *first_non_infix_part= NULL;
  uint key_infix_parts= 0;
  uint cur_group_key_parts= 0;
  uint cur_group_prefix_len= 0;
  /* Cost-related variables for the best index so far. */
  double best_read_cost= DBL_MAX;
  ha_rows best_records= 0;
  SEL_ARG *best_index_tree= NULL;
  ha_rows best_quick_prefix_records= 0;
  uint best_param_idx= 0;
  double cur_read_cost= DBL_MAX;
  ha_rows cur_records;
  SEL_ARG *cur_index_tree= NULL;
  ha_rows cur_quick_prefix_records= 0;
  uint cur_param_idx=MAX_KEY;
  key_map cur_used_key_parts;
  uint pk= param->table->s->primary_key;
  /*
    Per-index working state. It is copied into min_max_arg_part, key_infix,
    key_infix_len and used_key_parts only when the current index becomes the
    best one, so that a later index that is rejected (or is more expensive)
    cannot overwrite the data that belongs to the chosen index.
  */
  KEY_PART_INFO *cur_min_max_arg_part= NULL;
  byte cur_key_infix[MAX_KEY_LENGTH];
  uint cur_key_infix_len= 0;
  uint cur_access_key_parts= 0;

  for (uint cur_index= 0 ; cur_index_info != cur_index_info_end ;
       cur_index_info++, cur_index++)
  {
    /* Start every candidate index from a clean per-index state. */
    cur_min_max_arg_part= NULL;
    cur_key_infix_len= 0;

    /* Check (B1) - if current index is covering. */
    if (!table->used_keys.is_set(cur_index))
      goto next_index;

    /*
      If the current storage manager is such that it appends the primary key to
      each index, then the above condition is insufficient to check if the
      index is covering. In such cases it may happen that some fields are
      covered by the PK index, but not by the current index. Since we can't
      use the concatenation of both indexes for index lookup, such an index
      does not qualify as covering in our case. If this is the case, below
      we check that all query fields are indeed covered by 'cur_index'.
    */
    if (pk < MAX_KEY && cur_index != pk &&
        (table->file->table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX))
    {
      /* For each table field */
      for (uint i= 0; i < table->s->fields; i++)
      {
        Field *cur_field= table->field[i];
        /*
          If the field is used in the current query, check that the
          field is covered by some keypart of the current index.
        */
        if (thd->query_id == cur_field->query_id)
        {
          KEY_PART_INFO *key_part= cur_index_info->key_part;
          KEY_PART_INFO *key_part_end= key_part + cur_index_info->key_parts;
          for (;;)
          {
            if (key_part->field == cur_field)
              break;
            if (++key_part == key_part_end)
              goto next_index;                  // Field was not part of key
          }
        }
      }
    }

    /*
      Check (GA1) for GROUP BY queries.
    */
    if (join->group_list)
    {
      cur_part= cur_index_info->key_part;
      end_part= cur_part + cur_index_info->key_parts;
      /* Iterate in parallel over the GROUP list and the index parts. */
      for (tmp_group= join->group_list; tmp_group && (cur_part != end_part);
           tmp_group= tmp_group->next, cur_part++)
      {
        /*
          TODO:
          tmp_group::item is an array of Item, is it OK to consider only the
          first Item? If so, then why? What is the array for?
        */
        /* Above we already checked that all group items are fields. */
        DBUG_ASSERT((*tmp_group->item)->type() == Item::FIELD_ITEM);
        Item_field *group_field= (Item_field *) (*tmp_group->item);
        if (group_field->field->eq(cur_part->field))
        {
          cur_group_prefix_len+= cur_part->store_length;
          ++cur_group_key_parts;
        }
        else
          goto next_index;
      }
    }
    /*
      Check (GA2) if this is a DISTINCT query.
      If GA2, then Store a new ORDER object in group_fields_array at the
      position of the key part of item_field->field. Thus we get the ORDER
      objects for each field ordered as the corresponding key parts.
      Later group_fields_array of ORDER objects is used to convert the query
      to a GROUP query.
    */
    else if (join->select_distinct)
    {
      select_items_it.rewind();
      cur_used_key_parts.clear_all();
      uint max_key_part= 0;
      while ((item= select_items_it++))
      {
        item_field= (Item_field*) item; /* (SA5) already checked above. */
        /* Find the order of the key part in the index. */
        key_part_nr= get_field_keypart(cur_index_info, item_field->field);
        /*
          Check if this attribute was already present in the select list.
          If it was present, then its corresponding key part was already used.
        */
        if (cur_used_key_parts.is_set(key_part_nr))
          continue;
        if (key_part_nr < 1 || key_part_nr > join->fields_list.elements)
          goto next_index;
        cur_part= cur_index_info->key_part + key_part_nr - 1;
        cur_group_prefix_len+= cur_part->store_length;
        cur_used_key_parts.set_bit(key_part_nr);
        ++cur_group_key_parts;
        max_key_part= max(max_key_part,key_part_nr);
      }
      /*
        Check that used key parts forms a prefix of the index.
        To check this we compare bits in all_parts and cur_parts.
        all_parts have all bits set from 0 to (max_key_part-1).
        cur_parts have bits set for only used keyparts.
      */
      ulonglong all_parts, cur_parts;
      /* Shift in the width of the result type, not in plain int. */
      all_parts= (((ulonglong) 1) << max_key_part) - 1;
      cur_parts= cur_used_key_parts.to_ulonglong() >> 1;
      if (all_parts != cur_parts)
        goto next_index;
    }
    else
      DBUG_ASSERT(FALSE);

    /* Check (SA2). */
    if (min_max_arg_item)
    {
      key_part_nr= get_field_keypart(cur_index_info, min_max_arg_item->field);
      if (key_part_nr <= cur_group_key_parts)
        goto next_index;
      cur_min_max_arg_part= cur_index_info->key_part + key_part_nr - 1;
    }

    /*
      Check (NGA1, NGA2) and extract a sequence of constants to be used as part
      of all search keys.
    */

    /*
      If there is MIN/MAX, each keypart between the last group part and the
      MIN/MAX part must participate in one equality with constants, and all
      keyparts after the MIN/MAX part must not be referenced in the query.

      If there is no MIN/MAX, the keyparts after the last group part can be
      referenced only in equalities with constants, and the referenced keyparts
      must form a sequence without any gaps that starts immediately after the
      last group keypart.
    */
    last_part= cur_index_info->key_part + cur_index_info->key_parts;
    first_non_group_part= (cur_group_key_parts < cur_index_info->key_parts) ?
                          cur_index_info->key_part + cur_group_key_parts :
                          NULL;
    first_non_infix_part= cur_min_max_arg_part ?
                          (cur_min_max_arg_part < last_part) ?
                             cur_min_max_arg_part + 1 :
                             NULL :
                           NULL;
    if (first_non_group_part &&
        (!cur_min_max_arg_part ||
         (cur_min_max_arg_part - first_non_group_part > 0)))
    {
      if (tree)
      {
        uint dummy;
        SEL_ARG *index_range_tree= get_index_range_tree(cur_index, tree, param,
                                                        &dummy);
        if (!get_constant_key_infix(cur_index_info, index_range_tree,
                                    first_non_group_part, cur_min_max_arg_part,
                                    last_part, thd, cur_key_infix,
                                    &cur_key_infix_len,
                                    &first_non_infix_part))
          goto next_index;
      }
      else if (cur_min_max_arg_part &&
               (cur_min_max_arg_part - first_non_group_part > 0))
        /*
          There is a gap but no range tree, thus no predicates at all for the
          non-group keyparts.
        */
        goto next_index;
    }

    /*
      Test (WA1) partially - that no other keypart after the last infix part is
      referenced in the query.
    */
    if (first_non_infix_part)
    {
      for (cur_part= first_non_infix_part; cur_part != last_part; cur_part++)
      {
        if (cur_part->field->query_id == thd->query_id)
          goto next_index;
      }
    }

    /* If we got to this point, cur_index_info passes the test. */
    key_infix_parts= cur_key_infix_len ?
                     (first_non_infix_part - first_non_group_part) : 0;
    cur_access_key_parts= cur_group_key_parts + key_infix_parts;

    /* Compute the cost of using this index. */
    if (tree)
    {
      /* Find the SEL_ARG sub-tree that corresponds to the chosen index. */
      cur_index_tree= get_index_range_tree(cur_index, tree, param,
                                           &cur_param_idx);
      /* Check if this range tree can be used for prefix retrieval. */
      cur_quick_prefix_records= check_quick_select(param, cur_param_idx,
                                                    cur_index_tree);
    }
    cost_group_min_max(table, cur_index_info, cur_access_key_parts,
                       cur_group_key_parts, tree, cur_index_tree,
                       cur_quick_prefix_records, have_min, have_max,
                       &cur_read_cost, &cur_records);
    /*
      If cur_read_cost is lower than best_read_cost use cur_index.
      Do not compare doubles directly because they may have different
      representations (64 vs. 80 bits).
    */
    if (cur_read_cost < best_read_cost - (DBL_EPSILON * cur_read_cost))
    {
      DBUG_ASSERT(tree != 0 || cur_param_idx == MAX_KEY);
      index_info= cur_index_info;
      index= cur_index;
      best_read_cost= cur_read_cost;
      best_records= cur_records;
      best_index_tree= cur_index_tree;
      best_quick_prefix_records= cur_quick_prefix_records;
      best_param_idx= cur_param_idx;
      group_key_parts= cur_group_key_parts;
      group_prefix_len= cur_group_prefix_len;
      /* Remember the per-index data that describes the chosen index. */
      min_max_arg_part= cur_min_max_arg_part;
      used_key_parts= cur_access_key_parts;
      key_infix_len= cur_key_infix_len;
      if (key_infix_len)
        memcpy(key_infix, cur_key_infix, key_infix_len);
    }

  next_index:
    cur_group_key_parts= 0;
    cur_group_prefix_len= 0;
  }
  if (!index_info) /* No usable index found. */
    DBUG_RETURN(NULL);

  /* Check (SA3) for the where clause. */
  if (join->conds && min_max_arg_item &&
      !check_group_min_max_predicates(join->conds, min_max_arg_item,
                                      (index_info->flags & HA_SPATIAL) ?
                                      Field::itMBR : Field::itRAW))
    DBUG_RETURN(NULL);

  /* The query passes all tests, so construct a new TRP object. */
  read_plan= new (param->mem_root)
                 TRP_GROUP_MIN_MAX(have_min, have_max, min_max_arg_part,
                                   group_prefix_len, used_key_parts,
                                   group_key_parts, index_info, index,
                                   key_infix_len,
                                   (key_infix_len > 0) ? key_infix : NULL,
                                   tree, best_index_tree, best_param_idx,
                                   best_quick_prefix_records);
  if (read_plan)
  {
    if (tree && read_plan->quick_prefix_records == 0)
      DBUG_RETURN(NULL);

    read_plan->read_cost= best_read_cost;
    read_plan->records=   best_records;

    DBUG_PRINT("info",
               ("Returning group min/max plan: cost: %g, records: %lu",
                read_plan->read_cost, (ulong) read_plan->records));
  }

  DBUG_RETURN(read_plan);
}


/*
  Check that the MIN/MAX attribute participates only in range predicates
  with constants.

  SYNOPSIS
    check_group_min_max_predicates()
    cond              tree (or subtree) describing all or part of the WHERE
                      clause being analyzed
    min_max_arg_item  the field referenced by the MIN/MAX function(s)
    image_type        Field::itMBR when the chosen index is spatial,
                      Field::itRAW otherwise

  DESCRIPTION
    The function walks recursively over the cond tree representing a WHERE
    clause, and checks condition (SA3) - if a field is referenced by a MIN/MAX
    aggregate function, it is referenced only by one of the following
    predicates: {=, !=, <, <=, >, >=, between, is null, is not null}.

  RETURN
    TRUE  if cond passes the test
    FALSE o/w
*/

static bool
8718 8719
check_group_min_max_predicates(COND *cond, Item_field *min_max_arg_item,
                               Field::imagetype image_type)
8720 8721
{
  DBUG_ENTER("check_group_min_max_predicates");
8722
  DBUG_ASSERT(cond && min_max_arg_item);
8723 8724 8725 8726 8727 8728 8729 8730 8731

  Item::Type cond_type= cond->type();
  if (cond_type == Item::COND_ITEM) /* 'AND' or 'OR' */
  {
    DBUG_PRINT("info", ("Analyzing: %s", ((Item_func*) cond)->func_name()));
    List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
    Item *and_or_arg;
    while ((and_or_arg= li++))
    {
monty@mishka.local's avatar
monty@mishka.local committed
8732
      if (!check_group_min_max_predicates(and_or_arg, min_max_arg_item,
8733
                                         image_type))
8734 8735 8736 8737 8738
        DBUG_RETURN(FALSE);
    }
    DBUG_RETURN(TRUE);
  }

8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751
  /*
    TODO:
    This is a very crude fix to handle sub-selects in the WHERE clause
    (Item_subselect objects). With the test below we rule out from the
    optimization all queries with subselects in the WHERE clause. What has to
    be done, is that here we should analyze whether the subselect references
    the MIN/MAX argument field, and disallow the optimization only if this is
    so.
  */
  if (cond_type == Item::SUBSELECT_ITEM)
    DBUG_RETURN(FALSE);
  
  /* We presume that at this point there are no other Items than functions. */
8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764
  DBUG_ASSERT(cond_type == Item::FUNC_ITEM);

  /* Test if cond references only group-by or non-group fields. */
  Item_func *pred= (Item_func*) cond;
  Item **arguments= pred->arguments();
  Item *cur_arg;
  DBUG_PRINT("info", ("Analyzing: %s", pred->func_name()));
  for (uint arg_idx= 0; arg_idx < pred->argument_count (); arg_idx++)
  {
    cur_arg= arguments[arg_idx];
    DBUG_PRINT("info", ("cur_arg: %s", cur_arg->full_name()));
    if (cur_arg->type() == Item::FIELD_ITEM)
    {
8765
      if (min_max_arg_item->eq(cur_arg, 1)) 
8766 8767 8768
      {
       /*
         If pred references the MIN/MAX argument, check whether pred is a range
8769
         condition that compares the MIN/MAX argument with a constant.
8770 8771
       */
        Item_func::Functype pred_type= pred->functype();
8772 8773 8774 8775 8776 8777 8778 8779 8780 8781
        if (pred_type != Item_func::EQUAL_FUNC     &&
            pred_type != Item_func::LT_FUNC        &&
            pred_type != Item_func::LE_FUNC        &&
            pred_type != Item_func::GT_FUNC        &&
            pred_type != Item_func::GE_FUNC        &&
            pred_type != Item_func::BETWEEN        &&
            pred_type != Item_func::ISNULL_FUNC    &&
            pred_type != Item_func::ISNOTNULL_FUNC &&
            pred_type != Item_func::EQ_FUNC        &&
            pred_type != Item_func::NE_FUNC)
8782 8783 8784 8785
          DBUG_RETURN(FALSE);

        /* Check that pred compares min_max_arg_item with a constant. */
        Item *args[3];
8786
        bzero(args, 3 * sizeof(Item*));
8787 8788 8789 8790
        bool inv;
        /* Test if this is a comparison of a field and a constant. */
        if (!simple_pred(pred, args, &inv))
          DBUG_RETURN(FALSE);
8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809

        /* Check for compatible string comparisons - similar to get_mm_leaf. */
        if (args[0] && args[1] && !args[2] && // this is a binary function
            min_max_arg_item->result_type() == STRING_RESULT &&
            /*
              Don't use an index when comparing strings of different collations.
            */
            ((args[1]->result_type() == STRING_RESULT &&
              image_type == Field::itRAW &&
              ((Field_str*) min_max_arg_item->field)->charset() !=
              pred->compare_collation())
             ||
             /*
               We can't always use indexes when comparing a string index to a
               number.
             */
             (args[1]->result_type() != STRING_RESULT &&
              min_max_arg_item->field->cmp_type() != args[1]->result_type())))
          DBUG_RETURN(FALSE);
8810 8811 8812 8813
      }
    }
    else if (cur_arg->type() == Item::FUNC_ITEM)
    {
monty@mishka.local's avatar
monty@mishka.local committed
8814
      if (!check_group_min_max_predicates(cur_arg, min_max_arg_item,
8815
                                         image_type))
8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834
        DBUG_RETURN(FALSE);
    }
    else if (cur_arg->const_item())
    {
      DBUG_RETURN(TRUE);
    }
    else
      DBUG_RETURN(FALSE);
  }

  DBUG_RETURN(TRUE);
}


/*
  Extract a sequence of constants from a conjunction of equality predicates.

  SYNOPSIS
    get_constant_key_infix()
    index_info             [in]  Descriptor of the chosen index.
    index_range_tree       [in]  Range tree for the chosen index
    first_non_group_part   [in]  First index part after group attribute parts
    min_max_arg_part       [in]  The keypart of the MIN/MAX argument if any
    last_part              [in]  Last keypart of the index
    thd                    [in]  Current thread
    key_infix              [out] Infix of constants to be used for index lookup
    key_infix_len          [out] Length of the infix
    first_non_infix_part   [out] The first keypart after the infix (if any)

  DESCRIPTION
    Test conditions (NGA1, NGA2) from get_best_group_min_max(). Namely,
    for each keypart field NGF_i not in GROUP-BY, check that there is a
    constant equality predicate among conds with the form (NGF_i = const_ci) or
    (const_ci = NGF_i).
    Thus all the NGF_i attributes must fill the 'gap' between the last group-by
    attribute and the MIN/MAX attribute in the index (if present). If these
    conditions hold, copy each constant from its corresponding predicate into
    key_infix, in the order its NG_i attribute appears in the index, and update
    key_infix_len with the total length of the key parts in key_infix.

  RETURN
    TRUE  if the index passes the test
    FALSE o/w
*/

static bool
8862
get_constant_key_infix(KEY *index_info, SEL_ARG *index_range_tree,
8863
                       KEY_PART_INFO *first_non_group_part,
8864 8865 8866 8867
                       KEY_PART_INFO *min_max_arg_part,
                       KEY_PART_INFO *last_part, THD *thd,
                       byte *key_infix, uint *key_infix_len,
                       KEY_PART_INFO **first_non_infix_part)
8868 8869 8870
{
  SEL_ARG       *cur_range;
  KEY_PART_INFO *cur_part;
8871 8872
  /* End part for the first loop below. */
  KEY_PART_INFO *end_part= min_max_arg_part ? min_max_arg_part : last_part;
8873 8874 8875 8876 8877 8878 8879 8880 8881 8882 8883 8884 8885 8886 8887 8888 8889

  *key_infix_len= 0;
  byte *key_ptr= key_infix;
  for (cur_part= first_non_group_part; cur_part != end_part; cur_part++)
  {
    /*
      Find the range tree for the current keypart. We assume that
      index_range_tree points to the leftmost keypart in the index.
    */
    for (cur_range= index_range_tree; cur_range;
         cur_range= cur_range->next_key_part)
    {
      if (cur_range->field->eq(cur_part->field))
        break;
    }
    if (!cur_range)
    {
8890 8891 8892 8893 8894 8895 8896
      if (min_max_arg_part)
        return FALSE; /* The current keypart has no range predicates at all. */
      else
      {
        *first_non_infix_part= cur_part;
        return TRUE;
      }
8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920
    }

    /* Check that the current range tree is a single point interval. */
    if (cur_range->prev || cur_range->next)
      return FALSE; /* This is not the only range predicate for the field. */
    if ((cur_range->min_flag & NO_MIN_RANGE) ||
        (cur_range->max_flag & NO_MAX_RANGE) ||
        (cur_range->min_flag & NEAR_MIN) || (cur_range->max_flag & NEAR_MAX))
      return FALSE;

    uint field_length= cur_part->store_length;
    if ((cur_range->maybe_null &&
         cur_range->min_value[0] && cur_range->max_value[0])
        ||
        (memcmp(cur_range->min_value, cur_range->max_value, field_length) == 0))
    { /* cur_range specifies 'IS NULL' or an equality condition. */
      memcpy(key_ptr, cur_range->min_value, field_length);
      key_ptr+= field_length;
      *key_infix_len+= field_length;
    }
    else
      return FALSE;
  }

8921 8922 8923
  if (!min_max_arg_part && (cur_part == last_part))
    *first_non_infix_part= last_part;

8924 8925 8926 8927
  return TRUE;
}


8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947
/*
  Locate the key part of an index that is built on a given field.

  SYNOPSIS
    get_field_keypart()
    index  descriptor of an index
    field  field that possibly references some key part in index

  NOTES
    The result is 1-based, so the matching KEY_PART_INFO is
    index->key_part + get_field_keypart(...) - 1.
    Key parts are compared with Field::eq() in index order and the first
    match wins.

  RETURN
    Positive number which is the consecutive number of the key part, or
    0 if field does not reference any index field.
*/

static inline uint
get_field_keypart(KEY *index, Field *field)
{
  const uint n_parts= index->key_parts;

  for (uint pos= 0; pos < n_parts; pos++)
  {
    if (field->eq(index->key_part[pos].field))
      return pos + 1;
  }
  return 0;
}


/*
  Look up the SEL_ARG sub-tree that belongs to a given index.

  SYNOPSIS
    get_index_range_tree()
    index     [in]  The ID of the index being looked for
    range_tree[in]  Tree of ranges being searched
    param     [in]  PARAM from SQL_SELECT::test_quick_select
    param_idx [out] Index in the array PARAM::key that corresponds to 'index'

  DESCRIPTION
    A SEL_TREE contains range trees for all usable indexes, ordered in the
    same way as the members of PARAM::key. The position of 'index' is found
    by scanning PARAM::real_keynr, stored in *param_idx (to be used later as
    argument of check_quick_select()), and used to pick the entry of
    range_tree->keys.

  NOTES
    NOTE(review): if 'index' is not present in PARAM::real_keynr, the
    position equals param->keys and range_tree->keys[param->keys] is read.
    Presumably callers only pass indexes known to be in PARAM - confirm.

  RETURN
    Pointer to the SEL_ARG subtree that corresponds to index.
*/

SEL_ARG * get_index_range_tree(uint index, SEL_TREE* range_tree, PARAM *param,
                               uint *param_idx)
{
  uint pos; /* Position of 'index' within param->real_keynr */

  for (pos= 0; pos < param->keys; pos++)
  {
    if (param->real_keynr[pos] == index)
      break;
  }

  *param_idx= pos;
  return range_tree->keys[pos];
}


8997
/*
8998
  Compute the cost of a quick_group_min_max_select for a particular index.
8999 9000

  SYNOPSIS
9001 9002 9003 9004 9005 9006 9007
    cost_group_min_max()
    table                [in] The table being accessed
    index_info           [in] The index used to access the table
    used_key_parts       [in] Number of key parts used to access the index
    group_key_parts      [in] Number of index key parts in the group prefix
    range_tree           [in] Tree of ranges for all indexes
    index_tree           [in] The range tree for the current index
monty@mysql.com's avatar
monty@mysql.com committed
9008 9009
    quick_prefix_records [in] Number of records retrieved by the internally
			      used quick range select if any
9010 9011 9012 9013
    have_min             [in] True if there is a MIN function
    have_max             [in] True if there is a MAX function
    read_cost           [out] The cost to retrieve rows via this quick select
    records             [out] The number of rows retrieved
9014 9015

  DESCRIPTION
monty@mysql.com's avatar
monty@mysql.com committed
9016 9017
    This method computes the access cost of a TRP_GROUP_MIN_MAX instance and
    the number of rows returned. It updates this->read_cost and this->records.
9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056

  NOTES
    The cost computation distinguishes several cases:
    1) No equality predicates over non-group attributes (thus no key_infix).
       If groups are bigger than blocks on the average, then we assume that it
       is very unlikely that block ends are aligned with group ends, thus even
       if we look for both MIN and MAX keys, all pairs of neighbor MIN/MAX
       keys, except for the first MIN and the last MAX keys, will be in the
       same block.  If groups are smaller than blocks, then we are going to
       read all blocks.
    2) There are equality predicates over non-group attributes.
       In this case the group prefix is extended by additional constants, and
       as a result the min/max values are inside sub-groups of the original
       groups. The number of blocks that will be read depends on whether the
       ends of these sub-groups will be contained in the same or in different
       blocks. We compute the probability for the two ends of a subgroup to be
       in two different blocks as the ratio of:
       - the number of positions of the left-end of a subgroup inside a group,
         such that the right end of the subgroup is past the end of the buffer
         containing the left-end, and
       - the total number of possible positions for the left-end of the
         subgroup, which is the number of keys in the containing group.
       We assume it is very unlikely that two ends of subsequent subgroups are
       in the same block.
    3) The are range predicates over the group attributes.
       Then some groups may be filtered by the range predicates. We use the
       selectivity of the range predicates to decide how many groups will be
       filtered.

  TODO
     - Take into account the optional range predicates over the MIN/MAX
       argument.
     - Check if we have a PK index and we use all cols - then each key is a
       group, and it will be better to use an index scan.

  RETURN
    None
*/

9057 9058 9059 9060 9061
void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
                        uint group_key_parts, SEL_TREE *range_tree,
                        SEL_ARG *index_tree, ha_rows quick_prefix_records,
                        bool have_min, bool have_max,
                        double *read_cost, ha_rows *records)
9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073
{
  uint table_records;
  uint num_groups;
  uint num_blocks;
  uint keys_per_block;
  uint keys_per_group;
  uint keys_per_subgroup; /* Average number of keys in sub-groups */
                          /* formed by a key infix. */
  double p_overlap; /* Probability that a sub-group overlaps two blocks. */
  double quick_prefix_selectivity;
  double io_cost;
  double cpu_cost= 0; /* TODO: CPU cost of index_read calls? */
timour@mysql.com's avatar
timour@mysql.com committed
9074
  DBUG_ENTER("cost_group_min_max");
monty@mysql.com's avatar
monty@mysql.com committed
9075

9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093
  table_records= table->file->records;
  keys_per_block= (table->file->block_size / 2 /
                   (index_info->key_length + table->file->ref_length)
                        + 1);
  num_blocks= (table_records / keys_per_block) + 1;

  /* Compute the number of keys in a group. */
  keys_per_group= index_info->rec_per_key[group_key_parts - 1];
  if (keys_per_group == 0) /* If there is no statistics try to guess */
    /* each group contains 10% of all records */
    keys_per_group= (table_records / 10) + 1;
  num_groups= (table_records / keys_per_group) + 1;

  /* Apply the selectivity of the quick select for group prefixes. */
  if (range_tree && (quick_prefix_records != HA_POS_ERROR))
  {
    quick_prefix_selectivity= (double) quick_prefix_records /
                              (double) table_records;
serg@serg.mylan's avatar
serg@serg.mylan committed
9094
    num_groups= (uint) rint(num_groups * quick_prefix_selectivity);
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
9095
    set_if_bigger(num_groups, 1);
9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126
  }

  if (used_key_parts > group_key_parts)
  { /*
      Compute the probability that two ends of a subgroup are inside
      different blocks.
    */
    keys_per_subgroup= index_info->rec_per_key[used_key_parts - 1];
    if (keys_per_subgroup >= keys_per_block) /* If a subgroup is bigger than */
      p_overlap= 1.0;       /* a block, it will overlap at least two blocks. */
    else
    {
      double blocks_per_group= (double) num_blocks / (double) num_groups;
      p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
      p_overlap= min(p_overlap, 1.0);
    }
    io_cost= (double) min(num_groups * (1 + p_overlap), num_blocks);
  }
  else
    io_cost= (keys_per_group > keys_per_block) ?
             (have_min && have_max) ? (double) (num_groups + 1) :
                                      (double) num_groups :
             (double) num_blocks;

  /*
    TODO: If there is no WHERE clause and no other expressions, there should be
    no CPU cost. We leave it here to make this cost comparable to that of index
    scan as computed in SQL_SELECT::test_quick_select().
  */
  cpu_cost= (double) num_groups / TIME_FOR_COMPARE;

9127
  *read_cost= io_cost + cpu_cost;
9128
  *records= num_groups;
9129 9130

  DBUG_PRINT("info",
9131 9132
             ("table rows=%u, keys/block=%u, keys/group=%u, result rows=%u, blocks=%u",
              table_records, keys_per_block, keys_per_group, *records,
9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165
              num_blocks));
  DBUG_VOID_RETURN;
}


/*
  Construct a new quick select object for queries with group by with min/max.

  SYNOPSIS
    TRP_GROUP_MIN_MAX::make_quick()
    param              Parameter from test_quick_select
    retrieve_full_rows ignored
    parent_alloc       Memory pool to use, if any; handed to the
                       QUICK_GROUP_MIN_MAX_SELECT constructor

  DESCRIPTION
    The QUICK object is built from the data stored in this TRP at creation
    time. 'param' supplies the table and the current join, and is passed on
    to get_quick_select() when a quick range select over the group prefix
    is needed.

    If the plan has a range tree:
    - a QUICK_RANGE_SELECT for group prefix retrieval is created, unless
      quick_prefix_records is HA_POS_ERROR;
    - if there is a MIN/MAX keypart, every interval of that keypart in
      index_tree is handed to add_range(), from the leftmost one onwards.

  NOTES
    retrieve_full_rows is not read by this function.

  RETURN
    New QUICK_GROUP_MIN_MAX_SELECT object if successfully created,
    NULL o/w.
*/

QUICK_SELECT_I *
TRP_GROUP_MIN_MAX::make_quick(PARAM *param, bool retrieve_full_rows,
                              MEM_ROOT *parent_alloc)
{
  DBUG_ENTER("TRP_GROUP_MIN_MAX::make_quick");

  QUICK_GROUP_MIN_MAX_SELECT *qs=
    new QUICK_GROUP_MIN_MAX_SELECT(param->table,
                                   param->thd->lex->select_lex.join,
                                   have_min, have_max, min_max_arg_part,
                                   group_prefix_len, used_key_parts,
                                   index_info, index, read_cost, records,
                                   key_infix_len, key_infix,
                                   parent_alloc);
  if (!qs)
    DBUG_RETURN(NULL);

  if (qs->init())
  {
    delete qs;
    DBUG_RETURN(NULL);
  }

  if (!range_tree)
    qs->quick_prefix_select= NULL;
  else
  {
    DBUG_ASSERT(quick_prefix_records > 0);
    if (quick_prefix_records != HA_POS_ERROR)
    {
      /* Make a QUICK_RANGE_SELECT to be used for group prefix retrieval. */
      qs->quick_prefix_select= get_quick_select(param, param_idx,
                                                index_tree,
                                                &qs->alloc);
    }
    else
      qs->quick_prefix_select= NULL; /* Can't construct a quick select. */

    /*
      Pull out the intervals on the MIN/MAX keypart and turn each of them
      into a QUICK_RANGE of the new quick select.
    */
    if (min_max_arg_part)
    {
      SEL_ARG *arg_range;

      /* Follow the keypart chain to the node for the MIN/MAX keypart. */
      for (arg_range= index_tree; arg_range;
           arg_range= arg_range->next_key_part)
      {
        if (arg_range->field->eq(min_max_arg_part->field))
          break;
      }

      /* Rewind to the leftmost interval of that keypart. */
      if (arg_range)
      {
        while (arg_range->prev)
          arg_range= arg_range->prev;
      }

      /* Register the intervals from left to right. */
      for (; arg_range; arg_range= arg_range->next)
      {
        if (qs->add_range(arg_range))
        {
          delete qs;
          DBUG_RETURN(NULL);
        }
      }
    }
  }

  qs->update_key_stat();

  DBUG_RETURN(qs);
}


/*
  Construct new quick select for group queries with min/max.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::QUICK_GROUP_MIN_MAX_SELECT()
    table             The table being accessed
    join              Descriptor of the current query
    have_min          TRUE if the query selects a MIN function
    have_max          TRUE if the query selects a MAX function
    min_max_arg_part  The only argument field of all MIN/MAX functions
    group_prefix_len  Length of all key parts in the group prefix
    prefix_key_parts  All key parts in the group prefix
    index_info        The index chosen for data access
    use_index         The id of index_info
    read_cost         Cost of this access method
    records           Number of records returned
    key_infix_len     Length of the key infix appended to the group prefix
    key_infix         Infix of constants from equality predicates
    parent_alloc      Memory pool for this and quick_prefix_select data

  RETURN
    None
*/

monty@mysql.com's avatar
monty@mysql.com committed
9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268
QUICK_GROUP_MIN_MAX_SELECT::
QUICK_GROUP_MIN_MAX_SELECT(TABLE *table, JOIN *join_arg, bool have_min_arg,
                           bool have_max_arg,
                           KEY_PART_INFO *min_max_arg_part_arg,
                           uint group_prefix_len_arg,
                           uint used_key_parts_arg, KEY *index_info_arg,
                           uint use_index, double read_cost_arg,
                           ha_rows records_arg, uint key_infix_len_arg,
                           byte *key_infix_arg, MEM_ROOT *parent_alloc)
  :join(join_arg), index_info(index_info_arg),
   group_prefix_len(group_prefix_len_arg), have_min(have_min_arg),
   have_max(have_max_arg), seen_first_key(FALSE),
   min_max_arg_part(min_max_arg_part_arg), key_infix(key_infix_arg),
9269 9270
   key_infix_len(key_infix_len_arg), min_functions_it(NULL),
   max_functions_it(NULL)
9271 9272 9273 9274 9275 9276
{
  head=       table;
  file=       head->file;
  index=      use_index;
  record=     head->record[0];
  tmp_record= head->record[1];
9277 9278 9279
  read_time= read_cost_arg;
  records= records_arg;
  used_key_parts= used_key_parts_arg;
9280 9281 9282
  real_prefix_len= group_prefix_len + key_infix_len;
  group_prefix= NULL;
  min_max_arg_len= min_max_arg_part ? min_max_arg_part->store_length : 0;
monty@mysql.com's avatar
monty@mysql.com committed
9283 9284 9285 9286 9287 9288

  /*
    We can't have parent_alloc set as the init function can't handle this case
    yet.
  */
  DBUG_ASSERT(!parent_alloc);
9289 9290 9291
  if (!parent_alloc)
  {
    init_sql_alloc(&alloc, join->thd->variables.range_alloc_block_size, 0);
monty@mysql.com's avatar
monty@mysql.com committed
9292
    join->thd->mem_root= &alloc;
9293 9294
  }
  else
9295
    bzero(&alloc, sizeof(MEM_ROOT));            // ensure that it's not used
9296 9297 9298 9299 9300 9301 9302 9303 9304
}


/*
  Do post-constructor initialization.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::init()
  
9305 9306 9307 9308 9309 9310
  DESCRIPTION
    The method performs initialization that cannot be done in the constructor
    such as memory allocations that may fail. It allocates memory for the
    group prefix and inifix buffers, and for the lists of MIN/MAX item to be
    updated during execution.

9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345
  RETURN
    0      OK
    other  Error code
*/

int QUICK_GROUP_MIN_MAX_SELECT::init()
{
  if (group_prefix) /* Already initialized. */
    return 0;

  if (!(last_prefix= (byte*) alloc_root(&alloc, group_prefix_len)))
      return 1;
  /*
    We may use group_prefix to store keys with all select fields, so allocate
    enough space for it.
  */
  if (!(group_prefix= (byte*) alloc_root(&alloc,
                                         real_prefix_len + min_max_arg_len)))
    return 1;

  if (key_infix_len > 0)
  {
    /*
      The memory location pointed to by key_infix will be deleted soon, so
      allocate a new buffer and copy the key_infix into it.
    */
    byte *tmp_key_infix= (byte*) alloc_root(&alloc, key_infix_len);
    if (!tmp_key_infix)
      return 1;
    memcpy(tmp_key_infix, this->key_infix, key_infix_len);
    this->key_infix= tmp_key_infix;
  }

  if (min_max_arg_part)
  {
monty@mishka.local's avatar
monty@mishka.local committed
9346
    if (my_init_dynamic_array(&min_max_ranges, sizeof(QUICK_RANGE*), 16, 16))
9347 9348
      return 1;

9349 9350
    if (have_min)
    {
monty@mishka.local's avatar
monty@mishka.local committed
9351
      if (!(min_functions= new List<Item_sum>))
9352 9353 9354 9355 9356 9357
        return 1;
    }
    else
      min_functions= NULL;
    if (have_max)
    {
monty@mishka.local's avatar
monty@mishka.local committed
9358
      if (!(max_functions= new List<Item_sum>))
9359 9360 9361 9362
        return 1;
    }
    else
      max_functions= NULL;
9363

9364 9365 9366
    Item_sum *min_max_item;
    Item_sum **func_ptr= join->sum_funcs;
    while ((min_max_item= *(func_ptr++)))
9367
    {
9368 9369 9370 9371
      if (have_min && (min_max_item->sum_func() == Item_sum::MIN_FUNC))
        min_functions->push_back(min_max_item);
      else if (have_max && (min_max_item->sum_func() == Item_sum::MAX_FUNC))
        max_functions->push_back(min_max_item);
9372 9373
    }

9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384
    if (have_min)
    {
      if (!(min_functions_it= new List_iterator<Item_sum>(*min_functions)))
        return 1;
    }

    if (have_max)
    {
      if (!(max_functions_it= new List_iterator<Item_sum>(*max_functions)))
        return 1;
    }
9385
  }
igor@rurik.mysql.com's avatar
igor@rurik.mysql.com committed
9386 9387
  else
    min_max_ranges.elements= 0;
9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400

  return 0;
}


/*
  Release everything the quick select holds: the open index scan (if any),
  the array of MIN/MAX ranges, the memory root, the MIN/MAX list iterators
  and the quick select used for group prefix retrieval.
*/

QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT()
{
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::~QUICK_GROUP_MIN_MAX_SELECT");

  /* End the index scan if the handler still has one initialized. */
  const bool index_open= (file->inited != handler::NONE);
  if (index_open)
    file->ha_index_end();

  /* init() sets up the range array only when there is a MIN/MAX keypart. */
  if (min_max_arg_part)
    delete_dynamic(&min_max_ranges);

  free_root(&alloc, MYF(0));

  delete min_functions_it;
  delete max_functions_it;
  delete quick_prefix_select;

  DBUG_VOID_RETURN;
}


/*
  Eventually create and add a new quick range object.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::add_range()
    sel_range  Range object from which a QUICK_RANGE is constructed

  NOTES
    Construct a new QUICK_RANGE object from a SEL_ARG object, and
    add it to the array min_max_ranges. If sel_range is an infinite
    range, e.g. (x < 5 or x > 4), then skip it and do not construct
    a quick range.
    A range with both ends present is additionally flagged as NULL_RANGE
    (IS NULL condition) or as EQ_RANGE (both ends equal over
    min_max_arg_len bytes).

  RETURN
    FALSE on success
    TRUE  otherwise
*/

bool QUICK_GROUP_MIN_MAX_SELECT::add_range(SEL_ARG *sel_range)
{
  uint flags= sel_range->min_flag | sel_range->max_flag;

  /* Skip (-inf,+inf) ranges, e.g. (x < 5 or x > 4). */
  if ((flags & NO_MIN_RANGE) && (flags & NO_MAX_RANGE))
    return FALSE;

  const bool bounded_below= !(sel_range->min_flag & NO_MIN_RANGE);
  const bool bounded_above= !(sel_range->max_flag & NO_MAX_RANGE);
  if (bounded_below && bounded_above)
  {
    const bool both_ends_null= sel_range->maybe_null &&
                               sel_range->min_value[0] &&
                               sel_range->max_value[0];
    if (both_ends_null)
      flags|= NULL_RANGE; /* IS NULL condition */
    else if (memcmp(sel_range->min_value, sel_range->max_value,
                    min_max_arg_len) == 0)
      flags|= EQ_RANGE;   /* equality condition */
  }

  QUICK_RANGE *new_range= new QUICK_RANGE(sel_range->min_value,
                                          min_max_arg_len,
                                          sel_range->max_value,
                                          min_max_arg_len,
                                          flags);
  if (!new_range)
    return TRUE;

  /* The array stores the pointer itself, hence the address of new_range. */
  return insert_dynamic(&min_max_ranges, (gptr)&new_range) ? TRUE : FALSE;
}


/*
  Determine the total number and length of the keys that will be used for
  index lookup.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()

  DESCRIPTION
    The total length of the keys used for index lookup depends on whether
    there are any predicates referencing the min/max argument, and/or if
    the min/max argument field can be NULL.
    This function does an optimistic analysis whether the search key might
    be extended by a constant for the min/max keypart. It is 'optimistic'
    because during actual execution it may happen that a particular range
    is skipped, and then a shorter key will be used. However this is data
    dependent and can't be easily estimated here.

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_key_stat()
{
  max_used_key_length= real_prefix_len;
  if (min_max_ranges.elements > 0)
  {
9482
    QUICK_RANGE *cur_range;
9483 9484 9485 9486 9487 9488 9489
    if (have_min)
    { /* Check if the right-most range has a lower boundary. */
      get_dynamic(&min_max_ranges, (gptr)&cur_range,
                  min_max_ranges.elements - 1);
      if (!(cur_range->flag & NO_MIN_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
9490
        used_key_parts++;
9491 9492 9493 9494 9495 9496 9497 9498 9499
        return;
      }
    }
    if (have_max)
    { /* Check if the left-most range has an upper boundary. */
      get_dynamic(&min_max_ranges, (gptr)&cur_range, 0);
      if (!(cur_range->flag & NO_MAX_RANGE))
      {
        max_used_key_length+= min_max_arg_len;
9500
        used_key_parts++;
9501 9502 9503 9504
        return;
      }
    }
  }
9505 9506
  else if (have_min && min_max_arg_part &&
           min_max_arg_part->field->real_maybe_null())
9507
  {
9508 9509 9510 9511 9512 9513 9514 9515
    /*
      If a MIN/MAX argument value is NULL, we can quickly determine
      that we're in the beginning of the next group, because NULLs
      are always < any other value. This allows us to quickly
      determine the end of the current group and jump to the next
      group (see next_min()) and thus effectively increases the
      usable key length.
    */
9516
    max_used_key_length+= min_max_arg_len;
9517
    used_key_parts++;
9518 9519 9520 9521 9522 9523 9524 9525 9526 9527
  }
}


/*
  Initialize a quick group min/max select for key retrieval.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::reset()

9528 9529 9530 9531
  DESCRIPTION
    Initialize the index chosen for access and find and store the prefix
    of the last group. The method is expensive since it performs disk access.

9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542
  RETURN
    0      OK
    other  Error code
*/

int QUICK_GROUP_MIN_MAX_SELECT::reset(void)
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::reset");

  file->extra(HA_EXTRA_KEYREAD); /* We need only the key attributes */
9543
  result= file->ha_index_init(index, 1);
9544
  result= file->index_last(record);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
9545 9546
  if (result == HA_ERR_END_OF_FILE)
    DBUG_RETURN(0);
9547 9548
  if (result)
    DBUG_RETURN(result);
sergefp@mysql.com's avatar
sergefp@mysql.com committed
9549 9550
  if (quick_prefix_select && quick_prefix_select->reset())
    DBUG_RETURN(1);
9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589
  /* Save the prefix of the last group. */
  key_copy(last_prefix, record, index_info, group_prefix_len);

  DBUG_RETURN(0);
}



/* 
  Get the next key containing the MIN and/or MAX key for the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::get_next()

  DESCRIPTION
    The method finds the next subsequent group of records that satisfies the
    query conditions and finds the keys that contain the MIN/MAX values for
    the key part referenced by the MIN/MAX function(s). Once a group and its
    MIN/MAX values are found, store these values in the Item_sum objects for
    the MIN/MAX functions. The rest of the values in the result row are stored
    in the Item_field::result_field of each select field. If the query does
    not contain MIN and/or MAX functions, then the function only finds the
    group prefix, which is a query answer itself.

  NOTES
    If both MIN and MAX are computed, then we use the fact that if there is
    no MIN key, there can't be a MAX key as well, so we can skip looking
    for a MAX key in this case.

  RETURN
    0                  on success
    HA_ERR_END_OF_FILE if returned all keys
    other              if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::get_next()
{
  int min_res= 0;
  int max_res= 0;
timour@mysql.com's avatar
timour@mysql.com committed
9590 9591 9592 9593 9594 9595 9596
#ifdef HPUX11
  /*
    volatile is required by a bug in the HP compiler due to which the
    last test of result fails.
  */
  volatile int result;
#else
9597
  int result;
timour@mysql.com's avatar
timour@mysql.com committed
9598
#endif
9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618
  int is_last_prefix;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::get_next");

  /*
    Loop until a group is found that satisfies all query conditions or the last
    group is reached.
  */
  do
  {
    result= next_prefix();
    /*
      Check if this is the last group prefix. Notice that at this point
      this->record contains the current prefix in record format.
    */
    is_last_prefix= key_cmp(index_info->key_part, last_prefix,
                            group_prefix_len);
    DBUG_ASSERT(is_last_prefix <= 0);
    if (result == HA_ERR_KEY_NOT_FOUND)
      continue;
9619
    if (result)
9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638
      break;

    if (have_min)
    {
      min_res= next_min();
      if (min_res == 0)
        update_min_result();
    }
    /* If there is no MIN in the group, there is no MAX either. */
    if ((have_max && !have_min) ||
        (have_max && have_min && (min_res == 0)))
    {
      max_res= next_max();
      if (max_res == 0)
        update_max_result();
      /* If a MIN was found, a MAX must have been found as well. */
      DBUG_ASSERT((have_max && !have_min) ||
                  (have_max && have_min && (max_res == 0)));
    }
9639
    /*
9640
      If this is just a GROUP BY or DISTINCT without MIN or MAX and there
9641 9642 9643 9644 9645 9646 9647
      are equality predicates for the key parts after the group, find the
      first sub-group with the extended prefix.
    */
    if (!have_min && !have_max && key_infix_len > 0)
      result= file->index_read(record, group_prefix, real_prefix_len,
                               HA_READ_KEY_EXACT);

9648
    result= have_min ? min_res : have_max ? max_res : result;
9649 9650
  } while ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
           is_last_prefix != 0);
9651 9652

  if (result == 0)
9653
  {
9654 9655 9656 9657 9658 9659 9660
    /*
      Partially mimic the behavior of end_select_send. Copy the
      field data from Item_field::field into Item_field::result_field
      of each non-aggregated field (the group fields, and optionally
      other fields in non-ANSI SQL mode).
    */
    copy_fields(&join->tmp_table_param);
9661
  }
9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675
  else if (result == HA_ERR_KEY_NOT_FOUND)
    result= HA_ERR_END_OF_FILE;

  DBUG_RETURN(result);
}


/*
  Retrieve the minimal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min()

  DESCRIPTION
9676 9677
    Find the minimal key within this group such that the key satisfies the query
    conditions and NULL semantics. The found key is loaded into this->record.
9678 9679 9680 9681 9682 9683 9684 9685 9686 9687

  IMPLEMENTATION
    Depending on the values of min_max_ranges.elements, key_infix_len, and
    whether there is a  NULL in the MIN field, this function may directly
    return without any data access. In this case we use the key loaded into
    this->record by the call to this->next_prefix() just before this call.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MIN key was found that fulfills all conditions.
9688
    HA_ERR_END_OF_FILE   - "" -
9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_min()
{
  int result= 0;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_min");

  /* Find the MIN key using the eventually extended group prefix. */
  if (min_max_ranges.elements > 0)
  {
    if ((result= next_min_in_range()))
      DBUG_RETURN(result);
  }
  else
  {
9705
    /* Apply the constant equality conditions to the non-group select fields */
9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738
    if (key_infix_len > 0)
    {
      if ((result= file->index_read(record, group_prefix, real_prefix_len,
                                    HA_READ_KEY_EXACT)))
        DBUG_RETURN(result);
    }

    /*
      If the min/max argument field is NULL, skip subsequent rows in the same
      group with NULL in it. Notice that:
      - if the first row in a group doesn't have a NULL in the field, no row
      in the same group has (because NULL < any other value),
      - min_max_arg_part->field->ptr points to some place in 'record'.
    */
    if (min_max_arg_part && min_max_arg_part->field->is_null())
    {
      /* Find the first subsequent record without NULL in the MIN/MAX field. */
      key_copy(tmp_record, record, index_info, 0);
      result= file->index_read(record, tmp_record,
                               real_prefix_len + min_max_arg_len,
                               HA_READ_AFTER_KEY);
      /*
        Check if the new record belongs to the current group by comparing its
        prefix with the group's prefix. If it is from the next group, then the
        whole group has NULLs in the MIN/MAX field, so use the first record in
        the group as a result.
        TODO:
        It is possible to reuse this new record as the result candidate for the
        next call to next_min(), and to save one lookup in the next call. For
        this add a new member 'this->next_group_prefix'.
      */
      if (!result)
      {
9739
        if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
9740
          key_restore(record, tmp_record, index_info, 0);
9741
      }
9742
      else if (result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE)
9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761
        result= 0; /* There is a result in any case. */
    }
  }

  /*
    If the MIN attribute is non-nullable, this->record already contains the
    MIN key in the group, so just return.
  */
  DBUG_RETURN(result);
}


/* 
  Retrieve the maximal key in the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max()

  DESCRIPTION
9762
    Lookup the maximal key of the group, and store it into this->record.
9763 9764 9765 9766

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if no MAX key was found that fulfills all conditions.
9767
    HA_ERR_END_OF_FILE	 - "" -
9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867
    other                if some error occurred
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max()
{
  int result;

  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_max");

  /* Get the last key in the (possibly extended) group. */
  if (min_max_ranges.elements > 0)
    result= next_max_in_range();
  else
    result= file->index_read(record, group_prefix, real_prefix_len,
                             HA_READ_PREFIX_LAST);
  DBUG_RETURN(result);
}


/*
  Determine the prefix of the next group.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_prefix()

  DESCRIPTION
    Determine the prefix of the next group that satisfies the query conditions.
    If there is a range condition referencing the group attributes, use a
    QUICK_RANGE_SELECT object to retrieve the *first* key that satisfies the
    condition. If there is a key infix of constants, append this infix
    immediately after the group attributes. The possibly extended prefix is
    stored in this->group_prefix. The first key of the found group is stored in
    this->record, on which relies this->next_min().

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the formed prefix
    HA_ERR_END_OF_FILE   if there are no more keys
    other                if some error occurred
*/
int QUICK_GROUP_MIN_MAX_SELECT::next_prefix()
{
  int result;
  DBUG_ENTER("QUICK_GROUP_MIN_MAX_SELECT::next_prefix");

  if (quick_prefix_select)
  {
    byte *cur_prefix= seen_first_key ? group_prefix : NULL;
    if ((result= quick_prefix_select->get_next_prefix(group_prefix_len,
                                                      cur_prefix)))
      DBUG_RETURN(result);
    seen_first_key= TRUE;
  }
  else
  {
    if (!seen_first_key)
    {
      result= file->index_first(record);
      if (result)
        DBUG_RETURN(result);
      seen_first_key= TRUE;
    }
    else
    {
      /* Load the first key in this group into record. */
      result= file->index_read(record, group_prefix, group_prefix_len,
                               HA_READ_AFTER_KEY);
      if (result)
        DBUG_RETURN(result);
    }
  }

  /* Save the prefix of this group for subsequent calls. */
  key_copy(group_prefix, record, index_info, group_prefix_len);
  /* Append key_infix to group_prefix. */
  if (key_infix_len > 0)
    memcpy(group_prefix + group_prefix_len,
           key_infix, key_infix_len);

  DBUG_RETURN(0);
}


/*
  Find the minimal key in a group that satisfies some range conditions for the
  min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the minimal key that is
    in the left-most possible range. If there is no such key, then the current
    group does not have a MIN key that satisfies the WHERE clause. If a key is
    found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
9868
    HA_ERR_END_OF_FILE   - "" -
9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890
    other                if some error
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_min_in_range()
{
  ha_rkey_function find_flag;
  uint search_prefix_len;
  QUICK_RANGE *cur_range;
  bool found_null= FALSE;
  int result= HA_ERR_KEY_NOT_FOUND;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= 0; range_idx < min_max_ranges.elements; range_idx++)
  { /* Search from the left-most range to the right. */
    get_dynamic(&min_max_ranges, (gptr)&cur_range, range_idx);

    /*
      If the current value for the min/max argument is bigger than the right
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != 0 && !(cur_range->flag & NO_MAX_RANGE) &&
9891
        (key_cmp(min_max_arg_part, (const byte*) cur_range->max_key,
9892
                 min_max_arg_len) == 1))
9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912
      continue;

    if (cur_range->flag & NO_MIN_RANGE)
    {
      find_flag= HA_READ_KEY_EXACT;
      search_prefix_len= real_prefix_len;
    }
    else
    {
      /* Extend the search key with the lower boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      search_prefix_len= real_prefix_len + min_max_arg_len;
      find_flag= (cur_range->flag & (EQ_RANGE | NULL_RANGE)) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MIN) ?
                 HA_READ_AFTER_KEY : HA_READ_KEY_OR_NEXT;
    }

    result= file->index_read(record, group_prefix, search_prefix_len,
                             find_flag);
9913
    if (result)
9914
    {
9915 9916 9917 9918
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & (EQ_RANGE | NULL_RANGE)))
        continue; /* Check the next range. */

9919 9920 9921 9922 9923
      /*
        In all other cases (HA_ERR_*, HA_READ_KEY_EXACT with NO_MIN_RANGE,
        HA_READ_AFTER_KEY, HA_READ_KEY_OR_NEXT) if the lookup failed for this
        range, it can't succeed for any other subsequent range.
      */
9924
      break;
9925
    }
9926 9927 9928 9929 9930 9931

    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
      break; /* No need to perform the checks below for equal keys. */

    if (cur_range->flag & NULL_RANGE)
9932 9933 9934 9935 9936 9937
    {
      /*
        Remember this key, and continue looking for a non-NULL key that
        satisfies some other condition.
      */
      memcpy(tmp_record, record, head->s->rec_buff_length);
9938 9939 9940 9941 9942 9943 9944
      found_null= TRUE;
      continue;
    }

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
    {
9945
      result= HA_ERR_KEY_NOT_FOUND;
9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962
      continue;
    }

    /* If there is an upper limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MAX_RANGE) )
    {
      /* Compose the MAX key for the range. */
      byte *max_key= (byte*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(max_key, group_prefix, real_prefix_len);
      memcpy(max_key + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      /* Compare the found key with max_key. */
      int cmp_res= key_cmp(index_info->key_part, max_key,
                           real_prefix_len + min_max_arg_len);
      if (!((cur_range->flag & NEAR_MAX) && (cmp_res == -1) ||
            (cmp_res <= 0)))
      {
9963
        result= HA_ERR_KEY_NOT_FOUND;
9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977
        continue;
      }
    }
    /* If we got to this point, the current key qualifies as MIN. */
    DBUG_ASSERT(result == 0);
    break;
  }
  /*
    If there was a key with NULL in the MIN/MAX field, and there was no other
    key without NULL from the same group that satisfies some other condition,
    then use the key with the NULL.
  */
  if (found_null && result)
  {
9978
    memcpy(record, tmp_record, head->s->rec_buff_length);
9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001
    result= 0;
  }
  return result;
}


/*
  Find the maximal key in a group that satisfies some range conditions for the
  min/max argument field.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()

  DESCRIPTION
    Given the sequence of ranges min_max_ranges, find the maximal key that is
    in the right-most possible range. If there is no such key, then the current
    group does not have a MAX key that satisfies the WHERE clause. If a key is
    found, its value is stored in this->record.

  RETURN
    0                    on success
    HA_ERR_KEY_NOT_FOUND if there is no key with the given prefix in any of
                         the ranges
10002
    HA_ERR_END_OF_FILE   - "" -
10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024
    other                if some error
*/

int QUICK_GROUP_MIN_MAX_SELECT::next_max_in_range()
{
  ha_rkey_function find_flag;
  uint search_prefix_len;
  QUICK_RANGE *cur_range;
  int result;

  DBUG_ASSERT(min_max_ranges.elements > 0);

  for (uint range_idx= min_max_ranges.elements; range_idx > 0; range_idx--)
  { /* Search from the right-most range to the left. */
    get_dynamic(&min_max_ranges, (gptr)&cur_range, range_idx - 1);

    /*
      If the current value for the min/max argument is smaller than the left
      boundary of cur_range, there is no need to check this range.
    */
    if (range_idx != min_max_ranges.elements &&
        !(cur_range->flag & NO_MIN_RANGE) &&
10025
        (key_cmp(min_max_arg_part, (const byte*) cur_range->min_key,
10026
                 min_max_arg_len) == -1))
10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047
      continue;

    if (cur_range->flag & NO_MAX_RANGE)
    {
      find_flag= HA_READ_PREFIX_LAST;
      search_prefix_len= real_prefix_len;
    }
    else
    {
      /* Extend the search key with the upper boundary for this range. */
      memcpy(group_prefix + real_prefix_len, cur_range->max_key,
             cur_range->max_length);
      search_prefix_len= real_prefix_len + min_max_arg_len;
      find_flag= (cur_range->flag & EQ_RANGE) ?
                 HA_READ_KEY_EXACT : (cur_range->flag & NEAR_MAX) ?
                 HA_READ_BEFORE_KEY : HA_READ_PREFIX_LAST_OR_PREV;
    }

    result= file->index_read(record, group_prefix, search_prefix_len,
                             find_flag);

monty@mysql.com's avatar
monty@mysql.com committed
10048 10049
    if (result)
    {
10050 10051 10052 10053
      if ((result == HA_ERR_KEY_NOT_FOUND || result == HA_ERR_END_OF_FILE) &&
          (cur_range->flag & EQ_RANGE))
        continue; /* Check the next range. */

10054 10055 10056 10057 10058
      /*
        In no key was found with this upper bound, there certainly are no keys
        in the ranges to the left.
      */
      return result;
monty@mysql.com's avatar
monty@mysql.com committed
10059
    }
10060 10061
    /* A key was found. */
    if (cur_range->flag & EQ_RANGE)
monty@mysql.com's avatar
monty@mysql.com committed
10062
      return 0; /* No need to perform the checks below for equal keys. */
10063 10064 10065

    /* Check if record belongs to the current group. */
    if (key_cmp(index_info->key_part, group_prefix, real_prefix_len))
monty@mysql.com's avatar
monty@mysql.com committed
10066
      continue;                                 // Row not found
10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154

    /* If there is a lower limit, check if the found key is in the range. */
    if ( !(cur_range->flag & NO_MIN_RANGE) )
    {
      /* Compose the MIN key for the range. */
      byte *min_key= (byte*) my_alloca(real_prefix_len + min_max_arg_len);
      memcpy(min_key, group_prefix, real_prefix_len);
      memcpy(min_key + real_prefix_len, cur_range->min_key,
             cur_range->min_length);
      /* Compare the found key with min_key. */
      int cmp_res= key_cmp(index_info->key_part, min_key,
                           real_prefix_len + min_max_arg_len);
      if (!((cur_range->flag & NEAR_MIN) && (cmp_res == 1) ||
            (cmp_res >= 0)))
        continue;
    }
    /* If we got to this point, the current key qualifies as MAX. */
    return result;
  }
  return HA_ERR_KEY_NOT_FOUND;
}


/*
  Update all MIN function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_min_result()

  DESCRIPTION
    The method iterates through all MIN functions and updates the result value
    of each function by calling Item_sum::reset(), which in turn picks the new
    result value from this->head->record[0], previously updated by
    next_min(). The updated value is stored in a member variable of each of the
    Item_sum objects, depending on the value type.

  IMPLEMENTATION
    The update must be done separately for MIN and MAX, immediately after
    next_min() was called and before next_max() is called, because both MIN and
    MAX take their result value from the same buffer this->head->record[0]
    (i.e.  this->record).

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_min_result()
{
  Item_sum *min_func;

  min_functions_it->rewind();
  while ((min_func= (*min_functions_it)++))
    min_func->reset();
}


/*
  Update all MAX function results with the newly found value.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::update_max_result()

  DESCRIPTION
    The method iterates through all MAX functions and updates the result value
    of each function by calling Item_sum::reset(), which in turn picks the new
    result value from this->head->record[0], previously updated by
    next_max(). The updated value is stored in a member variable of each of the
    Item_sum objects, depending on the value type.

  IMPLEMENTATION
    The update must be done separately for MIN and MAX, immediately after
    next_max() was called, because both MIN and MAX take their result value
    from the same buffer this->head->record[0] (i.e.  this->record).

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::update_max_result()
{
  Item_sum *max_func;

  max_functions_it->rewind();
  while ((max_func= (*max_functions_it)++))
    max_func->reset();
}


10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169
/*
  Append comma-separated list of keys this quick select uses to key_names;
  append comma-separated list of corresponding used lengths to used_lengths.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths()
    key_names    [out] Names of used indexes
    used_lengths [out] Corresponding lengths of the index names

  DESCRIPTION
    This method is used by select_describe to extract the names of the
    indexes used by a quick select.

*/

10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180
void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names,
                                                      String *used_lengths)
{
  char buf[64];
  uint length;
  key_names->append(index_info->name);
  length= longlong2str(max_used_key_length, buf, 10) - buf;
  used_lengths->append(buf, length);
}


10181
#ifndef DBUG_OFF
10182

10183 10184 10185 10186 10187 10188 10189
/*
  Write to the debug trace the names of the indexes selected in tree_map.

  param     supplies the number of keys, the real_keynr[] mapping and the table
  tree      only its address is printed
  tree_map  bit idx set means "include key number idx"
  msg       free text included in the trace line

  The names are collected, comma separated, in a String backed by a local
  buffer; "(empty)" is printed when no bit is set.
*/
static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map,
                           const char *msg)
{
  char buff[1024];
  DBUG_ENTER("print_sel_tree");

  String tmp(buff,sizeof(buff),&my_charset_bin);
  tmp.length(0);
  for (uint idx= 0; idx != param->keys; idx++)
  {
    if (tree_map->is_set(idx))
    {
      uint keynr= param->real_keynr[idx];
      if (tmp.length())
        tmp.append(',');
      tmp.append(param->table->key_info[keynr].name);
    }
  }
  if (!tmp.length())
    tmp.append(STRING_WITH_LEN("(empty)"));

  /*
    Nothing above NUL-terminates the text in tmp (buff is uninitialised),
    so the output is bounded by tmp.length() instead of relying on "%s".
  */
  DBUG_PRINT("info", ("SEL_TREE %p (%s) scans:%.*s", tree, msg,
                      (int) tmp.length(), tmp.ptr()));

  DBUG_VOID_RETURN;
}
10212

10213 10214 10215 10216

/*
  Write to the debug trace the index names of the ROR scans in [start, end).

  table  table whose key_info[] supplies the index names
  msg    free text included in the trace line
  start  first element of the array of scan descriptors
  end    one past the last element

  The names are collected, comma separated, in a String backed by a local
  buffer; "(empty)" is printed for an empty array.
*/
static void print_ror_scans_arr(TABLE *table, const char *msg,
                                struct st_ror_scan_info **start,
                                struct st_ror_scan_info **end)
{
  DBUG_ENTER("print_ror_scans_arr");

  char buff[1024];
  String tmp(buff,sizeof(buff),&my_charset_bin);
  tmp.length(0);
  for (;start != end; start++)
  {
    if (tmp.length())
      tmp.append(',');
    tmp.append(table->key_info[(*start)->keynr].name);
  }
  if (!tmp.length())
    tmp.append(STRING_WITH_LEN("(empty)"));
  /*
    Nothing above NUL-terminates the text in tmp (buff is uninitialised),
    so the output is bounded by tmp.length() instead of relying on "%s".
  */
  DBUG_PRINT("info", ("ROR key scans (%s): %.*s", msg,
                      (int) tmp.length(), tmp.ptr()));
  DBUG_VOID_RETURN;
}


bk@work.mysql.com's avatar
bk@work.mysql.com committed
10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246
/*****************************************************************************
** Print a quick range for debugging
** TODO:
** This should be changed to use a String to store each row instead
** of locking the DEBUG stream !
*****************************************************************************/

/*
  Write a key value to DBUG_FILE, one key part after the other, separated
  by '/'.

  key_part     description of the first key part stored in 'key'
  key          key image; each part occupies key_part->store_length bytes
  used_length  number of bytes of 'key' to print

  For a nullable field the first byte of its part is a null indicator:
  non-zero prints "NULL", otherwise the value image starts one byte later.
  The separator is written after every part except the last one, including
  after a NULL part.
*/
static void
print_key(KEY_PART *key_part,const char *key,uint used_length)
{
  char buff[1024];
  const char *key_end= key+used_length;
  String tmp(buff,sizeof(buff),&my_charset_bin);
  uint store_length;

  for (; key < key_end; key+=store_length, key_part++)
  {
    Field *field=      key_part->field;
    const bool maybe_null= field->real_maybe_null();
    store_length= key_part->store_length;

    if (maybe_null && *key)
      fwrite("NULL",sizeof(char),4,DBUG_FILE);
    else
    {
      /* Skip the null byte, if the part has one. */
      const char *image= maybe_null ? key + 1 : key;
      field->set_key_image((char*) image, key_part->length);
      if (field->type() == MYSQL_TYPE_BIT)
        (void) field->val_int_as_str(&tmp, 1);
      else
        field->val_str(&tmp);
      fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
    }
    /* 'key' still points at the start of this part here. */
    if (key+store_length < key_end)
      fputc('/',DBUG_FILE);
  }
}

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
10277

10278
/*
  Dump a quick select and the 'needed_reg' key map to DBUG_FILE.

  quick       quick select to dump; nothing is printed when it is NULL
  needed_reg  key map printed on the "other_keys" line

  The debug stream is locked around the whole dump.
*/
static void print_quick(QUICK_SELECT_I *quick, const key_map *needed_reg)
{
  /*
    The map is printed after a "0x" prefix, i.e. as hexadecimal text. That
    takes up to two digits per byte of the map, so MAX_KEY/4 characters plus
    the terminating NUL are reserved (MAX_KEY/8+1 only covers the raw bytes).
  */
  char buf[MAX_KEY/4+1];
  DBUG_ENTER("print_quick");
  if (!quick)
    DBUG_VOID_RETURN;
  DBUG_LOCK_FILE;

  quick->dbug_dump(0, TRUE);
  fprintf(DBUG_FILE,"other_keys: 0x%s:\n", needed_reg->print(buf));

  DBUG_UNLOCK_FILE;
  DBUG_VOID_RETURN;
}

pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
10293

10294
/*
  Write a rowid to DBUG_FILE, first as raw characters in double quotes and
  then as a list of hexadecimal byte values.

  val  first byte of the rowid
  len  number of bytes to print

  The debug stream is locked for the duration of the output.
*/
static void print_rowid(byte* val, int len)
{
  byte *pb;
  DBUG_LOCK_FILE;
  fputc('\"', DBUG_FILE);
  for (pb= val; pb!= val + len; ++pb)
    fprintf(DBUG_FILE, "%c", *pb);
  fprintf(DBUG_FILE, "\", hex: ");

  /*
    "%x" expects an unsigned int. Going through unsigned char keeps a byte
    >= 0x80 from being sign-extended (and printed as ffffffXX) in case
    'byte' is a signed character type.
  */
  for (pb= val; pb!= val + len; ++pb)
    fprintf(DBUG_FILE, "%x ", (unsigned int) (unsigned char) *pb);
  fputc('\n', DBUG_FILE);
  DBUG_UNLOCK_FILE;
}
10308

10309 10310 10311 10312
void QUICK_RANGE_SELECT::dbug_dump(int indent, bool verbose)
{
  fprintf(DBUG_FILE, "%*squick range select, key %s, length: %d\n",
	  indent, "", head->key_info[index].name, max_used_key_length);
10313

10314
  if (verbose)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10315
  {
10316 10317
    QUICK_RANGE *range;
    QUICK_RANGE **pr= (QUICK_RANGE**)ranges.buffer;
10318
    QUICK_RANGE **last_range= pr + ranges.elements;
10319
    for (; pr!=last_range; ++pr)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10320
    {
10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331
      fprintf(DBUG_FILE, "%*s", indent + 2, "");
      range= *pr;
      if (!(range->flag & NO_MIN_RANGE))
      {
        print_key(key_parts,range->min_key,range->min_length);
        if (range->flag & NEAR_MIN)
	  fputs(" < ",DBUG_FILE);
        else
	  fputs(" <= ",DBUG_FILE);
      }
      fputs("X",DBUG_FILE);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10332

10333 10334 10335 10336 10337 10338 10339 10340 10341
      if (!(range->flag & NO_MAX_RANGE))
      {
        if (range->flag & NEAR_MAX)
	  fputs(" < ",DBUG_FILE);
        else
	  fputs(" <= ",DBUG_FILE);
        print_key(key_parts,range->max_key,range->max_length);
      }
      fputs("\n",DBUG_FILE);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10342 10343
    }
  }
10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355
}

/*
  Print this index_merge select to DBUG_FILE: a header line, then every
  merged range select and, if set, pk_quick_select, enclosed in
  "merged scans { ... }" and indented by two more spaces.

  indent   number of spaces to put in front of each line
  verbose  passed on to the nested dbug_dump() calls
*/
void QUICK_INDEX_MERGE_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  QUICK_RANGE_SELECT *quick;
  fprintf(DBUG_FILE, "%*squick index_merge select\n", indent, "");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
    quick->dbug_dump(indent+2, verbose);
  if (pk_quick_select)
  {
    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
    pk_quick_select->dbug_dump(indent+2, verbose);
  }
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}

void QUICK_ROR_INTERSECT_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_RANGE_SELECT> it(quick_selects);
  QUICK_RANGE_SELECT *quick;
10366
  fprintf(DBUG_FILE, "%*squick ROR-intersect select, %scovering\n",
10367 10368 10369
          indent, "", need_to_fetch_row? "":"non-");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
10370
    quick->dbug_dump(indent+2, verbose);
10371 10372
  if (cpk_quick)
  {
10373
    fprintf(DBUG_FILE, "%*sclustered PK quick:\n", indent, "");
10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387
    cpk_quick->dbug_dump(indent+2, verbose);
  }
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}

/*
  Print this ROR-union select to DBUG_FILE: a header line, then every merged
  quick select, enclosed in "merged scans { ... }" and indented by two more
  spaces.

  indent   number of spaces to put in front of each line
  verbose  passed on to the nested dbug_dump() calls
*/
void QUICK_ROR_UNION_SELECT::dbug_dump(int indent, bool verbose)
{
  List_iterator_fast<QUICK_SELECT_I> it(quick_selects);
  QUICK_SELECT_I *quick;
  fprintf(DBUG_FILE, "%*squick ROR-union select\n", indent, "");
  fprintf(DBUG_FILE, "%*smerged scans {\n", indent, "");
  while ((quick= it++))
    quick->dbug_dump(indent+2, verbose);
  fprintf(DBUG_FILE, "%*s}\n", indent, "");
}

10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433

/*
  Print quick select information to DBUG_FILE.

  SYNOPSIS
    QUICK_GROUP_MIN_MAX_SELECT::dbug_dump()
    indent  Indentation offset
    verbose If TRUE show more detailed output.

  DESCRIPTION
    Writes one header line with the index name, index number and used key
    length, followed by up to three optional parts:
    - the length of the key infix, when there is one,
    - the dump of the quick select used for the group prefix, when set
      (indented by two more positions),
    - the number of ranges on the MIN/MAX argument, when there are any.

  IMPLEMENTATION
    Caller is responsible for locking DBUG_FILE before this call and unlocking
    it afterwards.

  RETURN
    None
*/

void QUICK_GROUP_MIN_MAX_SELECT::dbug_dump(int indent, bool verbose)
{
  FILE *const out= DBUG_FILE;

  fprintf(out, "%*squick_group_min_max_select: index %s (%d), length: %d\n",
          indent, "", index_info->name, index, max_used_key_length);

  if (key_infix_len > 0)
    fprintf(out, "%*susing key_infix with length %d:\n",
            indent, "", key_infix_len);

  if (quick_prefix_select)
  {
    fprintf(out, "%*susing quick_range_select:\n", indent, "");
    quick_prefix_select->dbug_dump(indent + 2, verbose);
  }

  if (min_max_ranges.elements > 0)
    fprintf(out, "%*susing %d quick_ranges for MIN/MAX:\n",
            indent, "", min_max_ranges.elements);
}


monty@mysql.com's avatar
monty@mysql.com committed
10434
#endif /* !DBUG_OFF */
bk@work.mysql.com's avatar
bk@work.mysql.com committed
10435 10436

/*****************************************************************************
** Instantiate templates
*****************************************************************************/

10440
#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
/* Explicit instantiations of the list templates for QUICK_RANGE. */
template class List<QUICK_RANGE>;
template class List_iterator<QUICK_RANGE>;
#endif