/* Copyright (C) 2005, 2006 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/*
  This file is a container for general functionality related
  to partitioning introduced in MySQL version 5.1. It contains functionality
  used by all handlers that support partitioning, such as
  the partitioning handler itself and the NDB handler.

  The first version was written by Mikael Ronstrom.

  This version supports RANGE partitioning, LIST partitioning, HASH
  partitioning and composite partitioning (hereafter called subpartitioning)
  where each RANGE/LIST partitioning is HASH partitioned. The hash function
  can either be supplied by the user or by only a list of fields (also
  called KEY partitioning), where the MySQL server will use an internal
  hash function.
  There are quite a few defaults that can be used as well.
*/

/* Some general useful functions */

35
#define MYSQL_LEX 1
36 37 38 39 40
#include "mysql_priv.h"
#include <errno.h>
#include <m_ctype.h>
#include "md5.h"

41
#ifdef WITH_PARTITION_STORAGE_ENGINE
kent@mysql.com's avatar
kent@mysql.com committed
42
#include "ha_partition.h"
43 44 45
/*
  Partition related functions declarations and some static constants;
*/
46 47
/*
  Table of partitioning keyword strings, each stored together with its
  length. Note that the last entry is spelled with a trailing space.
*/
const LEX_STRING partition_keywords[]=
{
  { C_STRING_WITH_LEN("HASH") },
  { C_STRING_WITH_LEN("RANGE") },
  { C_STRING_WITH_LEN("LIST") },
  { C_STRING_WITH_LEN("KEY") },
  { C_STRING_WITH_LEN("MAXVALUE") },
  { C_STRING_WITH_LEN("LINEAR ") }
};

/* File-local string constants: keywords first, then punctuation. */
static const char *part_str=        "PARTITION";
static const char *sub_str=         "SUB";
static const char *by_str=          "BY";
static const char *space_str=       " ";
static const char *equal_str=       "=";
static const char *end_paren_str=   ")";
static const char *begin_paren_str= "(";
static const char *comma_str=       ",";
63

64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
/*
  Forward declarations of the partition id calculation functions.
  The functions taking part_id/func_value return an int status and deliver
  the partition id through *part_id; the *_sub variants return the
  subpartition id directly.
*/

/* File-local variants declared with "charset" in their names. */
static int get_part_id_charset_func_all(partition_info *part_info,
                                        uint32 *part_id,
                                        longlong *func_value);
static int get_part_id_charset_func_part(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value);
static int get_part_id_charset_func_subpart(partition_info *part_info,
                                            uint32 *part_id,
                                            longlong *func_value);
static int get_part_part_id_charset_func(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value);
static uint32 get_subpart_id_charset_func(partition_info *part_info);

/* Tables without subpartitioning. */
int get_partition_id_list(partition_info *part_info,
                          uint32 *part_id,
                          longlong *func_value);
int get_partition_id_range(partition_info *part_info,
                           uint32 *part_id,
                           longlong *func_value);
int get_partition_id_hash_nosub(partition_info *part_info,
                                uint32 *part_id,
                                longlong *func_value);
int get_partition_id_key_nosub(partition_info *part_info,
                               uint32 *part_id,
                               longlong *func_value);
int get_partition_id_linear_hash_nosub(partition_info *part_info,
                                       uint32 *part_id,
                                       longlong *func_value);
int get_partition_id_linear_key_nosub(partition_info *part_info,
                                      uint32 *part_id,
                                      longlong *func_value);

/* RANGE partitioned tables with subpartitioning. */
int get_partition_id_range_sub_hash(partition_info *part_info,
                                    uint32 *part_id,
                                    longlong *func_value);
int get_partition_id_range_sub_key(partition_info *part_info,
                                   uint32 *part_id,
                                   longlong *func_value);
int get_partition_id_range_sub_linear_hash(partition_info *part_info,
                                           uint32 *part_id,
                                           longlong *func_value);
int get_partition_id_range_sub_linear_key(partition_info *part_info,
                                          uint32 *part_id,
                                          longlong *func_value);

/* LIST partitioned tables with subpartitioning. */
int get_partition_id_list_sub_hash(partition_info *part_info,
                                   uint32 *part_id,
                                   longlong *func_value);
int get_partition_id_list_sub_key(partition_info *part_info,
                                  uint32 *part_id,
                                  longlong *func_value);
int get_partition_id_list_sub_linear_hash(partition_info *part_info,
                                          uint32 *part_id,
                                          longlong *func_value);
int get_partition_id_list_sub_linear_key(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value);

/* Subpartition id calculation. */
uint32 get_partition_id_hash_sub(partition_info *part_info);
uint32 get_partition_id_key_sub(partition_info *part_info);
uint32 get_partition_id_linear_hash_sub(partition_info *part_info);
uint32 get_partition_id_linear_key_sub(partition_info *part_info);
126 127
#endif

128 129 130 131 132 133
static uint32 get_next_partition_via_walking(PARTITION_ITERATOR*);
static uint32 get_next_subpartition_via_walking(PARTITION_ITERATOR*);
uint32 get_next_partition_id_range(PARTITION_ITERATOR* part_iter);
uint32 get_next_partition_id_list(PARTITION_ITERATOR* part_iter);
int get_part_iter_for_interval_via_mapping(partition_info *part_info,
                                           bool is_subpart,
134
                                           char *min_value, char *max_value,
135 136 137 138
                                           uint flags,
                                           PARTITION_ITERATOR *part_iter);
int get_part_iter_for_interval_via_walking(partition_info *part_info,
                                           bool is_subpart,
139
                                           char *min_value, char *max_value,
140 141 142
                                           uint flags,
                                           PARTITION_ITERATOR *part_iter);
static void set_up_range_analysis_info(partition_info *part_info);
143

144
#ifdef WITH_PARTITION_STORAGE_ENGINE
145
/*
146 147
  A support function to check if a name is in a list of strings

148
  SYNOPSIS
149 150 151 152
    is_name_in_list()
    name               String searched for
    list_names         A list of names searched in

153 154 155 156 157
  RETURN VALUES
    TRUE               String found
    FALSE              String not found
*/

158 159
bool is_name_in_list(char *name,
                          List<char> list_names)
160
{
161 162
  List_iterator<char> names_it(list_names);
  uint no_names= list_names.elements;
163
  uint i= 0;
164

165 166
  do
  {
167 168
    char *list_name= names_it++;
    if (!(my_strcasecmp(system_charset_info, name, list_name)))
169 170 171 172 173 174
      return TRUE;
  } while (++i < no_names);
  return FALSE;
}


175 176 177 178 179 180 181 182

/*
  Set-up defaults for partitions. 

  SYNOPSIS
    partition_default_handling()
    table                         Table object
    part_info                     Partition info to set up
183 184
    is_create_table_ind           Is this part of a table creation
    normalized_path               Normalized path name of table and database
185 186 187 188 189 190

  RETURN VALUES
    TRUE                          Error
    FALSE                         Success
*/

191
bool partition_default_handling(TABLE *table, partition_info *part_info,
192
                                bool is_create_table_ind,
193
                                const char *normalized_path)
194 195 196 197 198
{
  DBUG_ENTER("partition_default_handling");

  if (part_info->use_default_no_partitions)
  {
199 200
    if (!is_create_table_ind &&
        table->file->get_no_parts(normalized_path, &part_info->no_parts))
201 202 203 204
    {
      DBUG_RETURN(TRUE);
    }
  }
205
  else if (part_info->is_sub_partitioned() &&
206 207 208
           part_info->use_default_no_subpartitions)
  {
    uint no_parts;
209 210
    if (!is_create_table_ind &&
        (table->file->get_no_parts(normalized_path, &no_parts)))
211 212 213 214 215 216 217
    {
      DBUG_RETURN(TRUE);
    }
    DBUG_ASSERT(part_info->no_parts > 0);
    part_info->no_subparts= no_parts / part_info->no_parts;
    DBUG_ASSERT((no_parts % part_info->no_parts) == 0);
  }
218 219
  part_info->set_up_defaults_for_partitioning(table->file,
                                              (ulonglong)0, (uint)0);
220 221 222 223
  DBUG_RETURN(FALSE);
}


224 225 226 227 228 229 230
/*
  Check that the reorganized table will not have duplicate partitions.

  SYNOPSIS
    check_reorganise_list()
    new_part_info      New partition info
    old_part_info      Old partition info
231 232
    list_part_names    The list of partition names that will go away and
                       can be reused in the new table.
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269

  RETURN VALUES
    TRUE               Unacceptable name conflict detected.
    FALSE              New names are OK.

  DESCRIPTION
    Can handle the case where 'new_part_info' and 'old_part_info' are the
    same object, in which case it checks that the list of names in the
    partitions doesn't contain any duplicated names.
*/

/*
  Compare every new partition name against the old partition names
  (case-insensitively, using system_charset_info). A match is a conflict
  unless the old name is present in list_part_names.

  When both arguments are the same object, each partition is only compared
  with the partitions that precede it in the list, so a partition is never
  compared with itself.

  Either partition list may be empty. The previous do/while loops always
  executed once and dereferenced a NULL element in that case.
*/
bool check_reorganise_list(partition_info *new_part_info,
                           partition_info *old_part_info,
                           List<char> list_part_names)
{
  uint new_count, old_count;
  uint no_new_parts= new_part_info->partitions.elements;
  uint no_old_parts= old_part_info->partitions.elements;
  List_iterator<partition_element> new_parts_it(new_part_info->partitions);
  bool same_part_info= (new_part_info == old_part_info);
  DBUG_ENTER("check_reorganise_list");

  /* Counters are 1-based positions in the respective lists. */
  for (new_count= 1; new_count <= no_new_parts; new_count++)
  {
    List_iterator<partition_element> old_parts_it(old_part_info->partitions);
    char *new_name= (new_parts_it++)->partition_name;

    for (old_count= 1; old_count <= no_old_parts; old_count++)
    {
      char *old_name= (old_parts_it++)->partition_name;
      if (same_part_info && old_count == new_count)
        break;
      if (!(my_strcasecmp(system_charset_info, old_name, new_name)))
      {
        if (!is_name_in_list(old_name, list_part_names))
          DBUG_RETURN(TRUE);
      }
    }
  }
  DBUG_RETURN(FALSE);
}


279 280 281
/*
  A useful routine used by update_row for partition handlers to calculate
  the partition ids of the old and the new record.
282

283 284 285 286 287 288
  SYNOPSIS
    get_parts_for_update()
    old_data                Buffer of old record
    new_data                Buffer of new record
    rec0                    Reference to table->record[0]
    part_info               Reference to partition information
289 290 291
    out:old_part_id         The returned partition id of old record 
    out:new_part_id         The returned partition id of new record

292 293 294 295 296 297 298
  RETURN VALUE
    0                       Success
    > 0                     Error code
*/

int get_parts_for_update(const byte *old_data, byte *new_data,
                         const byte *rec0, partition_info *part_info,
299 300
                         uint32 *old_part_id, uint32 *new_part_id,
                         longlong *new_func_value)
301 302 303
{
  Field **part_field_array= part_info->full_part_field_array;
  int error;
304
  longlong old_func_value;
305 306
  DBUG_ENTER("get_parts_for_update");

307
  DBUG_ASSERT(new_data == rec0);
308
  set_field_ptr(part_field_array, old_data, rec0);
309 310
  error= part_info->get_partition_id(part_info, old_part_id,
                                     &old_func_value);
311 312 313 314 315 316 317 318 319 320
  set_field_ptr(part_field_array, rec0, old_data);
  if (unlikely(error))                             // Should never happen
  {
    DBUG_ASSERT(0);
    DBUG_RETURN(error);
  }
#ifdef NOT_NEEDED
  if (new_data == rec0)
#endif
  {
321 322 323
    if (unlikely(error= part_info->get_partition_id(part_info,
                                                    new_part_id,
                                                    new_func_value)))
324 325 326 327 328 329 330 331 332 333 334 335 336
    {
      DBUG_RETURN(error);
    }
  }
#ifdef NOT_NEEDED
  else
  {
    /*
      This branch should never execute but it is written anyways for
      future use. It will be tested by ensuring that the above
      condition is false in one test situation before pushing the code.
    */
    set_field_ptr(part_field_array, new_data, rec0);
337 338
    error= part_info->get_partition_id(part_info, new_part_id,
                                       new_func_value);
339 340 341 342 343 344 345 346 347 348 349 350 351 352
    set_field_ptr(part_field_array, rec0, new_data);
    if (unlikely(error))
    {
      DBUG_RETURN(error);
    }
  }
#endif
  DBUG_RETURN(0);
}


/*
  A useful routine used by delete_row for partition handlers to calculate
  the partition id.
353

354 355 356 357 358
  SYNOPSIS
    get_part_for_delete()
    buf                     Buffer of old record
    rec0                    Reference to table->record[0]
    part_info               Reference to partition information
359 360
    out:part_id             The returned partition id to delete from

361 362 363
  RETURN VALUE
    0                       Success
    > 0                     Error code
364

365 366 367 368 369 370 371 372 373 374
  DESCRIPTION
    Dependent on whether buf is not record[0] we need to prepare the
    fields. Then we call the function pointer get_partition_id to
    calculate the partition id.
*/

int get_part_for_delete(const byte *buf, const byte *rec0,
                        partition_info *part_info, uint32 *part_id)
{
  int error;
375
  longlong func_value;
376 377 378 379
  DBUG_ENTER("get_part_for_delete");

  if (likely(buf == rec0))
  {
380 381
    if (unlikely((error= part_info->get_partition_id(part_info, part_id,
                                                     &func_value))))
382 383 384 385 386 387 388 389 390
    {
      DBUG_RETURN(error);
    }
    DBUG_PRINT("info", ("Delete from partition %d", *part_id));
  }
  else
  {
    Field **part_field_array= part_info->full_part_field_array;
    set_field_ptr(part_field_array, buf, rec0);
391
    error= part_info->get_partition_id(part_info, part_id, &func_value);
392 393 394 395 396 397 398 399 400 401 402 403
    set_field_ptr(part_field_array, rec0, buf);
    if (unlikely(error))
    {
      DBUG_RETURN(error);
    }
    DBUG_PRINT("info", ("Delete from partition %d (path2)", *part_id));
  }
  DBUG_RETURN(0);
}


/*
404 405 406
  This method is used to set-up both partition and subpartitioning
  field array and used for all types of partitioning.
  It is part of the logic around fix_partition_func.
407 408 409 410 411

  SYNOPSIS
    set_up_field_array()
    table                TABLE object for which partition fields are set-up
    sub_part             Is the table subpartitioned as well
412

413 414 415
  RETURN VALUE
    TRUE                 Error, some field didn't meet requirements
    FALSE                Ok, partition field array set-up
416

417
  DESCRIPTION
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442

    A great number of functions below here is part of the fix_partition_func
    method. It is used to set up the partition structures for execution from
    openfrm. It is called at the end of the openfrm when the table struct has
    been set-up apart from the partition information.
    It involves:
    1) Setting arrays of fields for the partition functions.
    2) Setting up binary search array for LIST partitioning
    3) Setting up array for binary search for RANGE partitioning
    4) Setting up key_map's to assist in quick evaluation whether one
       can deduce anything from a given index of what partition to use
    5) Checking whether a set of partitions can be derived from a range on
       a field in the partition function.
    As part of doing this there is also a great number of error controls.
    This is actually the place where most of the things are checked for
    partition information when creating a table.
    Things that are checked includes
    1) All fields of partition function in Primary keys and unique indexes
       (if not supported)


    Create an array of partition fields (NULL terminated). Before this method
    is called fix_fields or find_field_in_table_sef has been called to set
    GET_FIXED_FIELDS_FLAG on all fields that are part of the partition
    function.
443
*/
444

445
static bool set_up_field_array(TABLE *table,
446
                              bool is_sub_part)
447 448
{
  Field **ptr, *field, **field_array;
449 450 451
  uint no_fields= 0;
  uint size_field_array;
  uint i= 0;
452
  partition_info *part_info= table->part_info;
453 454 455 456 457 458 459 460 461
  int result= FALSE;
  DBUG_ENTER("set_up_field_array");

  ptr= table->field;
  while ((field= *(ptr++))) 
  {
    if (field->flags & GET_FIXED_FIELDS_FLAG)
      no_fields++;
  }
462 463 464 465 466 467 468 469
  if (no_fields == 0)
  {
    /*
      We are using hidden key as partitioning field
    */
    DBUG_ASSERT(!is_sub_part);
    DBUG_RETURN(result);
  }
470 471 472 473
  size_field_array= (no_fields+1)*sizeof(Field*);
  field_array= (Field**)sql_alloc(size_field_array);
  if (unlikely(!field_array))
  {
474
    mem_alloc_error(size_field_array);
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504
    result= TRUE;
  }
  ptr= table->field;
  while ((field= *(ptr++))) 
  {
    if (field->flags & GET_FIXED_FIELDS_FLAG)
    {
      field->flags&= ~GET_FIXED_FIELDS_FLAG;
      field->flags|= FIELD_IN_PART_FUNC_FLAG;
      if (likely(!result))
      {
        field_array[i++]= field;

        /*
          We check that the fields are proper. It is required for each
          field in a partition function to:
          1) Not be a BLOB of any type
            A BLOB takes too long time to evaluate so we don't want it for
            performance reasons.
        */

        if (unlikely(field->flags & BLOB_FLAG))
        {
          my_error(ER_BLOB_FIELD_IN_PART_FUNC_ERROR, MYF(0));
          result= TRUE;
        }
      }
    }
  }
  field_array[no_fields]= 0;
505
  if (!is_sub_part)
506 507 508 509 510 511 512 513 514 515 516 517 518
  {
    part_info->part_field_array= field_array;
    part_info->no_part_fields= no_fields;
  }
  else
  {
    part_info->subpart_field_array= field_array;
    part_info->no_subpart_fields= no_fields;
  }
  DBUG_RETURN(result);
}


519

520 521 522
/*
  Create a field array including all fields of both the partitioning and the
  subpartitioning functions.
523

524 525 526 527
  SYNOPSIS
    create_full_part_field_array()
    table                TABLE object for which partition fields are set-up
    part_info            Reference to partitioning data structure
528

529 530 531
  RETURN VALUE
    TRUE                 Memory allocation of field array failed
    FALSE                Ok
532

533 534 535 536 537 538 539 540 541 542 543
  DESCRIPTION
    If there is no subpartitioning then the same array is used as for the
    partitioning. Otherwise a new array is built up using the flag
    FIELD_IN_PART_FUNC in the field object.
    This function is called from fix_partition_func
*/

/*
  Set part_info->full_part_field_array and no_full_part_fields.

  Without subpartitioning the partition field array is reused as is.
  Otherwise a new NULL-terminated array is allocated and filled with every
  table field that carries FIELD_IN_PART_FUNC_FLAG.

  Returns TRUE if the allocation failed, FALSE otherwise.
*/
static bool create_full_part_field_array(TABLE *table,
                                         partition_info *part_info)
{
  Field **fld;
  Field **full_array;
  uint flagged= 0;
  uint alloc_size;
  uint idx= 0;
  DBUG_ENTER("create_full_part_field_array");

  if (!part_info->is_sub_partitioned())
  {
    part_info->full_part_field_array= part_info->part_field_array;
    part_info->no_full_part_fields= part_info->no_part_fields;
    DBUG_RETURN(FALSE);
  }

  /* First pass: count the fields used by either function. */
  for (fld= table->field; *fld; fld++)
  {
    if ((*fld)->flags & FIELD_IN_PART_FUNC_FLAG)
      flagged++;
  }

  alloc_size= (flagged+1)*sizeof(Field*);
  full_array= (Field**)sql_alloc(alloc_size);
  if (unlikely(!full_array))
  {
    mem_alloc_error(alloc_size);
    DBUG_RETURN(TRUE);
  }

  /* Second pass: collect the fields in table order. */
  for (fld= table->field; *fld; fld++)
  {
    if ((*fld)->flags & FIELD_IN_PART_FUNC_FLAG)
      full_array[idx++]= *fld;
  }
  full_array[idx]= 0;
  part_info->full_part_field_array= full_array;
  part_info->no_full_part_fields= idx;
  DBUG_RETURN(FALSE);
}


/*

  Clear flag GET_FIXED_FIELDS_FLAG in all fields of a key previously set by
  set_indicator_in_key_fields (always used in pairs).
590

591 592 593
  SYNOPSIS
    clear_indicator_in_key_fields()
    key_info                  Reference to find the key fields
594 595 596 597 598 599 600 601 602 603 604 605

  RETURN VALUE
    NONE

  DESCRIPTION
    These support routines are used to set/reset an indicator on all fields
    in a certain key. They are used in conjunction with another support
    routine that traverses all fields in the partition function (PF) to find
    out whether all or some fields in the PF are part of the key. This is
    used to check that primary keys and unique keys involve all fields in
    the PF (unless supported) and to derive the key_map's used to quickly
    decide whether the index can be used to derive which partitions need to
    be scanned.
606 607 608 609 610 611 612 613 614 615 616 617 618
*/

/* Remove GET_FIXED_FIELDS_FLAG from the field of every key part of key_info. */
static void clear_indicator_in_key_fields(KEY *key_info)
{
  KEY_PART_INFO *part= key_info->key_part;
  KEY_PART_INFO *parts_end= part + key_info->key_parts;

  while (part != parts_end)
  {
    part->field->flags&= (~GET_FIXED_FIELDS_FLAG);
    part++;
  }
}


/*
  Set flag GET_FIXED_FIELDS_FLAG in all fields of a key.
619

620 621 622
  SYNOPSIS
    set_indicator_in_key_fields
    key_info                  Reference to find the key fields
623 624 625

  RETURN VALUE
    NONE
626 627 628 629 630 631 632 633 634 635 636 637 638 639
*/

/* Put GET_FIXED_FIELDS_FLAG on the field of every key part of key_info. */
static void set_indicator_in_key_fields(KEY *key_info)
{
  KEY_PART_INFO *part= key_info->key_part;
  KEY_PART_INFO *parts_end= part + key_info->key_parts;

  while (part != parts_end)
  {
    part->field->flags|= GET_FIXED_FIELDS_FLAG;
    part++;
  }
}


/*
  Check if all or some fields in the partition field array are part of a key
  previously used to tag key fields.
640

641 642 643
  SYNOPSIS
    check_fields_in_PF()
    ptr                  Partition field array
644 645 646
    out:all_fields       Is all fields of partition field array used in key
    out:some_fields      Is some fields of partition field array used in key

647 648 649 650 651 652 653 654
  RETURN VALUE
    all_fields, some_fields
*/

/*
  Report how many fields of a NULL-terminated field array carry
  GET_FIXED_FIELDS_FLAG.

  *all_fields becomes TRUE only if the array is non-empty and every field
  is flagged; *some_fields becomes TRUE if at least one field is flagged.
  A NULL or empty array yields FALSE for both.
*/
static void check_fields_in_PF(Field **ptr, bool *all_fields,
                               bool *some_fields)
{
  DBUG_ENTER("check_fields_in_PF");

  *all_fields= TRUE;
  *some_fields= FALSE;
  if (!ptr || !*ptr)
  {
    *all_fields= FALSE;
    DBUG_VOID_RETURN;
  }
  for (; *ptr; ptr++)
  {
    /* Is this field of the PF part of the key currently investigated? */
    if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
      *all_fields= FALSE;
    else
      *some_fields= TRUE;
  }
  DBUG_VOID_RETURN;
}


/*
  Clear flag GET_FIXED_FIELDS_FLAG in all fields of the table.
  This routine is used for error handling purposes.
678

679 680 681
  SYNOPSIS
    clear_field_flag()
    table                TABLE object for which partition fields are set-up
682 683 684

  RETURN VALUE
    NONE
685 686 687 688 689 690 691 692 693 694 695 696 697 698
*/

/* Remove GET_FIXED_FIELDS_FLAG from every field in table->field. */
static void clear_field_flag(TABLE *table)
{
  Field **fld= table->field;
  DBUG_ENTER("clear_field_flag");

  while (*fld)
  {
    (*fld)->flags&= (~GET_FIXED_FIELDS_FLAG);
    fld++;
  }
  DBUG_VOID_RETURN;
}


/*
699 700 701
  find_field_in_table_sef finds the field given its name. All fields get
  GET_FIXED_FIELDS_FLAG set.

702 703 704 705 706 707
  SYNOPSIS
    handle_list_of_fields()
    it                   A list of field names for the partition function
    table                TABLE object for which partition fields are set-up
    part_info            Reference to partitioning data structure
    sub_part             Is the table subpartitioned as well
708

709 710 711
  RETURN VALUE
    TRUE                 Fields in list of fields not part of table
    FALSE                All fields ok and array created
712

713
  DESCRIPTION
714 715 716 717
    This routine sets-up the partition field array for KEY partitioning, it
    also verifies that all fields in the list of fields is actually a part of
    the table.

718 719
*/

720

721 722 723
static bool handle_list_of_fields(List_iterator<char> it,
                                  TABLE *table,
                                  partition_info *part_info,
724
                                  bool is_sub_part)
725 726 727 728
{
  Field *field;
  bool result;
  char *field_name;
729
  bool is_list_empty= TRUE;
730 731 732 733
  DBUG_ENTER("handle_list_of_fields");

  while ((field_name= it++))
  {
734
    is_list_empty= FALSE;
735 736 737 738 739 740 741 742 743 744 745
    field= find_field_in_table_sef(table, field_name);
    if (likely(field != 0))
      field->flags|= GET_FIXED_FIELDS_FLAG;
    else
    {
      my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0));
      clear_field_flag(table);
      result= TRUE;
      goto end;
    }
  }
746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
  if (is_list_empty)
  {
    uint primary_key= table->s->primary_key;
    if (primary_key != MAX_KEY)
    {
      uint no_key_parts= table->key_info[primary_key].key_parts, i;
      /*
        In the case of an empty list we use primary key as partition key.
      */
      for (i= 0; i < no_key_parts; i++)
      {
        Field *field= table->key_info[primary_key].key_part[i].field;
        field->flags|= GET_FIXED_FIELDS_FLAG;
      }
    }
    else
    {
antony@ppcg5.local's avatar
antony@ppcg5.local committed
763 764 765
      if (table->s->db_type()->partition_flags &&
          (table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION) &&
          (table->s->db_type()->partition_flags() & HA_CAN_PARTITION))
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782
      {
        /*
          This engine can handle automatic partitioning and there is no
          primary key. In this case we rely on that the engine handles
          partitioning based on a hidden key. Thus we allocate no
          array for partitioning fields.
        */
        DBUG_RETURN(FALSE);
      }
      else
      {
        my_error(ER_FIELD_NOT_FOUND_PART_ERROR, MYF(0));
        DBUG_RETURN(TRUE);
      }
    }
  }
  result= set_up_field_array(table, is_sub_part);
783 784 785 786 787
end:
  DBUG_RETURN(result);
}


788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814
/*
  Support function to check if all VALUES * (expression) is of the
  right sign (no signed constants when unsigned partition function)

  SYNOPSIS
    check_signed_flag()
    part_info                Partition info object

  RETURN VALUES
    0                        No errors due to sign errors
    >0                       Sign error
*/

/*
  Raise ER_PARTITION_CONST_DOMAIN_ERROR if a partition element has
  signed_flag set while the partition expression is unsigned.

  Nothing is checked for HASH partitioning or when the partition expression
  is not unsigned. Only the first no_parts elements are examined.

  Returns 0 if no sign error was found, otherwise
  ER_PARTITION_CONST_DOMAIN_ERROR.

  A table with no_parts == 0 yields 0. The previous do/while form always
  executed once and dereferenced a NULL element in that case.
*/
int check_signed_flag(partition_info *part_info)
{
  if (part_info->part_type == HASH_PARTITION ||
      !part_info->part_expr->unsigned_flag)
    return 0;

  List_iterator<partition_element> part_it(part_info->partitions);
  for (uint i= 0; i < part_info->no_parts; i++)
  {
    partition_element *part_elem= part_it++;

    if (part_elem->signed_flag)
    {
      my_error(ER_PARTITION_CONST_DOMAIN_ERROR, MYF(0));
      return ER_PARTITION_CONST_DOMAIN_ERROR;
    }
  }
  return 0;
}


825
/*
826 827 828 829 830
  The function uses a new feature in fix_fields where the flag 
  GET_FIXED_FIELDS_FLAG is set for all fields in the item tree.
  This field must always be reset before returning from the function
  since it is used for other purposes as well.

831 832 833 834
  SYNOPSIS
    fix_fields_part_func()
    thd                  The thread object
    func_expr            The item tree reference of the partition function
835
    table                The table object
836
    part_info            Reference to partitioning data structure
837 838
    is_sub_part          Is the table subpartitioned as well
    is_field_to_be_setup Flag if we are to set-up field arrays
839

840 841 842 843
  RETURN VALUE
    TRUE                 An error occurred, something was wrong with the
                         partition function.
    FALSE                Ok, a partition field array was created
844

845
  DESCRIPTION
846 847 848 849 850 851
    This function is used to build an array of partition fields for the
    partitioning function and subpartitioning function. The partitioning
    function is an item tree that must reference at least one field in the
    table. This is checked first in the parser that the function doesn't
    contain non-cacheable parts (like a random function) and by checking
    here that the function isn't a constant function.
852 853 854 855 856 857 858

    Calculate the number of fields in the partition function.
    Use it to allocate memory for the array of Field pointers.
    Initialise array of field pointers. Use information set when
    calling fix_fields and reset it immediately after.
    The get_fields_in_item_tree activates setting of bit in flags
    on the field object.
859
*/
860

861 862
bool fix_fields_part_func(THD *thd, Item* func_expr, TABLE *table,
                          bool is_sub_part, bool is_field_to_be_setup)
{
  partition_info *part_info= table->part_info;
  uint dir_length, home_dir_length;
  bool result= TRUE;
  TABLE_LIST tables;
  TABLE_LIST *save_table_list, *save_first_table, *save_last_table;
  int error;
  Name_resolution_context *context;
  const char *save_where;
  char* db_name;
  char db_name_string[FN_REFLEN];
  DBUG_ENTER("fix_fields_part_func");

  if (part_info->fixed)
  {
    /*
      The partition info is already marked as fixed. Only the signed flag
      check remains, and it is only done for the partition function proper
      (not for the subpartition function).
    */
    if (!(is_sub_part || (error= check_signed_flag(part_info))))
      result= FALSE;
    goto end;
  }

  /*
    Set-up the TABLE_LIST object to be a list with a single table
    Set the object to zero to create NULL pointers and set alias
    and real name to table name and get database name from file name.
  */

  bzero((void*)&tables, sizeof(TABLE_LIST));
  tables.alias= tables.table_name= (char*) table->s->table_name.str;
  tables.table= table;
  tables.next_local= 0;
  tables.next_name_resolution_table= 0;
  strmov(db_name_string, table->s->normalized_path.str);
  dir_length= dirname_length(db_name_string);
  /*
    Cut off the table name part so that the last remaining path component
    is the database directory. Guard against a path without any directory
    part: dir_length is unsigned, so dir_length - 1 would wrap around and
    index far outside the buffer.
  */
  if (dir_length)
    db_name_string[dir_length - 1]= 0;
  home_dir_length= dirname_length(db_name_string);
  db_name= &db_name_string[home_dir_length];
  tables.db= db_name;

  context= thd->lex->current_context();
  table->map= 1; //To ensure correct calculation of const item
  table->get_fields_in_item_tree= TRUE;
  /*
    Temporarily make the name resolution context see only this table.
    The saved values are put back right after fix_fields.
  */
  save_table_list= context->table_list;
  save_first_table= context->first_name_resolution_table;
  save_last_table= context->last_name_resolution_table;
  context->table_list= &tables;
  context->first_name_resolution_table= &tables;
  context->last_name_resolution_table= NULL;
  func_expr->walk(&Item::change_context_processor, 0, (byte*) context);
  save_where= thd->where;
  thd->where= "partition function";
  /*
    In execution we must avoid the use of thd->change_item_tree since
    we might release memory before statement is completed. We do this
    by temporarily setting the stmt_arena->mem_root to be the mem_root
    of the table object, this also ensures that any memory allocated
    during fix_fields will not be released at end of execution of this
    statement. Thus the item tree will remain valid also in subsequent
    executions of this table object. We do however not at the moment
    support allocations during execution of val_int so any item class
    that does this during val_int must be disallowed as partition
    function.
    SEE Bug #21658
  */
  /*
    This is a tricky call to prepare for since it can have a large number
    of interesting side effects, both desirable and undesirable.
  */
  error= func_expr->fix_fields(thd, (Item**)0);

  context->table_list= save_table_list;
  context->first_name_resolution_table= save_first_table;
  context->last_name_resolution_table= save_last_table;
  /*
    Restore thd->where before looking at the result of fix_fields so that
    it is put back on the failure path as well. Previously a failing
    fix_fields left thd->where pointing at "partition function".
  */
  thd->where= save_where;
  if (unlikely(error))
  {
    DBUG_PRINT("info", ("Field in partition function not part of table"));
    if (is_field_to_be_setup)
      clear_field_flag(table);
    goto end;
  }
  if (unlikely(func_expr->const_item()))
  {
    /* A partition function must depend on at least one field */
    my_error(ER_CONST_EXPR_IN_PARTITION_FUNC_ERROR, MYF(0));
    clear_field_flag(table);
    goto end;
  }
  if ((!is_sub_part) && (error= check_signed_flag(part_info)))
    goto end;
  result= FALSE;
  if (is_field_to_be_setup)
    result= set_up_field_array(table, is_sub_part);
  if (!is_sub_part)
    part_info->fixed= TRUE;
end:
  table->get_fields_in_item_tree= FALSE;
  table->map= 0; //Restore old value
  DBUG_RETURN(result);
}


/*
964 965
  Check that the primary key contains all partition fields if defined

966 967 968
  SYNOPSIS
    check_primary_key()
    table                TABLE object for which partition fields are set-up
969

970 971 972 973 974
  RETURN VALUES
    TRUE                 Not all fields in partitioning function was part
                         of primary key
    FALSE                Ok, all fields of partitioning function were part
                         of primary key
975 976 977 978 979 980

  DESCRIPTION
    This function verifies that if there is a primary key that it contains
    all the fields of the partition function.
    This is a temporary limitation that will hopefully be removed after a
    while.
981 982 983 984 985
*/

static bool check_primary_key(TABLE *table)
{
  const uint pk_index= table->s->primary_key;
  bool all_fields, some_fields;
  DBUG_ENTER("check_primary_key");

  /* Index values at or above MAX_KEY are not checked */
  if (pk_index >= MAX_KEY)
  {
    DBUG_RETURN(FALSE);
  }

  /*
    Mark the fields of the primary key, let check_fields_in_PF report
    through all_fields/some_fields how the partition fields relate to the
    marked ones, then remove the marks again.
  */
  set_indicator_in_key_fields(table->key_info + pk_index);
  check_fields_in_PF(table->part_info->full_part_field_array,
                     &all_fields, &some_fields);
  clear_indicator_in_key_fields(table->key_info + pk_index);

  if (likely(all_fields))
  {
    DBUG_RETURN(FALSE);
  }
  my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"PRIMARY KEY");
  DBUG_RETURN(TRUE);
}


/*
1007 1008
  Check that unique keys contains all partition fields

1009 1010 1011
  SYNOPSIS
    check_unique_keys()
    table                TABLE object for which partition fields are set-up
1012

1013 1014 1015 1016 1017
  RETURN VALUES
    TRUE                 Not all fields in partitioning function was part
                         of all unique keys
    FALSE                Ok, all fields of partitioning function were part
                         of unique keys
1018 1019 1020 1021 1022 1023

  DESCRIPTION
    This function verifies that if there is a unique index that it contains
    all the fields of the partition function.
    This is a temporary limitation that will hopefully be removed after a
    while.
1024 1025 1026 1027
*/

static bool check_unique_keys(TABLE *table)
{
  const uint key_count= table->s->keys;
  uint key_no;
  bool all_fields, some_fields;
  DBUG_ENTER("check_unique_keys");

  for (key_no= 0; key_no < key_count; key_no++)
  {
    /* Only unique indexes are subject to the check */
    if (!(table->key_info[key_no].flags & HA_NOSAME))
      continue;

    /* Mark the key fields, test the partition fields, unmark again */
    set_indicator_in_key_fields(table->key_info + key_no);
    check_fields_in_PF(table->part_info->full_part_field_array,
                       &all_fields, &some_fields);
    clear_indicator_in_key_fields(table->key_info + key_no);

    if (unlikely(!all_fields))
    {
      /* Stop at the first offending unique index */
      my_error(ER_UNIQUE_KEY_NEED_ALL_FIELDS_IN_PF,MYF(0),"UNIQUE INDEX");
      DBUG_RETURN(TRUE);
    }
  }
  DBUG_RETURN(FALSE);
}


/*
  An important optimisation is whether a range on a field can select a subset
  of the partitions.
  A prerequisite for this to happen is that the PF is a growing function OR
  a shrinking function.
  This can never happen for a multi-dimensional PF. Thus this can only happen
  with PF with at most one field involved in the PF.
  The idea is that if the function is a growing function and you know that
  the field of the PF is 4 <= A <= 6 then we can convert this to a range
  in the PF instead by setting the range to PF(4) <= PF(A) <= PF(6). In the
  case of RANGE PARTITIONING and LIST PARTITIONING this can be used to
  calculate a set of partitions rather than scanning all of them.
  Thus the following prerequisites are there to check if sets of partitions
  can be found.
  1) Only possible for RANGE and LIST partitioning (not for subpartitioning)
  2) Only possible if PF only contains 1 field
  3) Possible if PF is a growing function of the field
  4) Possible if PF is a shrinking function of the field
  OBSERVATION:
  1) IF f1(A) is a growing function AND f2(A) is a growing function THEN
     f1(A) + f2(A) is a growing function
     f1(A) * f2(A) is a growing function if f1(A) >= 0 and f2(A) >= 0
  2) IF f1(A) is a growing function and f2(A) is a shrinking function THEN
     f1(A) / f2(A) is a growing function if f1(A) >= 0 and f2(A) > 0
  3) IF A is a growing function then a function f(A) that removes the
     least significant portion of A is a growing function
     E.g. DATE(datetime) is a growing function
     MONTH(datetime) is not a growing/shrinking function
  4) IF f1(A) is a growing function and f2(A) is a growing function THEN
     f1(f2(A)) and f2(f1(A)) are also growing functions
  5) IF f1(A) is a shrinking function and f2(A) is a growing function THEN
     f1(f2(A)) is a shrinking function and f2(f1(A)) is a shrinking function
  6) f1(A) = A is a growing function
  7) f1(A) = A*a + b (where a and b are constants) is a growing function

  By analysing the item tree of the PF we can use these deductions and
  derive whether the PF is a growing function, a shrinking function or
  neither.

  If the PF is range capable then a flag is set on the table object
  indicating this to notify that we can use also ranges on the field
  of the PF to deduce a set of partitions if the fields of the PF were
  not all fully bound.
1097

1098 1099 1100
  SYNOPSIS
    check_range_capable_PF()
    table                TABLE object for which partition fields are set-up
1101

1102 1103 1104 1105 1106 1107 1108
  DESCRIPTION
    Support for this is not implemented yet.
*/

void check_range_capable_PF(TABLE *table)
{
  DBUG_ENTER("check_range_capable_PF");
  /*
    Intentionally empty: no analysis of the partition function is done
    here and the table argument is not touched.
  */
  DBUG_VOID_RETURN;
}


1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145
/*
  Set up partition bitmap

  SYNOPSIS
    set_up_partition_bitmap()
    thd                  Thread object
    part_info            Reference to partitioning data structure

  RETURN VALUE
    TRUE                 Memory allocation failure
    FALSE                Success

  DESCRIPTION
    Allocate memory for bitmap of the partitioned table
    and initialise it.
*/

static bool set_up_partition_bitmap(THD *thd, partition_info *part_info)
{
  uint32 *bitmap_buf;
  uint bitmap_bits= part_info->no_parts;
  uint bitmap_bytes;
  DBUG_ENTER("set_up_partition_bitmap");

  /* With subpartitioning there is one bit per (partition, subpartition) */
  if (part_info->no_subparts)
    bitmap_bits*= part_info->no_subparts;
  bitmap_bytes= bitmap_buffer_size(bitmap_bits);

  bitmap_buf= (uint32*) thd->alloc(bitmap_bytes);
  if (!bitmap_buf)
  {
    mem_alloc_error(bitmap_bytes);
    DBUG_RETURN(TRUE);
  }
  /*
    NOTE(review): the bitmap is initialised with bitmap_bytes*8 bits, i.e.
    the buffer capacity, rather than with bitmap_bits. Confirm whether any
    user of used_partitions depends on the exact bit count.
  */
  bitmap_init(&part_info->used_partitions, bitmap_buf, bitmap_bytes*8, FALSE);
  /* Start out with every partition marked as used */
  bitmap_set_all(&part_info->used_partitions);
  DBUG_RETURN(FALSE);
}


1151 1152
/*
  Set up partition key maps
1153

1154 1155 1156 1157
  SYNOPSIS
    set_up_partition_key_maps()
    table                TABLE object for which partition fields are set-up
    part_info            Reference to partitioning data structure
1158

1159 1160
  RETURN VALUES
    None
1161

1162
  DESCRIPTION
1163 1164 1165 1166 1167 1168 1169 1170 1171 1172
    This function sets up a couple of key maps to be able to quickly check
    if an index ever can be used to deduce the partition fields or even
    a part of the fields of the  partition function.
    We set up the following key_map's.
    PF = Partition Function
    1) All fields of the PF is set even by equal on the first fields in the
       key
    2) All fields of the PF is set if all fields of the key is set
    3) At least one field in the PF is set if all fields is set
    4) At least one field in the PF is part of the key
1173 1174 1175 1176 1177
*/

static void set_up_partition_key_maps(TABLE *table,
                                      partition_info *part_info)
{
  const uint key_count= table->s->keys;
  uint key_no;
  bool covers_all, covers_some;
  DBUG_ENTER("set_up_partition_key_maps");

  /* Start from empty maps; bits are only ever set in the loop below */
  part_info->all_fields_in_PF.clear_all();
  part_info->all_fields_in_PPF.clear_all();
  part_info->all_fields_in_SPF.clear_all();
  part_info->some_fields_in_PF.clear_all();

  for (key_no= 0; key_no < key_count; key_no++)
  {
    /* The key's fields stay marked while all checks for this key run */
    set_indicator_in_key_fields(table->key_info + key_no);

    /* Full partition function (partition + subpartition fields) */
    check_fields_in_PF(part_info->full_part_field_array,
                       &covers_all, &covers_some);
    if (covers_all)
      part_info->all_fields_in_PF.set_bit(key_no);
    if (covers_some)
      part_info->some_fields_in_PF.set_bit(key_no);

    if (part_info->is_sub_partitioned())
    {
      /* Partition function fields only */
      check_fields_in_PF(part_info->part_field_array,
                         &covers_all, &covers_some);
      if (covers_all)
        part_info->all_fields_in_PPF.set_bit(key_no);

      /* Subpartition function fields only */
      check_fields_in_PF(part_info->subpart_field_array,
                         &covers_all, &covers_some);
      if (covers_all)
        part_info->all_fields_in_SPF.set_bit(key_no);
    }

    clear_indicator_in_key_fields(table->key_info + key_no);
  }
  DBUG_VOID_RETURN;
}


/*
1214 1215
  Set up function pointers for partition function

1216
  SYNOPSIS
1217
    set_up_partition_func_pointers()
1218
    part_info            Reference to partitioning data structure
1219 1220 1221 1222 1223 1224 1225 1226 1227

  RETURN VALUE
    NONE

  DESCRIPTION
    Set-up all function pointers for calculation of partition id,
    subpartition id and the upper part in subpartitioning. This is to speed up
    execution of get_partition_id which is executed once every record to be
    written and deleted and twice for updates.
1228 1229 1230 1231
*/

static void set_up_partition_func_pointers(partition_info *part_info)
{
  DBUG_ENTER("set_up_partition_func_pointers");

  if (!part_info->is_sub_partitioned())
  {
    /* No subpartitioning: only get_partition_id is needed */
    part_info->get_part_partition_id= NULL;
    part_info->get_subpartition_id= NULL;
    switch (part_info->part_type)
    {
    case RANGE_PARTITION:
      part_info->get_partition_id= get_partition_id_range;
      break;
    case LIST_PARTITION:
      part_info->get_partition_id= get_partition_id_list;
      break;
    default: /* HASH partitioning */
      if (part_info->list_of_part_fields)
      {
        part_info->get_partition_id= part_info->linear_hash_ind ?
                                     get_partition_id_linear_key_nosub :
                                     get_partition_id_key_nosub;
      }
      else
      {
        part_info->get_partition_id= part_info->linear_hash_ind ?
                                     get_partition_id_linear_hash_nosub :
                                     get_partition_id_hash_nosub;
      }
      break;
    }
  }
  else
  {
    /*
      Subpartitioned table: anything that is not RANGE is treated as LIST.
      The subpartition id function depends only on KEY vs HASH and on
      linear vs non-linear; the combined function also depends on
      RANGE vs LIST.
    */
    const bool is_range= (part_info->part_type == RANGE_PARTITION);

    part_info->get_part_partition_id= is_range ?
                                      get_partition_id_range :
                                      get_partition_id_list;
    if (part_info->list_of_subpart_fields)
    {
      if (part_info->linear_hash_ind)
      {
        part_info->get_partition_id= is_range ?
                                     get_partition_id_range_sub_linear_key :
                                     get_partition_id_list_sub_linear_key;
        part_info->get_subpartition_id= get_partition_id_linear_key_sub;
      }
      else
      {
        part_info->get_partition_id= is_range ?
                                     get_partition_id_range_sub_key :
                                     get_partition_id_list_sub_key;
        part_info->get_subpartition_id= get_partition_id_key_sub;
      }
    }
    else
    {
      if (part_info->linear_hash_ind)
      {
        part_info->get_partition_id= is_range ?
                                     get_partition_id_range_sub_linear_hash :
                                     get_partition_id_list_sub_linear_hash;
        part_info->get_subpartition_id= get_partition_id_linear_hash_sub;
      }
      else
      {
        part_info->get_partition_id= is_range ?
                                     get_partition_id_range_sub_hash :
                                     get_partition_id_list_sub_hash;
        part_info->get_subpartition_id= get_partition_id_hash_sub;
      }
    }
  }

  /*
    When charset field arrays are present, the function chosen above is
    moved to the corresponding *_charset pointer and a charset wrapper
    function is installed in its place.
  */
  if (part_info->full_part_charset_field_array)
  {
    DBUG_ASSERT(part_info->get_partition_id);
    part_info->get_partition_id_charset= part_info->get_partition_id;
    if (!part_info->part_charset_field_array)
      part_info->get_partition_id= get_part_id_charset_func_subpart;
    else if (part_info->subpart_charset_field_array)
      part_info->get_partition_id= get_part_id_charset_func_all;
    else
      part_info->get_partition_id= get_part_id_charset_func_part;
  }
  if (part_info->part_charset_field_array &&
      part_info->is_sub_partitioned())
  {
    DBUG_ASSERT(part_info->get_part_partition_id);
    part_info->get_part_partition_id_charset=
          part_info->get_part_partition_id;
    part_info->get_part_partition_id= get_part_part_id_charset_func;
  }
  if (part_info->subpart_charset_field_array)
  {
    DBUG_ASSERT(part_info->get_subpartition_id);
    part_info->get_subpartition_id_charset=
          part_info->get_subpartition_id;
    part_info->get_subpartition_id= get_subpart_id_charset_func;
  }
  DBUG_VOID_RETURN;
}
1352 1353


1354 1355 1356
/*
  For linear hashing we need a mask which is on the form 2**n - 1 where
  2**n >= no_parts. Thus if no_parts is 6 then mask is 2**3 - 1 = 8 - 1 = 7.
1357

1358 1359 1360 1361
  SYNOPSIS
    set_linear_hash_mask()
    part_info            Reference to partitioning data structure
    no_parts             Number of parts in linear hash partitioning
1362 1363 1364

  RETURN VALUE
    NONE
1365 1366 1367 1368 1369
*/

static void set_linear_hash_mask(partition_info *part_info, uint no_parts)
{
  uint pow2= 1;

  /* Find the smallest power of two that is not below no_parts */
  while (pow2 < no_parts)
    pow2<<= 1;
  /* The mask is that power of two minus one, i.e. all lower bits set */
  part_info->linear_hash_mask= pow2 - 1;
}


/*
  This function calculates the partition id provided the result of the hash
  function using linear hashing parameters, mask and number of partitions.
1380

1381 1382 1383 1384 1385
  SYNOPSIS
    get_part_id_from_linear_hash()
    hash_value          Hash value calculated by HASH function or KEY function
    mask                Mask calculated previously by set_linear_hash_mask
    no_parts            Number of partitions in HASH partitioned part
1386

1387 1388
  RETURN VALUE
    part_id             The calculated partition identity (starting at 0)
1389

1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400
  DESCRIPTION
    The partition is calculated according to the theory of linear hashing.
    See e.g. Linear hashing: a new tool for file and table addressing,
    Reprinted from VLDB-80 in Readings Database Systems, 2nd ed, M. Stonebraker
    (ed.), Morgan Kaufmann 1994.
*/

static uint32 get_part_id_from_linear_hash(longlong hash_value, uint mask,
                                           uint no_parts)
{
  const uint32 candidate= (uint32) (hash_value & mask);

  /* The full mask already gave an id inside the range */
  if (candidate < no_parts)
    return candidate;

  /*
    The id is outside the range: mask again with the next smaller mask
    of the form 2**n - 1, i.e. ((mask + 1) / 2) - 1.
  */
  return (uint32) (hash_value & (((mask + 1) >> 1) - 1));
}

1410

1411 1412 1413
/*
  Check if a particular field is in need of character set
  handling for partition functions.
1414

1415 1416 1417
  SYNOPSIS
    field_is_partition_charset()
    field                         The field to check
1418

1419 1420 1421 1422 1423 1424 1425
  RETURN VALUES
    FALSE                        Not in need of character set handling
    TRUE                         In need of character set handling
*/

bool field_is_partition_charset(Field *field)
{
  switch (field->type())
  {
  case MYSQL_TYPE_VARCHAR:
    /* VARCHAR fields are reported regardless of their collation */
    return TRUE;
  case MYSQL_TYPE_STRING:
    /* CHAR fields are reported unless MY_CS_BINSORT is set on the charset */
    if (((Field_str*) field)->charset()->state & MY_CS_BINSORT)
      return FALSE;
    return TRUE;
  default:
    /* Every other field type is reported as not needing charset handling */
    return FALSE;
  }
}


1439
/*
1440
  Check that partition function doesn't contain any forbidden
1441
  character sets and collations.
1442

1443
  SYNOPSIS
1444
    check_part_func_fields()
1445
    ptr                                 Array of Field pointers
1446 1447
    ok_with_charsets                    Will we report allowed charset
                                        fields as ok
1448 1449 1450
  RETURN VALUES
    FALSE                               Success
    TRUE                                Error
1451

1452 1453 1454 1455 1456
  DESCRIPTION
    We will check in this routine that the fields of the partition functions
    do not contain unallowed parts. It can also be used to check if there
    are fields that require special care by calling my_strnxfrm before
    calling the functions to calculate partition id.
1457 1458
*/

1459
bool check_part_func_fields(Field **ptr, bool ok_with_charsets)
{
  Field *field;
  DBUG_ENTER("check_part_func_fields");

  /* The array is terminated by a NULL Field pointer */
  for (; (field= *ptr); ptr++)
  {
    /*
      Only fields flagged by field_is_partition_charset need a closer
      look; all other fields are accepted as they are.
    */
    if (!field_is_partition_charset(field))
      continue;

    CHARSET_INFO *cs= ((Field_str*)field)->charset();
    /*
      Such a field is an error when the caller does not accept charset
      fields at all, or when the character set is multi-byte or has a
      strxfrm_multiply above 1.
    */
    if (!ok_with_charsets ||
        cs->mbmaxlen > 1 ||
        cs->strxfrm_multiply > 1)
    {
      DBUG_RETURN(TRUE);
    }
  }
  DBUG_RETURN(FALSE);
}


1486
/*
1487 1488
  fix partition functions

1489 1490 1491 1492
  SYNOPSIS
    fix_partition_func()
    thd                  The thread object
    table                TABLE object for which partition fields are set-up
1493
    is_create_table_ind  Indicator of whether openfrm was called as part of
1494
                         CREATE or ALTER TABLE
1495

1496
  RETURN VALUE
1497 1498
    TRUE                 Error
    FALSE                Success
1499

1500 1501 1502 1503
  DESCRIPTION
    The table's normalized path (table->s->normalized_path) contains the
    full table name and is used to get the database name of the table,
    which is used to set up a correct TABLE_LIST object for use in
    fix_fields.
1504 1505 1506 1507 1508 1509 1510

NOTES
    This function is called as part of opening the table by opening the .frm
    file. It is a part of CREATE TABLE to do this so it is quite permissible
    that errors due to erroneous syntax aren't found until we come here.
    If the user has used a non-existing field in the table is one such example
    of an error that is not discovered until here.
1511 1512
*/

1513
bool fix_partition_func(THD *thd, TABLE *table,
1514
                        bool is_create_table_ind)
1515 1516
{
  bool result= TRUE;
1517
  partition_info *part_info= table->part_info;
1518
  enum_mark_columns save_mark_used_columns= thd->mark_used_columns;
1519 1520
  DBUG_ENTER("fix_partition_func");

1521 1522 1523 1524
  if (part_info->fixed)
  {
    DBUG_RETURN(FALSE);
  }
1525 1526
  thd->mark_used_columns= MARK_COLUMNS_NONE;
  DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
1527

1528
  if (!is_create_table_ind ||
1529
       thd->lex->sql_command != SQLCOM_CREATE_TABLE)
1530
  {
1531
    if (partition_default_handling(table, part_info,
1532
                                   is_create_table_ind,
1533
                                   table->s->normalized_path.str))
1534 1535 1536 1537
    {
      DBUG_RETURN(TRUE);
    }
  }
1538
  if (part_info->is_sub_partitioned())
1539 1540 1541
  {
    DBUG_ASSERT(part_info->subpart_type == HASH_PARTITION);
    /*
1542 1543
      Subpartition is defined. We need to verify that subpartitioning
      function is correct.
1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554
    */
    if (part_info->linear_hash_ind)
      set_linear_hash_mask(part_info, part_info->no_subparts);
    if (part_info->list_of_subpart_fields)
    {
      List_iterator<char> it(part_info->subpart_field_list);
      if (unlikely(handle_list_of_fields(it, table, part_info, TRUE)))
        goto end;
    }
    else
    {
1555 1556
      if (unlikely(fix_fields_part_func(thd, part_info->subpart_expr,
                                        table, TRUE, TRUE)))
1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567
        goto end;
      if (unlikely(part_info->subpart_expr->result_type() != INT_RESULT))
      {
        my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0),
                 "SUBPARTITION");
        goto end;
      }
    }
  }
  DBUG_ASSERT(part_info->part_type != NOT_A_PARTITION);
  /*
1568 1569
    Partition is defined. We need to verify that partitioning
    function is correct.
1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582
  */
  if (part_info->part_type == HASH_PARTITION)
  {
    if (part_info->linear_hash_ind)
      set_linear_hash_mask(part_info, part_info->no_parts);
    if (part_info->list_of_part_fields)
    {
      List_iterator<char> it(part_info->part_field_list);
      if (unlikely(handle_list_of_fields(it, table, part_info, FALSE)))
        goto end;
    }
    else
    {
1583 1584
      if (unlikely(fix_fields_part_func(thd, part_info->part_expr,
                                        table, FALSE, TRUE)))
1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595
        goto end;
      if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
      {
        my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
        goto end;
      }
      part_info->part_result_type= INT_RESULT;
    }
  }
  else
  {
1596
    const char *error_str;
1597 1598 1599
    if (unlikely(fix_fields_part_func(thd, part_info->part_expr,
                                      table, FALSE, TRUE)))
      goto end;
1600 1601
    if (part_info->part_type == RANGE_PARTITION)
    {
1602
      error_str= partition_keywords[PKW_RANGE].str; 
1603
      if (unlikely(part_info->check_range_constants()))
1604 1605 1606 1607
        goto end;
    }
    else if (part_info->part_type == LIST_PARTITION)
    {
1608
      error_str= partition_keywords[PKW_LIST].str; 
1609
      if (unlikely(part_info->check_list_constants()))
1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628
        goto end;
    }
    else
    {
      DBUG_ASSERT(0);
      my_error(ER_INCONSISTENT_PARTITION_INFO_ERROR, MYF(0));
      goto end;
    }
    if (unlikely(part_info->no_parts < 1))
    {
      my_error(ER_PARTITIONS_MUST_BE_DEFINED_ERROR, MYF(0), error_str);
      goto end;
    }
    if (unlikely(part_info->part_expr->result_type() != INT_RESULT))
    {
      my_error(ER_PARTITION_FUNC_NOT_ALLOWED_ERROR, MYF(0), part_str);
      goto end;
    }
  }
1629 1630
  if (((part_info->part_type != HASH_PARTITION ||
      part_info->list_of_part_fields == FALSE) &&
1631
      check_part_func_fields(part_info->part_field_array, TRUE)) ||
1632 1633
      (part_info->list_of_part_fields == FALSE &&
       part_info->is_sub_partitioned() &&
1634
       check_part_func_fields(part_info->subpart_field_array, TRUE)))
1635 1636 1637 1638
  {
    my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0));
    goto end;
  }
1639 1640 1641 1642
  if (unlikely(create_full_part_field_array(table, part_info)))
    goto end;
  if (unlikely(check_primary_key(table)))
    goto end;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1643 1644
  if (unlikely((!(table->s->db_type()->partition_flags &&
      (table->s->db_type()->partition_flags() & HA_CAN_PARTITION_UNIQUE))) &&
1645 1646
               check_unique_keys(table)))
    goto end;
1647 1648
  if (unlikely(set_up_partition_bitmap(thd, part_info)))
    goto end;
1649
  if (unlikely(part_info->set_up_charset_field_preps()))
1650 1651 1652 1653
  {
    my_error(ER_PARTITION_FUNCTION_IS_NOT_ALLOWED, MYF(0));
    goto end;
  }
1654 1655 1656
  check_range_capable_PF(table);
  set_up_partition_key_maps(table, part_info);
  set_up_partition_func_pointers(part_info);
1657
  set_up_range_analysis_info(part_info);
1658 1659
  result= FALSE;
end:
1660 1661
  thd->mark_used_columns= save_mark_used_columns;
  DBUG_PRINT("info", ("thd->mark_used_columns: %d", thd->mark_used_columns));
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675
  DBUG_RETURN(result);
}


/*
  The code below is support routines for the reverse parsing of the 
  partitioning syntax. This feature is very useful to generate syntax for
  all default values to avoid all default checking when opening the frm
  file. It is also used when altering the partitioning by use of various
  ALTER TABLE commands. Finally it is used for SHOW CREATE TABLE.
*/

/*
  Write len bytes from buf to the file fptr.
  Returns 0 when my_write reports exactly len bytes, otherwise 1.
*/
static int add_write(File fptr, const char *buf, uint len)
{
  const uint written= my_write(fptr, (const byte*) buf, len, MYF(0));

  return likely(written == len) ? 0 : 1;
}

1684 1685 1686 1687 1688
/* Write the contents of a String object to fptr; result as for add_write. */
static int add_string_object(File fptr, String *string)
{
  const int rc= add_write(fptr, string->ptr(), string->length());
  return rc;
}

1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726
/* Write a NUL-terminated string to fptr; result as for add_write. */
static int add_string(File fptr, const char *string)
{
  const int rc= add_write(fptr, string, strlen(string));
  return rc;
}

/* Write the first len bytes of string to fptr; result as for add_write. */
static int add_string_len(File fptr, const char *string, uint len)
{
  const int rc= add_write(fptr, string, len);
  return rc;
}

/* Write space_str to fptr; result as for add_string. */
static int add_space(File fptr)
{
  const int rc= add_string(fptr, space_str);
  return rc;
}

/* Write comma_str to fptr; result as for add_string. */
static int add_comma(File fptr)
{
  const int rc= add_string(fptr, comma_str);
  return rc;
}

/* Write equal_str to fptr; result as for add_string. */
static int add_equal(File fptr)
{
  const int rc= add_string(fptr, equal_str);
  return rc;
}

/* Write end_paren_str to fptr; result as for add_string. */
static int add_end_parenthesis(File fptr)
{
  const int rc= add_string(fptr, end_paren_str);
  return rc;
}

/* Write begin_paren_str to fptr; result as for add_string. */
static int add_begin_parenthesis(File fptr)
{
  const int rc= add_string(fptr, begin_paren_str);
  return rc;
}

/*
  Write key_string followed by the space string and the opening
  parenthesis string. The return value is the sum of the results of the
  three writes, so it is 0 only when all of them succeeded.
*/
static int add_part_key_word(File fptr, const char *key_string)
{
  int failures= 0;

  failures+= add_string(fptr, key_string);
  failures+= add_space(fptr);
  failures+= add_begin_parenthesis(fptr);
  return failures;
}

/* Write the PKW_HASH keyword followed by " (" to fptr. */
static int add_hash(File fptr)
{
  int res= add_part_key_word(fptr, partition_keywords[PKW_HASH].str);

  return res;
}

/*
  Write part_str followed by space_str to fptr.
  The two pieces are concatenated first so that only one write is issued.
*/
static int add_partition(File fptr)
{
  char keyword_buf[22];

  strxmov(keyword_buf, part_str, space_str, NullS);
  return add_string(fptr, keyword_buf);
}

/* Write sub_str and then whatever add_partition() writes to fptr. */
static int add_subpartition(File fptr)
{
  int failures= add_string(fptr, sub_str);

  failures+= add_partition(fptr);
  return failures;
}

/*
  Write part_str, space_str, by_str and space_str (in that order) to fptr.
  The pieces are concatenated first so that only one write is issued.
*/
static int add_partition_by(File fptr)
{
  char keyword_buf[22];

  strxmov(keyword_buf, part_str, space_str, by_str, space_str, NullS);
  return add_string(fptr, keyword_buf);
}

/* Write sub_str and then whatever add_partition_by() writes to fptr. */
static int add_subpartition_by(File fptr)
{
  int failures= add_string(fptr, sub_str);

  failures+= add_partition_by(fptr);
  return failures;
}

/*
  Write the PKW_KEY keyword, " (" and a comma separated list of the
  field names in field_list to fptr. Each name is passed through
  append_identifier() before it is written.

  Returns the sum of the individual write results (0 when all succeeded).
*/
static int add_key_partition(File fptr, List<char> field_list)
{
  uint idx;
  uint field_count;
  int failures;

  List_iterator<char> field_it(field_list);
  failures= add_part_key_word(fptr, partition_keywords[PKW_KEY].str);
  field_count= field_list.elements;
  for (idx= 0; idx < field_count; idx++)
  {
    const char *field_name= field_it++;
    String quoted_name("", 0, system_charset_info);
    THD *thd= current_thd;
    ulonglong saved_options= thd->options;

    /*
      thd->options is cleared around the call and restored afterwards;
      presumably so that quoting does not depend on session options
      (TODO confirm against append_identifier()).
    */
    thd->options= 0;
    append_identifier(thd, &quoted_name, field_name,
                      strlen(field_name));
    thd->options= saved_options;
    failures+= add_string_object(fptr, &quoted_name);
    /* No separator after the last field */
    if (idx != (field_count-1))
      failures+= add_comma(fptr);
  }
  return failures;
}

1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806
/*
  Pass name through append_identifier() and write the result to fptr.
  thd->options is set to 0 for the duration of the append_identifier()
  call and restored afterwards.
*/
static int add_name_string(File fptr, const char *name)
{
  String quoted_name("", 0, system_charset_info);
  THD *thd= current_thd;
  const ulonglong saved_options= thd->options;

  thd->options= 0;
  append_identifier(thd, &quoted_name, name, strlen(name));
  thd->options= saved_options;
  return add_string_object(fptr, &quoted_name);
}

1807 1808
/* Convert a signed number to text with llstr() and write it to fptr. */
static int add_int(File fptr, longlong number)
{
  char digits[32];

  llstr(number, digits);
  return add_string(fptr, digits);
}

1814 1815 1816 1817 1818 1819 1820
/*
  Convert number to text with longlong2str() using radix 10 and write
  it to fptr.
*/
static int add_uint(File fptr, ulonglong number)
{
  char digits[32];

  longlong2str(number, digits, 10);
  return add_string(fptr, digits);
}

1821
static int add_keyword_string(File fptr, const char *keyword,
1822
                              bool should_use_quotes, 
1823 1824 1825
                              const char *keystr)
{
  int err= add_string(fptr, keyword);
1826

1827 1828 1829
  err+= add_space(fptr);
  err+= add_equal(fptr);
  err+= add_space(fptr);
1830 1831
  if (should_use_quotes)
    err+= add_string(fptr, "'");
1832
  err+= add_string(fptr, keystr);
1833 1834
  if (should_use_quotes)
    err+= add_string(fptr, "'");
1835 1836 1837 1838 1839 1840
  return err + add_space(fptr);
}

/*
  Write "<keyword> = <num> " to fptr.
  Returns the sum of the individual write results (0 when all succeeded).
*/
static int add_keyword_int(File fptr, const char *keyword, longlong num)
{
  int failures= 0;

  failures+= add_string(fptr, keyword);
  failures+= add_space(fptr);
  failures+= add_equal(fptr);
  failures+= add_space(fptr);
  failures+= add_int(fptr, num);
  failures+= add_space(fptr);
  return failures;
}

1849
/*
  Write "ENGINE = " followed by the name that
  ha_resolve_storage_engine_name() returns for engine_type.
*/
static int add_engine(File fptr, handlerton *engine_type)
{
  const char *engine_name= ha_resolve_storage_engine_name(engine_type);
  int failures;

  DBUG_PRINT("info", ("ENGINE: %s", engine_name));
  failures= add_string(fptr, "ENGINE = ");
  failures+= add_string(fptr, engine_name);
  return failures;
}

/*
  Write the options of one partition element to fptr: a leading space,
  then TABLESPACE, NODEGROUP, MAX_ROWS, MIN_ROWS, DATA DIRECTORY,
  INDEX DIRECTORY and COMMENT for those that are set, and finally the
  ENGINE clause. The two directory options are left out when the
  MODE_NO_DIR_IN_CREATE bit is set in the session sql_mode.

  Returns the sum of the individual write results (0 when all succeeded).
*/
static int add_partition_options(File fptr, partition_element *p_elem)
{
  int failures= add_space(fptr);

  if (p_elem->tablespace_name)
    failures+= add_keyword_string(fptr, "TABLESPACE", FALSE,
                                  p_elem->tablespace_name);
  if (p_elem->nodegroup_id != UNDEF_NODEGROUP)
    failures+= add_keyword_int(fptr, "NODEGROUP",
                               (longlong)p_elem->nodegroup_id);
  if (p_elem->part_max_rows)
    failures+= add_keyword_int(fptr, "MAX_ROWS",
                               (longlong)p_elem->part_max_rows);
  if (p_elem->part_min_rows)
    failures+= add_keyword_int(fptr, "MIN_ROWS",
                               (longlong)p_elem->part_min_rows);
  if (!(current_thd->variables.sql_mode & MODE_NO_DIR_IN_CREATE))
  {
    if (p_elem->data_file_name)
      failures+= add_keyword_string(fptr, "DATA DIRECTORY", TRUE,
                                    p_elem->data_file_name);
    if (p_elem->index_file_name)
      failures+= add_keyword_string(fptr, "INDEX DIRECTORY", TRUE,
                                    p_elem->index_file_name);
  }
  if (p_elem->part_comment)
    failures+= add_keyword_string(fptr, "COMMENT", TRUE,
                                  p_elem->part_comment);
  failures+= add_engine(fptr, p_elem->engine_type);
  return failures;
}

1885
/*
  Write the VALUES clause of one partition to fptr.

  RANGE partitions get " VALUES LESS THAN " followed by either the
  parenthesised range value or the PKW_MAXVALUE keyword. LIST partitions
  get " VALUES IN " followed by a parenthesised list with an optional
  leading NULL. Other partition types write nothing.

  Returns the sum of the individual write results (0 when all succeeded).
*/
static int add_partition_values(File fptr, partition_info *part_info, partition_element *p_elem)
{
  int failures= 0;

  if (part_info->part_type == RANGE_PARTITION)
  {
    failures+= add_string(fptr, " VALUES LESS THAN ");
    if (p_elem->max_value)
      failures+= add_string(fptr, partition_keywords[PKW_MAXVALUE].str);
    else
    {
      failures+= add_begin_parenthesis(fptr);
      if (p_elem->signed_flag)
        failures+= add_int(fptr, p_elem->range_value);
      else
        failures+= add_uint(fptr, p_elem->range_value);
      failures+= add_end_parenthesis(fptr);
    }
    return failures;
  }

  if (part_info->part_type == LIST_PARTITION)
  {
    uint idx;
    List_iterator<part_elem_value> value_it(p_elem->list_val_list);
    failures+= add_string(fptr, " VALUES IN ");
    uint value_count= p_elem->list_val_list.elements;

    failures+= add_begin_parenthesis(fptr);
    if (p_elem->has_null_value)
    {
      failures+= add_string(fptr, "NULL");
      if (value_count == 0)
      {
        /* NULL is the only value in the list */
        failures+= add_end_parenthesis(fptr);
        return failures;
      }
      failures+= add_comma(fptr);
    }
    /*
      The body runs at least once, so the list is expected to hold at
      least one value when this point is reached.
    */
    idx= 0;
    do
    {
      part_elem_value *cur_value= value_it++;

      if (cur_value->unsigned_flag)
        failures+= add_uint(fptr, cur_value->value);
      else
        failures+= add_int(fptr, cur_value->value);
      if (idx != (value_count-1))
        failures+= add_comma(fptr);
    } while (++idx < value_count);
    failures+= add_end_parenthesis(fptr);
  }
  return failures;
}

/*
  Generate the partition syntax from the partition data structure.
  Useful for support of generating defaults, SHOW CREATE TABLES
  and easy partition management.
1944

1945 1946 1947 1948 1949 1950
  SYNOPSIS
    generate_partition_syntax()
    part_info                  The partitioning data structure
    buf_length                 A pointer to the returned buffer length
    use_sql_alloc              Allocate buffer from sql_alloc if true
                               otherwise use my_malloc
1951
    show_partition_options     Should we display partition options
1952

1953 1954 1955
  RETURN VALUES
    NULL error
    buf, buf_length            Buffer and its length
1956

1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978
  DESCRIPTION
  Here we will generate the full syntax for the given command where all
  defaults have been expanded. By so doing the it is also possible to
  make lots of checks of correctness while at it.
  This could will also be reused for SHOW CREATE TABLES and also for all
  type ALTER TABLE commands focusing on changing the PARTITION structure
  in any fashion.

  The implementation writes the syntax to a temporary file (essentially
  an abstraction of a dynamic array) and if all writes goes well it
  allocates a buffer and writes the syntax into this one and returns it.

  As a security precaution the file is deleted before writing into it. This
  means that no other processes on the machine can open and read the file
  while this processing is ongoing.

  The code is optimised for minimal code size since it is not used in any
  common queries.
*/

char *generate_partition_syntax(partition_info *part_info,
                                uint *buf_length,
1979
                                bool use_sql_alloc,
1980
                                bool show_partition_options)
1981
{
1982
  uint i,j, tot_no_parts, no_subparts;
1983 1984 1985 1986
  partition_element *part_elem;
  ulonglong buffer_length;
  char path[FN_REFLEN];
  int err= 0;
1987
  List_iterator<partition_element> part_it(part_info->partitions);
1988 1989
  File fptr;
  char *buf= NULL; //Return buffer
1990 1991
  DBUG_ENTER("generate_partition_syntax");

1992 1993 1994
  if (unlikely(((fptr= create_temp_file(path,mysql_tmpdir,"psy", 
                                        O_RDWR | O_BINARY | O_TRUNC |  
                                        O_TEMPORARY, MYF(MY_WME)))) < 0))
1995
    DBUG_RETURN(NULL);
1996 1997
#ifndef __WIN__
  unlink(path);
1998 1999 2000 2001 2002 2003
#endif
  err+= add_space(fptr);
  err+= add_partition_by(fptr);
  switch (part_info->part_type)
  {
    case RANGE_PARTITION:
2004
      err+= add_part_key_word(fptr, partition_keywords[PKW_RANGE].str);
2005 2006
      break;
    case LIST_PARTITION:
2007
      err+= add_part_key_word(fptr, partition_keywords[PKW_LIST].str);
2008 2009 2010
      break;
    case HASH_PARTITION:
      if (part_info->linear_hash_ind)
2011
        err+= add_string(fptr, partition_keywords[PKW_LINEAR].str);
2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027
      if (part_info->list_of_part_fields)
        err+= add_key_partition(fptr, part_info->part_field_list);
      else
        err+= add_hash(fptr);
      break;
    default:
      DBUG_ASSERT(0);
      /* We really shouldn't get here, no use in continuing from here */
      current_thd->fatal_error();
      DBUG_RETURN(NULL);
  }
  if (part_info->part_expr)
    err+= add_string_len(fptr, part_info->part_func_string,
                         part_info->part_func_len);
  err+= add_end_parenthesis(fptr);
  err+= add_space(fptr);
2028 2029 2030 2031 2032 2033 2034
  if ((!part_info->use_default_no_partitions) &&
       part_info->use_default_partitions)
  {
    err+= add_string(fptr, "PARTITIONS ");
    err+= add_int(fptr, part_info->no_parts);
    err+= add_space(fptr);
  }
2035
  if (part_info->is_sub_partitioned())
2036 2037 2038
  {
    err+= add_subpartition_by(fptr);
    /* Must be hash partitioning for subpartitioning */
2039 2040
    if (part_info->linear_hash_ind)
      err+= add_string(fptr, partition_keywords[PKW_LINEAR].str);
2041 2042 2043 2044 2045 2046 2047 2048 2049
    if (part_info->list_of_subpart_fields)
      err+= add_key_partition(fptr, part_info->subpart_field_list);
    else
      err+= add_hash(fptr);
    if (part_info->subpart_expr)
      err+= add_string_len(fptr, part_info->subpart_func_string,
                           part_info->subpart_func_len);
    err+= add_end_parenthesis(fptr);
    err+= add_space(fptr);
2050 2051 2052 2053 2054 2055 2056 2057
    if ((!part_info->use_default_no_subpartitions) && 
          part_info->use_default_subpartitions)
    {
      err+= add_string(fptr, "SUBPARTITIONS ");
      err+= add_int(fptr, part_info->no_subparts);
      err+= add_space(fptr);
    }
  }
2058
  tot_no_parts= part_info->partitions.elements;
2059
  no_subparts= part_info->no_subparts;
2060

2061
  if (!part_info->use_default_partitions)
2062
  {
2063
    bool first= TRUE;
2064 2065 2066
    err+= add_begin_parenthesis(fptr);
    i= 0;
    do
2067
    {
2068 2069 2070
      part_elem= part_it++;
      if (part_elem->part_state != PART_TO_BE_DROPPED &&
          part_elem->part_state != PART_REORGED_DROPPED)
2071
      {
2072
        if (!first)
2073
        {
2074 2075
          err+= add_comma(fptr);
          err+= add_space(fptr);
2076
        }
2077
        first= FALSE;
2078
        err+= add_partition(fptr);
2079
        err+= add_name_string(fptr, part_elem->partition_name);
2080
        err+= add_partition_values(fptr, part_info, part_elem);
2081 2082
        if (!part_info->is_sub_partitioned() ||
            part_info->use_default_subpartitions)
2083
        {
2084 2085
          if (show_partition_options)
            err+= add_partition_options(fptr, part_elem);
2086 2087
        }
        else
2088 2089 2090 2091 2092 2093 2094 2095 2096
        {
          err+= add_space(fptr);
          err+= add_begin_parenthesis(fptr);
          List_iterator<partition_element> sub_it(part_elem->subpartitions);
          j= 0;
          do
          {
            part_elem= sub_it++;
            err+= add_subpartition(fptr);
2097
            err+= add_name_string(fptr, part_elem->partition_name);
2098 2099
            if (show_partition_options)
              err+= add_partition_options(fptr, part_elem);
2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112
            if (j != (no_subparts-1))
            {
              err+= add_comma(fptr);
              err+= add_space(fptr);
            }
            else
              err+= add_end_parenthesis(fptr);
          } while (++j < no_subparts);
        }
      }
      if (i == (tot_no_parts-1))
        err+= add_end_parenthesis(fptr);
    } while (++i < tot_no_parts);
2113
  }
2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128
  if (err)
    goto close_file;
  buffer_length= my_seek(fptr, 0L,MY_SEEK_END,MYF(0));
  if (unlikely(buffer_length == MY_FILEPOS_ERROR))
    goto close_file;
  if (unlikely(my_seek(fptr, 0L, MY_SEEK_SET, MYF(0)) == MY_FILEPOS_ERROR))
    goto close_file;
  *buf_length= (uint)buffer_length;
  if (use_sql_alloc)
    buf= sql_alloc(*buf_length+1);
  else
    buf= my_malloc(*buf_length+1, MYF(MY_WME));
  if (!buf)
    goto close_file;

2129
  if (unlikely(my_read(fptr, (byte*)buf, *buf_length, MYF(MY_FNABP))))
2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147
  {
    if (!use_sql_alloc)
      my_free(buf, MYF(0));
    else
      buf= NULL;
  }
  else
    buf[*buf_length]= 0;

close_file:
  my_close(fptr, MYF(0));
  DBUG_RETURN(buf);
}


/*
  Check if partition key fields are modified and if it can be handled by the
  underlying storage engine.
2148

2149 2150 2151
  SYNOPSIS
    partition_key_modified
    table                TABLE object for which partition fields are set-up
2152
    fields               Bitmap representing fields to be modified
2153

2154 2155 2156 2157 2158
  RETURN VALUES
    TRUE                 Need special handling of UPDATE
    FALSE                Normal UPDATE handling is ok
*/

2159
bool partition_key_modified(TABLE *table, const MY_BITMAP *fields)
{
  partition_info *part_info= table->part_info;
  Field **field_ptr;
  DBUG_ENTER("partition_key_modified");

  /* No partition info on this table: nothing special to do */
  if (!part_info)
    DBUG_RETURN(FALSE);
  /* The engine reports that it can update the partition key itself */
  if (table->s->db_type()->partition_flags &&
      (table->s->db_type()->partition_flags() & HA_CAN_UPDATE_PARTITION_KEY))
    DBUG_RETURN(FALSE);
  /* full_part_field_array is walked up to its NULL terminator */
  field_ptr= part_info->full_part_field_array;
  while (*field_ptr)
  {
    if (bitmap_is_set(fields, (*field_ptr)->field_index))
      DBUG_RETURN(TRUE);
    field_ptr++;
  }
  DBUG_RETURN(FALSE);
}


2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187
/*
  A function to handle correct handling of NULL values in partition
  functions.
  SYNOPSIS
    part_val_int()
    item_expr                 The item expression to evaluate
  RETURN VALUES
    The value of the partition function, LONGLONG_MIN if any null value
    in function
*/

2188
static inline longlong part_val_int(Item *item_expr)
{
  /* val_int() must be called before null_value is inspected */
  const longlong result= item_expr->val_int();

  if (item_expr->null_value)
    return LONGLONG_MIN;
  return result;
}


2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216
/*
  The next set of functions are used to calculate the partition identity.
  A handler sets up a variable that corresponds to one of these functions
  to be able to quickly call it whenever the partition id needs to be calculated
  based on the record in table->record[0] (or set up to fake that).
  There are 4 functions for hash partitioning and 2 for RANGE/LIST partitions.
  In addition there are 4 variants for RANGE subpartitioning and 4 variants
  for LIST subpartitioning thus in total there are 14 variants of this
  function.

  We have a set of support functions for these 14 variants. There are 4
  variants of hash functions and there is a function for each. The KEY
  partitioning uses the function calculate_key_value to calculate the hash
  value based on an array of fields. The linear hash variants uses the
  method get_part_id_from_linear_hash to get the partition id using the
  hash value and some parameters calculated from the number of partitions.
*/

/*
  Calculate hash value for KEY partitioning using an array of fields.
2217

2218 2219 2220
  SYNOPSIS
    calculate_key_value()
    field_array             An array of the fields in KEY partitioning
2221

2222 2223
  RETURN VALUE
    hash_value calculated
2224

2225 2226 2227 2228 2229 2230 2231
  DESCRIPTION
    Uses the hash function on the character set of the field. Integer and
    floating point fields use the binary character set by default.
*/

static uint32 calculate_key_value(Field **field_array)
{
  ulong hash_nr1= 1;
  ulong hash_nr2= 4;
  Field **cur_field= field_array;

  /*
    The first entry is hashed unconditionally, so the array must hold at
    least one field before its NULL terminator.
  */
  do
  {
    (*cur_field)->hash(&hash_nr1, &hash_nr2);
    cur_field++;
  } while (*cur_field);
  return (uint32) hash_nr1;
}


/*
  A simple support function to calculate part_id given local part and
  sub part.
2247

2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264
  SYNOPSIS
    get_part_id_for_sub()
    loc_part_id             Local partition id
    sub_part_id             Subpartition id
    no_subparts             Number of subparts
*/

inline
static uint32 get_part_id_for_sub(uint32 loc_part_id, uint32 sub_part_id,
                                  uint no_subparts)
{
  /* Subpartitions of one partition occupy consecutive ids */
  uint32 first_id_in_part= (uint32)(loc_part_id * no_subparts);

  return (uint32)(first_id_in_part + sub_part_id);
}


/*
  Calculate part_id for (SUB)PARTITION BY HASH
2265

2266 2267 2268 2269
  SYNOPSIS
    get_part_id_hash()
    no_parts                 Number of hash partitions
    part_expr                Item tree of hash function
2270
    out:func_value      Value of hash function
2271

2272 2273 2274 2275 2276 2277
  RETURN VALUE
    Calculated partition id
*/

inline
static uint32 get_part_id_hash(uint no_parts,
                               Item *part_expr,
                               longlong *func_value)
{
  longlong hash_rem;
  DBUG_ENTER("get_part_id_hash");

  *func_value= part_val_int(part_expr);
  hash_rem= *func_value % no_parts;

  /* A negative remainder is mapped to its absolute value */
  if (hash_rem < 0)
    DBUG_RETURN((uint32) -hash_rem);
  DBUG_RETURN((uint32) hash_rem);
}


/*
  Calculate part_id for (SUB)PARTITION BY LINEAR HASH
2293

2294 2295 2296 2297 2298 2299
  SYNOPSIS
    get_part_id_linear_hash()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    no_parts            Number of hash partitions
    part_expr           Item tree of hash function
2300
    out:func_value      Value of hash function
2301

2302 2303 2304 2305 2306 2307 2308
  RETURN VALUE
    Calculated partition id
*/

inline
static uint32 get_part_id_linear_hash(partition_info *part_info,
                                      uint no_parts,
                                      Item *part_expr,
                                      longlong *func_value)
{
  uint32 part_id;
  DBUG_ENTER("get_part_id_linear_hash");

  /* The function value is stored for the caller before it is mapped */
  *func_value= part_val_int(part_expr);
  part_id= get_part_id_from_linear_hash(*func_value,
                                        part_info->linear_hash_mask,
                                        no_parts);
  DBUG_RETURN(part_id);
}


/*
  Calculate part_id for (SUB)PARTITION BY KEY
2323

2324 2325 2326 2327
  SYNOPSIS
    get_part_id_key()
    field_array         Array of fields for PARTITION KEY
    no_parts            Number of KEY partitions
2328

2329 2330 2331 2332 2333 2334
  RETURN VALUE
    Calculated partition id
*/

inline
static uint32 get_part_id_key(Field **field_array,
                              uint no_parts,
                              longlong *func_value)
{
  longlong hash_value;
  DBUG_ENTER("get_part_id_key");

  /* The hash of the key fields is also reported through func_value */
  hash_value= calculate_key_value(field_array);
  *func_value= hash_value;
  DBUG_RETURN((uint32) (hash_value % no_parts));
}


/*
  Calculate part_id for (SUB)PARTITION BY LINEAR KEY
2346

2347 2348 2349 2350 2351 2352
  SYNOPSIS
    get_part_id_linear_key()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
    field_array         Array of fields for PARTITION KEY
    no_parts            Number of KEY partitions
2353

2354 2355 2356 2357 2358 2359 2360
  RETURN VALUE
    Calculated partition id
*/

inline
static uint32 get_part_id_linear_key(partition_info *part_info,
                                     Field **field_array,
                                     uint no_parts,
                                     longlong *func_value)
{
  uint32 part_id;
  /* NOTE(review): the trace tag differs from the function name */
  DBUG_ENTER("get_partition_id_linear_key");

  /* The hash of the key fields is also reported through func_value */
  *func_value= calculate_key_value(field_array);
  part_id= get_part_id_from_linear_hash(*func_value,
                                        part_info->linear_hash_mask,
                                        no_parts);
  DBUG_RETURN(part_id);
}

2372 2373
/*
  Copy to field buffers and set up field pointers
2374

2375 2376 2377
  SYNOPSIS
    copy_to_part_field_buffers()
    ptr                          Array of fields to copy
2378 2379 2380
    field_bufs                   Array of field buffers to copy to
    restore_ptr                  Array of pointers to restore to

2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399
  RETURN VALUES
    NONE
  DESCRIPTION
    This routine is used to take the data from field pointer, convert
    it to a standard format and store this format in a field buffer
    allocated for this purpose. Next the field pointers are moved to
    point to the field buffers. There is a separate function to restore
    the field pointers after this call.
*/

static void copy_to_part_field_buffers(Field **ptr,
                                       char **field_bufs,
                                       char **restore_ptr)
{
  Field *field;

  /* The three arrays are walked in step, one slot per field */
  for (; (field= *ptr); ptr++, field_bufs++, restore_ptr++)
  {
    /* Always remember the original pointer so it can be restored later */
    *restore_ptr= field->ptr;

    /* Fields that are currently NULL keep pointing at the record */
    if (field->maybe_null() && field->is_null())
      continue;

    CHARSET_INFO *cs= ((Field_str*)field)->charset();
    uint len= field->pack_length();
    char *field_buf= *field_bufs;
    /*
       The field buffer receives a normalised copy of the string,
       produced with the strnxfrm method of the field's character set.
       According to the original author this is used only for VARCHAR
       and CHAR fields that do not have a binary collation.
     */
    if (field->type() == MYSQL_TYPE_VARCHAR)
    {
      /* The transformed data is placed after the length prefix */
      uint len_bytes= ((Field_varstring*)field)->length_bytes;
      my_strnxfrm(cs, (uchar*)(field_buf + len_bytes), (len - len_bytes),
                  (uchar*)(field->ptr + len_bytes), field->field_length);
      /* The length prefix is set to field_length, in 1 or 2 bytes */
      if (len_bytes == 1)
      {
        *field_buf= (uchar)field->field_length;
      }
      else
      {
        int2store(field_buf, field->field_length);
      }
    }
    else
    {
      my_strnxfrm(cs, (uchar*)field_buf, len,
                  (uchar*)field->ptr, field->field_length);
    }
    field->ptr= field_buf;
  }
}

/*
  Restore field pointers
  SYNOPSIS
    restore_part_field_pointers()
    ptr                            Array of fields to restore
2439 2440
    restore_ptr                    Array of field pointers to restore to

2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453
  RETURN VALUES
*/

static void restore_part_field_pointers(Field **ptr, char **restore_ptr)
{
  Field *field;

  /* Both arrays are walked in step until the NULL field terminator */
  for (; (field= *ptr); ptr++, restore_ptr++)
    field->ptr= *restore_ptr;
}
2454 2455 2456 2457
/*
  This function is used to calculate the partition id where all partition
  fields have been prepared to point to a record where the partition field
  values are bound.
2458

2459 2460 2461 2462
  SYNOPSIS
    get_partition_id()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
2463
    out:part_id         The partition id is returned through this pointer
2464
    out: func_value     Value of partition function (longlong)
2465

2466
  RETURN VALUE
2467 2468 2469 2470 2471
    part_id                     Partition id of partition that would contain
                                row with given values of PF-fields
    HA_ERR_NO_PARTITION_FOUND   The fields of the partition function didn't
                                fit into any partition and thus the values of 
                                the PF-fields are not allowed.
2472

2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501
  DESCRIPTION
    A routine used from write_row, update_row and delete_row from any
    handler supporting partitioning. It is also a support routine for
    get_partition_set used to find the set of partitions needed to scan
    for a certain index scan or full table scan.
    
    It is actually 14 different variants of this function which are called
    through a function pointer.

    get_partition_id_list
    get_partition_id_range
    get_partition_id_hash_nosub
    get_partition_id_key_nosub
    get_partition_id_linear_hash_nosub
    get_partition_id_linear_key_nosub
    get_partition_id_range_sub_hash
    get_partition_id_range_sub_key
    get_partition_id_range_sub_linear_hash
    get_partition_id_range_sub_linear_key
    get_partition_id_list_sub_hash
    get_partition_id_list_sub_key
    get_partition_id_list_sub_linear_hash
    get_partition_id_list_sub_linear_key
*/

/*
  This function is used to calculate the main partition to use in the case of
  subpartitioning and we don't know enough to get the partition identity in
  total.
2502

2503 2504 2505 2506
  SYNOPSIS
    get_part_partition_id()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
2507
    out:part_id         The partition id is returned through this pointer
2508
    out: func_value     The value calculated by partition function
2509

2510
  RETURN VALUE
2511 2512 2513 2514 2515
    part_id                     Partition id of partition that would contain
                                row with given values of PF-fields
    HA_ERR_NO_PARTITION_FOUND   The fields of the partition function didn't
                                fit into any partition and thus the values of 
                                the PF-fields are not allowed.
2516

2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529
  DESCRIPTION
    
    It is actually 6 different variants of this function which are called
    through a function pointer.

    get_partition_id_list
    get_partition_id_range
    get_partition_id_hash_nosub
    get_partition_id_key_nosub
    get_partition_id_linear_hash_nosub
    get_partition_id_linear_key_nosub
*/

2530 2531 2532 2533 2534
/*
  Call part_info->get_partition_id_charset() while the fields in
  subpart_charset_field_array point at their field buffers; the field
  pointers are restored before returning.
*/
static int get_part_id_charset_func_subpart(partition_info *part_info,
                                            uint32 *part_id,
                                            longlong *func_value)
{
  Field **charset_fields= part_info->subpart_charset_field_array;
  char **saved_ptrs= part_info->restore_subpart_field_ptrs;
  int error;

  copy_to_part_field_buffers(charset_fields,
                             part_info->subpart_field_buffers,
                             saved_ptrs);
  error= part_info->get_partition_id_charset(part_info, part_id, func_value);
  restore_part_field_pointers(charset_fields, saved_ptrs);
  return error;
}
2543 2544


2545 2546 2547 2548 2549
/*
  Call part_info->get_partition_id_charset() while the fields in
  part_charset_field_array point at their field buffers; the field
  pointers are restored before returning.
*/
static int get_part_id_charset_func_part(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value)
{
  Field **charset_fields= part_info->part_charset_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  int error;

  copy_to_part_field_buffers(charset_fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  error= part_info->get_partition_id_charset(part_info, part_id, func_value);
  restore_part_field_pointers(charset_fields, saved_ptrs);
  return error;
}

2559

2560 2561 2562 2563 2564
/*
  Call part_info->get_partition_id_charset() while the fields in
  full_part_field_array point at their field buffers; the field
  pointers are restored before returning.
*/
static int get_part_id_charset_func_all(partition_info *part_info,
                                        uint32 *part_id,
                                        longlong *func_value)
{
  Field **all_fields= part_info->full_part_field_array;
  char **saved_ptrs= part_info->restore_full_part_field_ptrs;
  int error;

  copy_to_part_field_buffers(all_fields,
                             part_info->full_part_field_buffers,
                             saved_ptrs);
  error= part_info->get_partition_id_charset(part_info, part_id, func_value);
  restore_part_field_pointers(all_fields, saved_ptrs);
  return error;
}

2574

2575 2576 2577 2578 2579
/*
  Call part_info->get_part_partition_id_charset() while the fields in
  part_charset_field_array point at their field buffers; the field
  pointers are restored before returning.
*/
static int get_part_part_id_charset_func(partition_info *part_info,
                                         uint32 *part_id,
                                         longlong *func_value)
{
  Field **charset_fields= part_info->part_charset_field_array;
  char **saved_ptrs= part_info->restore_part_field_ptrs;
  int error;

  copy_to_part_field_buffers(charset_fields,
                             part_info->part_field_buffers,
                             saved_ptrs);
  error= part_info->get_part_partition_id_charset(part_info,
                                                  part_id, func_value);
  restore_part_field_pointers(charset_fields, saved_ptrs);
  return error;
}

2590

2591 2592 2593
/*
  Call part_info->get_subpartition_id_charset() while the fields in
  subpart_charset_field_array point at their field buffers; the field
  pointers are restored before returning.
*/
static uint32 get_subpart_id_charset_func(partition_info *part_info)
{
  Field **charset_fields= part_info->subpart_charset_field_array;
  char **saved_ptrs= part_info->restore_subpart_field_ptrs;
  int subpart_id;

  copy_to_part_field_buffers(charset_fields,
                             part_info->subpart_field_buffers,
                             saved_ptrs);
  subpart_id= part_info->get_subpartition_id_charset(part_info);
  restore_part_field_pointers(charset_fields, saved_ptrs);
  return subpart_id;
}
2602 2603


2604
int get_partition_id_list(partition_info *part_info,
2605 2606
                          uint32 *part_id,
                          longlong *func_value)
2607 2608
{
  LIST_PART_ENTRY *list_array= part_info->list_array;
2609 2610 2611
  int list_index;
  int min_list_index= 0;
  int max_list_index= part_info->no_list_values - 1;
2612
  longlong part_func_value= part_val_int(part_info->part_expr);
2613
  longlong list_value;
2614
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
2615 2616
  DBUG_ENTER("get_partition_id_list");

2617 2618 2619 2620 2621 2622 2623 2624 2625
  if (part_info->part_expr->null_value)
  {
    if (part_info->has_null_value)
    {
      *part_id= part_info->has_null_part_id;
      DBUG_RETURN(0);
    }
    goto notfound;
  }
2626
  *func_value= part_func_value;
2627 2628
  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;
2629 2630 2631 2632 2633 2634 2635
  while (max_list_index >= min_list_index)
  {
    list_index= (max_list_index + min_list_index) >> 1;
    list_value= list_array[list_index].list_value;
    if (list_value < part_func_value)
      min_list_index= list_index + 1;
    else if (list_value > part_func_value)
2636 2637 2638
    {
      if (!list_index)
        goto notfound;
2639
      max_list_index= list_index - 1;
2640 2641 2642
    }
    else
    {
2643
      *part_id= (uint32)list_array[list_index].partition_id;
2644
      DBUG_RETURN(0);
2645 2646
    }
  }
2647
notfound:
2648
  *part_id= 0;
2649
  DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
2650 2651 2652
}


2653
/*
2654 2655
  Find the sub-array part_info->list_array that corresponds to given interval

2656 2657 2658 2659 2660 2661 2662 2663
  SYNOPSIS 
    get_list_array_idx_for_endpoint()
      part_info         Partitioning info (partitioning type must be LIST)
      left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
                        FALSE - the interval is (-inf; a] or (-inf; a)
      include_endpoint  TRUE iff the interval includes the endpoint

  DESCRIPTION
2664
    This function finds the sub-array of part_info->list_array where values of
2665 2666 2667
    list_array[idx].list_value are contained within the specified interval.
    list_array is ordered by list_value, so
    1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the 
2668
       sought sub-array starts at some index idx and continues till array end.
2669 2670 2671 2672
       The function returns first number idx, such that 
       list_array[idx].list_value is contained within the passed interval.
       
    2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
2673
       sought sub-array starts at array start and continues till some last 
2674 2675 2676 2677 2678 2679 2680
       index idx.
       The function returns first number idx, such that 
       list_array[idx].list_value is NOT contained within the passed interval.
       If all array elements are contained, part_info->no_list_values is
       returned.

  NOTE
2681
    The caller will call this function and then will run along the sub-array of
2682 2683 2684 2685 2686 2687
    list_array to collect partition ids. If the number of list values is 
    significantly higher than the number of partitions, this could be slow and
    we could invent some other approach. The "run over list array" part is
    already wrapped in a get_next()-like function.

  RETURN
2688
    The edge of corresponding sub-array of part_info->list_array
2689 2690
*/

2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705
uint32 get_list_array_idx_for_endpoint_charset(partition_info *part_info,
                                               bool left_endpoint,
                                               bool include_endpoint)
{
  /*
    Variant of get_list_array_idx_for_endpoint() that runs the lookup
    between copy_to_part_field_buffers() and restore_part_field_pointers()
    on the partition field array. The index found is returned unchanged.
  */
  Field **part_fields= part_info->part_field_array;
  uint32 edge_idx;

  copy_to_part_field_buffers(part_fields,
                             part_info->part_field_buffers,
                             part_info->restore_part_field_ptrs);
  edge_idx= get_list_array_idx_for_endpoint(part_info, left_endpoint,
                                            include_endpoint);
  restore_part_field_pointers(part_fields,
                              part_info->restore_part_field_ptrs);
  return edge_idx;
}

2706 2707 2708 2709 2710 2711 2712
uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
                                       bool left_endpoint,
                                       bool include_endpoint)
{
  /*
    Return the edge index in part_info->list_array for the interval whose
    endpoint is the current value of the partition expression.

    On an exact match at index idx the result is idx when the matching
    element belongs on the returned side of the edge, and idx + 1 otherwise
    (decided by left_endpoint XOR include_endpoint). Without an exact match
    the result is the index of the first element greater than the value.
    A NULL expression value yields 0.
  */
  LIST_PART_ENTRY *list_array= part_info->list_array;
  uint list_index;
  uint min_list_index= 0, max_list_index= part_info->no_list_values - 1;
  longlong list_value;
  /* Get the partitioning function value for the endpoint */
  longlong part_func_value= part_val_int(part_info->part_expr);
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
  DBUG_ENTER("get_list_array_idx_for_endpoint");

  if (part_info->part_expr->null_value)
  {
    DBUG_RETURN(0);
  }
  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;
  DBUG_ASSERT(part_info->no_list_values);
  /*
    The assert above vanishes in non-debug builds. With an empty list_array
    max_list_index has wrapped around to UINT_MAX and the search below would
    index far outside the array, so answer directly: the sub-array is empty
    and its edge is index 0 for either kind of endpoint.
  */
  if (!part_info->no_list_values)
  {
    DBUG_RETURN(0);
  }
  do
  {
    list_index= (max_list_index + min_list_index) >> 1;
    list_value= list_array[list_index].list_value;
    if (list_value < part_func_value)
      min_list_index= list_index + 1;
    else if (list_value > part_func_value)
    {
      /* Indexes are unsigned: stop here instead of computing 0 - 1 */
      if (!list_index)
        goto notfound;
      max_list_index= list_index - 1;
    }
    else
    {
      DBUG_RETURN(list_index + test(left_endpoint ^ include_endpoint));
    }
  } while (max_list_index >= min_list_index);
notfound:
  /* No exact match: step past the last probe if it was below the value */
  if (list_value < part_func_value)
    list_index++;
  DBUG_RETURN(list_index);
}

2749

2750
int get_partition_id_range(partition_info *part_info,
2751 2752
                           uint32 *part_id,
                           longlong *func_value)
2753 2754 2755
{
  longlong *range_array= part_info->range_int_array;
  uint max_partition= part_info->no_parts - 1;
2756 2757 2758
  uint min_part_id= 0;
  uint max_part_id= max_partition;
  uint loc_part_id;
2759
  longlong part_func_value= part_val_int(part_info->part_expr);
2760
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
2761
  DBUG_ENTER("get_partition_id_range");
2762

2763 2764 2765 2766 2767
  if (part_info->part_expr->null_value)
  {
    *part_id= 0;
    DBUG_RETURN(0);
  }
2768
  *func_value= part_func_value;
2769 2770
  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;
2771 2772 2773
  while (max_part_id > min_part_id)
  {
    loc_part_id= (max_part_id + min_part_id + 1) >> 1;
patg@govinda.patg.net's avatar
patg@govinda.patg.net committed
2774
    if (range_array[loc_part_id] <= part_func_value)
2775 2776 2777 2778 2779 2780 2781 2782 2783
      min_part_id= loc_part_id + 1;
    else
      max_part_id= loc_part_id - 1;
  }
  loc_part_id= max_part_id;
  if (part_func_value >= range_array[loc_part_id])
    if (loc_part_id != max_partition)
      loc_part_id++;
  *part_id= (uint32)loc_part_id;
2784 2785 2786 2787 2788 2789
  if (loc_part_id == max_partition &&
      range_array[loc_part_id] != LONGLONG_MAX &&
      part_func_value >= range_array[loc_part_id])
    DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);

  DBUG_PRINT("exit",("partition: %d", *part_id));
2790
  DBUG_RETURN(0);
2791 2792
}

2793 2794

/*
2795 2796
  Find the sub-array of part_info->range_int_array that covers given interval
 
2797 2798 2799 2800 2801 2802 2803 2804 2805
  SYNOPSIS 
    get_partition_id_range_for_endpoint()
      part_info         Partitioning info (partitioning type must be RANGE)
      left_endpoint     TRUE  - the interval is [a; +inf) or (a; +inf)
                        FALSE - the interval is (-inf; a] or (-inf; a).
      include_endpoint  TRUE <=> the endpoint itself is included in the
                        interval

  DESCRIPTION
2806
    This function finds the sub-array of part_info->range_int_array where the
2807
    elements have non-empty intersections with the given interval.
2808
 
2809 2810 2811 2812 2813 2814 2815
    A range_int_array element at index idx represents the interval
      
      [range_int_array[idx-1], range_int_array[idx]),

    intervals are disjoint and ordered by their right bound, so
    
    1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
2816
       sought sub-array starts at some index idx and continues till array end.
2817 2818 2819 2820 2821
       The function returns first number idx, such that the interval
       represented by range_int_array[idx] has non empty intersection with 
       the passed interval.
       
    2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
2822
       sought sub-array starts at array start and continues till some last
2823 2824 2825 2826 2827 2828 2829 2830 2831
       index idx.
       The function returns first number idx, such that the interval
       represented by range_int_array[idx] has EMPTY intersection with the
       passed interval.
       If the interval represented by the last array element has non-empty 
       intersection with the passed interval, part_info->no_parts is
       returned.
       
  RETURN
2832
    The edge of corresponding part_info->range_int_array sub-array.
2833 2834
*/

2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850
static uint32
get_partition_id_range_for_endpoint_charset(partition_info *part_info,
                                            bool left_endpoint,
                                            bool include_endpoint)
{
  /*
    Variant of get_partition_id_range_for_endpoint() that runs the lookup
    between copy_to_part_field_buffers() and restore_part_field_pointers()
    on the partition field array. The index found is returned unchanged.
  */
  Field **part_fields= part_info->part_field_array;
  uint32 edge_idx;

  copy_to_part_field_buffers(part_fields,
                             part_info->part_field_buffers,
                             part_info->restore_part_field_ptrs);
  edge_idx= get_partition_id_range_for_endpoint(part_info, left_endpoint,
                                                include_endpoint);
  restore_part_field_pointers(part_fields,
                              part_info->restore_part_field_ptrs);
  return edge_idx;
}

2851 2852 2853 2854 2855 2856 2857
uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
                                           bool left_endpoint,
                                           bool include_endpoint)
{
  /*
    Return the edge index in part_info->range_int_array for the interval
    whose endpoint is the current value of the partition expression.

    For a left endpoint this is the first partition that can intersect the
    interval; for a right endpoint it is one past the last such partition.
    A NULL expression value yields 1 for a closed right endpoint and 0 in
    every other case.
  */
  longlong *range_array= part_info->range_int_array;
  uint last_part= part_info->no_parts - 1;
  uint low= 0, high= last_part, edge;
  /* Get the partitioning function value for the endpoint */
  longlong part_func_value= part_val_int(part_info->part_expr);
  bool unsigned_flag= part_info->part_expr->unsigned_flag;
  DBUG_ENTER("get_partition_id_range_for_endpoint");

  if (part_info->part_expr->null_value)
  {
    if (!left_endpoint && include_endpoint)
      DBUG_RETURN(1);
    DBUG_RETURN(0);
  }

  if (unsigned_flag)
    part_func_value-= 0x8000000000000000ULL;

  /* Binary search; the probe is rounded up so that it is never index 0 */
  while (low < high)
  {
    uint probe= (high + low + 1) >> 1;

    if (range_array[probe] > part_func_value)
      high= probe - 1;
    else
      low= probe + 1;
  }

  /* The search may stop one partition short; step forward if possible */
  edge= high;
  if (edge < last_part && part_func_value >= range_array[edge + 1])
    edge++;

  if (left_endpoint)
  {
    /* The value lies at or past this partition's bound: start at the next */
    if (part_func_value >= range_array[edge])
      edge++;
    DBUG_RETURN(edge);
  }

  if (edge < last_part)
  {
    if (part_func_value > range_array[edge])
      edge++;
    else if (part_func_value == range_array[edge])
      edge+= test(include_endpoint);
  }
  /* A right endpoint reports the index one past the last partition used */
  edge++;
  DBUG_RETURN(edge);
}


2906
int get_partition_id_hash_nosub(partition_info *part_info,
2907 2908
                                 uint32 *part_id,
                                 longlong *func_value)
2909
{
2910 2911
  *part_id= get_part_id_hash(part_info->no_parts, part_info->part_expr,
                             func_value);
2912
  return 0;
2913 2914 2915
}


2916
int get_partition_id_linear_hash_nosub(partition_info *part_info,
2917 2918
                                        uint32 *part_id,
                                        longlong *func_value)
2919 2920
{
  *part_id= get_part_id_linear_hash(part_info, part_info->no_parts,
2921
                                    part_info->part_expr, func_value);
2922
  return 0;
2923 2924 2925
}


2926
int get_partition_id_key_nosub(partition_info *part_info,
2927 2928
                                uint32 *part_id,
                                longlong *func_value)
2929
{
2930 2931
  *part_id= get_part_id_key(part_info->part_field_array,
                            part_info->no_parts, func_value);
2932
  return 0;
2933 2934 2935
}


2936
int get_partition_id_linear_key_nosub(partition_info *part_info,
2937 2938
                                       uint32 *part_id,
                                       longlong *func_value)
2939 2940 2941
{
  *part_id= get_part_id_linear_key(part_info,
                                   part_info->part_field_array,
2942
                                   part_info->no_parts, func_value);
2943
  return 0;
2944 2945 2946
}


2947
int get_partition_id_range_sub_hash(partition_info *part_info,
2948 2949
                                     uint32 *part_id,
                                     longlong *func_value)
2950 2951 2952
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
2953
  longlong local_func_value;
2954
  int error;
2955
  DBUG_ENTER("get_partition_id_range_sub_hash");
2956

2957 2958
  if (unlikely((error= get_partition_id_range(part_info, &loc_part_id,
                                              func_value))))
2959
  {
2960
    DBUG_RETURN(error);
2961 2962
  }
  no_subparts= part_info->no_subparts;
2963 2964
  sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr,
                                &local_func_value);
2965
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
2966
  DBUG_RETURN(0);
2967 2968 2969
}


2970
int get_partition_id_range_sub_linear_hash(partition_info *part_info,
2971 2972
                                            uint32 *part_id,
                                            longlong *func_value)
2973 2974 2975
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
2976
  longlong local_func_value;
2977
  int error;
2978
  DBUG_ENTER("get_partition_id_range_sub_linear_hash");
2979

2980 2981
  if (unlikely((error= get_partition_id_range(part_info, &loc_part_id,
                                              func_value))))
2982
  {
2983
    DBUG_RETURN(error);
2984 2985 2986
  }
  no_subparts= part_info->no_subparts;
  sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
2987 2988
                                       part_info->subpart_expr,
                                       &local_func_value);
2989
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
2990
  DBUG_RETURN(0);
2991 2992 2993
}


2994
int get_partition_id_range_sub_key(partition_info *part_info,
2995 2996
                                    uint32 *part_id,
                                    longlong *func_value)
2997 2998 2999
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
3000
  longlong local_func_value;
3001
  int error;
3002
  DBUG_ENTER("get_partition_id_range_sub_key");
3003

3004 3005
  if (unlikely((error= get_partition_id_range(part_info, &loc_part_id,
                                              func_value))))
3006
  {
3007
    DBUG_RETURN(error);
3008 3009
  }
  no_subparts= part_info->no_subparts;
3010 3011
  sub_part_id= get_part_id_key(part_info->subpart_field_array,
                               no_subparts, &local_func_value);
3012
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
3013
  DBUG_RETURN(0);
3014 3015 3016
}


3017
int get_partition_id_range_sub_linear_key(partition_info *part_info,
3018 3019
                                           uint32 *part_id,
                                           longlong *func_value)
3020 3021 3022
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
3023
  longlong local_func_value;
3024
  int error;
3025
  DBUG_ENTER("get_partition_id_range_sub_linear_key");
3026

3027 3028
  if (unlikely((error= get_partition_id_range(part_info, &loc_part_id,
                                              func_value))))
3029
  {
3030
    DBUG_RETURN(error);
3031 3032 3033 3034
  }
  no_subparts= part_info->no_subparts;
  sub_part_id= get_part_id_linear_key(part_info,
                                      part_info->subpart_field_array,
3035
                                      no_subparts, &local_func_value);
3036
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
3037
  DBUG_RETURN(0);
3038 3039 3040
}


3041
int get_partition_id_list_sub_hash(partition_info *part_info,
3042 3043
                                    uint32 *part_id,
                                    longlong *func_value)
3044 3045 3046
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
3047
  longlong local_func_value;
3048
  int error;
3049
  DBUG_ENTER("get_partition_id_list_sub_hash");
3050

3051 3052
  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
                                             func_value))))
3053
  {
3054
    DBUG_RETURN(error);
3055 3056
  }
  no_subparts= part_info->no_subparts;
3057 3058
  sub_part_id= get_part_id_hash(no_subparts, part_info->subpart_expr,
                                &local_func_value);
3059
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
3060
  DBUG_RETURN(0);
3061 3062 3063
}


3064
int get_partition_id_list_sub_linear_hash(partition_info *part_info,
3065 3066
                                           uint32 *part_id,
                                           longlong *func_value)
3067 3068 3069
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
3070
  longlong local_func_value;
3071
  int error;
3072
  DBUG_ENTER("get_partition_id_list_sub_linear_hash");
3073

3074 3075
  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
                                             func_value))))
3076
  {
3077
    DBUG_RETURN(error);
3078 3079
  }
  no_subparts= part_info->no_subparts;
3080 3081 3082
  sub_part_id= get_part_id_linear_hash(part_info, no_subparts,
                                       part_info->subpart_expr,
                                       &local_func_value);
3083
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
3084
  DBUG_RETURN(0);
3085 3086 3087
}


3088
int get_partition_id_list_sub_key(partition_info *part_info,
3089 3090
                                   uint32 *part_id,
                                   longlong *func_value)
3091 3092 3093
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
3094
  longlong local_func_value;
3095
  int error;
3096
  DBUG_ENTER("get_partition_id_range_sub_key");
3097

3098 3099
  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
                                             func_value))))
3100
  {
3101
    DBUG_RETURN(error);
3102 3103
  }
  no_subparts= part_info->no_subparts;
3104 3105
  sub_part_id= get_part_id_key(part_info->subpart_field_array,
                               no_subparts, &local_func_value);
3106
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
3107
  DBUG_RETURN(0);
3108 3109 3110
}


3111
int get_partition_id_list_sub_linear_key(partition_info *part_info,
3112 3113
                                          uint32 *part_id,
                                          longlong *func_value)
3114 3115 3116
{
  uint32 loc_part_id, sub_part_id;
  uint no_subparts;
3117
  longlong local_func_value;
3118
  int error;
3119
  DBUG_ENTER("get_partition_id_list_sub_linear_key");
3120

3121 3122
  if (unlikely((error= get_partition_id_list(part_info, &loc_part_id,
                                             func_value))))
3123
  {
3124
    DBUG_RETURN(error);
3125 3126 3127 3128
  }
  no_subparts= part_info->no_subparts;
  sub_part_id= get_part_id_linear_key(part_info,
                                      part_info->subpart_field_array,
3129
                                      no_subparts, &local_func_value);
3130
  *part_id= get_part_id_for_sub(loc_part_id, sub_part_id, no_subparts);
3131
  DBUG_RETURN(0);
3132 3133 3134 3135 3136
}


/*
  This function is used to calculate the subpartition id
3137

3138 3139 3140 3141
  SYNOPSIS
    get_subpartition_id()
    part_info           A reference to the partition_info struct where all the
                        desired information is given
3142

3143
  RETURN VALUE
3144 3145
    part_id             The subpartition identity

3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160
  DESCRIPTION
    A routine used in some SELECT's when only partial knowledge of the
    partitions is known.
    
    There are actually 4 different variants of this function, which are called
    through a function pointer.

    get_partition_id_hash_sub
    get_partition_id_key_sub
    get_partition_id_linear_hash_sub
    get_partition_id_linear_key_sub
*/

uint32 get_partition_id_hash_sub(partition_info *part_info)
{
  /*
    Subpartition id for HASH subpartitioning; the function value that
    get_part_id_hash() reports is not needed here and is dropped.
  */
  longlong ignored_func_value;
  uint32 subpart_id= get_part_id_hash(part_info->no_subparts,
                                      part_info->subpart_expr,
                                      &ignored_func_value);

  return subpart_id;
}


uint32 get_partition_id_linear_hash_sub(partition_info *part_info)
{
  /*
    Subpartition id for LINEAR HASH subpartitioning; the function value
    that get_part_id_linear_hash() reports is not needed here and is
    dropped.
  */
  longlong ignored_func_value;
  uint32 subpart_id= get_part_id_linear_hash(part_info,
                                             part_info->no_subparts,
                                             part_info->subpart_expr,
                                             &ignored_func_value);

  return subpart_id;
}


uint32 get_partition_id_key_sub(partition_info *part_info)
{
  /*
    Subpartition id for KEY subpartitioning; the function value that
    get_part_id_key() reports is not needed here and is dropped.
  */
  longlong ignored_func_value;
  uint32 subpart_id= get_part_id_key(part_info->subpart_field_array,
                                     part_info->no_subparts,
                                     &ignored_func_value);

  return subpart_id;
}


uint32 get_partition_id_linear_key_sub(partition_info *part_info)
{
  /*
    Subpartition id for LINEAR KEY subpartitioning; the function value
    that get_part_id_linear_key() reports is not needed here and is
    dropped.
  */
  longlong ignored_func_value;
  uint32 subpart_id= get_part_id_linear_key(part_info,
                                            part_info->subpart_field_array,
                                            part_info->no_subparts,
                                            &ignored_func_value);

  return subpart_id;
}


/*
3193 3194
  Set an indicator on all partition fields that are set by the key

3195 3196 3197 3198
  SYNOPSIS
    set_PF_fields_in_key()
    key_info                   Information about the index
    key_length                 Length of key
3199

3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239
  RETURN VALUE
    TRUE                       Found partition field set by key
    FALSE                      No partition field set by key
*/

static bool set_PF_fields_in_key(KEY *key_info, uint key_length)
{
  /*
    Walk the key parts covered by the first key_length bytes of the key
    and flag every covered field that takes part in a partition function
    with GET_FIXED_FIELDS_FLAG. Returns TRUE if at least one such field
    was flagged.
  */
  KEY_PART_INFO *key_part= key_info->key_part;
  bool found_part_field= FALSE;
  DBUG_ENTER("set_PF_fields_in_key");

  while ((int)key_length > 0)
  {
    /* Discount the per-part overhead bytes before the data bytes */
    if (key_part->null_bit)
      key_length--;
    if (key_part->type == HA_KEYTYPE_BIT &&
        ((Field_bit*)key_part->field)->bit_len)
      key_length--;
    if (key_part->key_part_flag & (HA_BLOB_PART + HA_VAR_LENGTH_PART))
      key_length-= HA_KEY_BLOB_LENGTH;

    /* A key part that is only partially supplied does not count */
    if (key_length < key_part->length)
      break;
    key_length-= key_part->length;

    if (key_part->field->flags & FIELD_IN_PART_FUNC_FLAG)
    {
      key_part->field->flags|= GET_FIXED_FIELDS_FLAG;
      found_part_field= TRUE;
    }
    key_part++;
  }
  DBUG_RETURN(found_part_field);
}


/*
  We have found that at least one partition field was set by a key, now
  check if a partition function has all its fields bound or not.
3240

3241 3242 3243
  SYNOPSIS
    check_part_func_bound()
    ptr                     Array of fields NULL terminated (partition fields)
3244

3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269
  RETURN VALUE
    TRUE                    All fields in partition function are set
    FALSE                   Not all fields in partition function are set
*/

static bool check_part_func_bound(Field **ptr)
{
  /*
    Scan the NULL-terminated field array and report whether every field
    carries GET_FIXED_FIELDS_FLAG; the scan stops at the first one that
    does not.
  */
  DBUG_ENTER("check_part_func_bound");

  for (; *ptr; ptr++)
  {
    if (!((*ptr)->flags & GET_FIXED_FIELDS_FLAG))
      DBUG_RETURN(FALSE);
  }
  DBUG_RETURN(TRUE);
}


/*
  Get the id of the subpartitioning part by using the key buffer of the
  index scan.
3270

3271 3272 3273 3274 3275 3276
  SYNOPSIS
    get_sub_part_id_from_key()
    table         The table object
    buf           A buffer that can be used to evaluate the partition function
    key_info      The index object
    key_spec      A key_range containing key and key length
3277

3278 3279
  RETURN VALUES
    part_id       Subpartition id to use
3280

3281 3282 3283 3284 3285 3286 3287 3288 3289 3290
  DESCRIPTION
    Use key buffer to set-up record in buf, move field pointers and
    get the partition identity and restore field pointers afterwards.
*/

static uint32 get_sub_part_id_from_key(const TABLE *table,byte *buf,
                                       KEY *key_info,
                                       const key_range *key_spec)
{
  /*
    Restore the key into buf and evaluate the subpartition function on it.
    When buf is not table->record[0] the subpartition fields are first
    moved onto buf with set_field_ptr() and moved back afterwards.
  */
  byte *rec0= table->record[0];
  partition_info *part_info= table->part_info;
  const bool fields_moved= (rec0 != buf);
  uint32 part_id;
  DBUG_ENTER("get_sub_part_id_from_key");

  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);

  if (unlikely(fields_moved))
    set_field_ptr(part_info->subpart_field_array, buf, rec0);

  part_id= part_info->get_subpartition_id(part_info);

  if (unlikely(fields_moved))
    set_field_ptr(part_info->subpart_field_array, rec0, buf);

  DBUG_RETURN(part_id);
}

/*
  Get the id of the partitioning part by using the key buffer of the
  index scan.
3311

3312 3313 3314 3315 3316 3317
  SYNOPSIS
    get_part_id_from_key()
    table         The table object
    buf           A buffer that can be used to evaluate the partition function
    key_info      The index object
    key_spec      A key_range containing key and key length
3318 3319
    out:part_id   Partition to use

3320 3321 3322
  RETURN VALUES
    TRUE          Partition to use not found
    FALSE         Ok, part_id indicates partition to use
3323

3324 3325 3326 3327
  DESCRIPTION
    Use key buffer to set-up record in buf, move field pointers and
    get the partition identity and restore field pointers afterwards.
*/
3328

3329 3330 3331 3332 3333
bool get_part_id_from_key(const TABLE *table, byte *buf, KEY *key_info,
                          const key_range *key_spec, uint32 *part_id)
{
  bool result;
  byte *rec0= table->record[0];
3334
  partition_info *part_info= table->part_info;
3335
  longlong func_value;
3336 3337 3338 3339
  DBUG_ENTER("get_part_id_from_key");

  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
  if (likely(rec0 == buf))
3340 3341
    result= part_info->get_part_partition_id(part_info, part_id,
                                             &func_value);
3342 3343 3344 3345
  else
  {
    Field **part_field_array= part_info->part_field_array;
    set_field_ptr(part_field_array, buf, rec0);
3346 3347
    result= part_info->get_part_partition_id(part_info, part_id,
                                             &func_value);
3348 3349 3350 3351 3352 3353 3354 3355
    set_field_ptr(part_field_array, rec0, buf);
  }
  DBUG_RETURN(result);
}

/*
  Get the partitioning id of the full PF by using the key buffer of the
  index scan.
3356

3357 3358 3359 3360 3361 3362
  SYNOPSIS
    get_full_part_id_from_key()
    table         The table object
    buf           A buffer that is used to evaluate the partition function
    key_info      The index object
    key_spec      A key_range containing key and key length
3363 3364
    out:part_spec A partition id containing start part and end part

3365 3366 3367
  RETURN VALUES
    part_spec
    No partitions to scan is indicated by start_part > end_part when returning
3368

3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379
  DESCRIPTION
    Use key buffer to set-up record in buf, move field pointers if needed and
    get the partition identity and restore field pointers afterwards.
*/

void get_full_part_id_from_key(const TABLE *table, byte *buf,
                               KEY *key_info,
                               const key_range *key_spec,
                               part_id_range *part_spec)
{
  bool result;
3380
  partition_info *part_info= table->part_info;
3381
  byte *rec0= table->record[0];
3382
  longlong func_value;
3383 3384 3385 3386
  DBUG_ENTER("get_full_part_id_from_key");

  key_restore(buf, (byte*)key_spec->key, key_info, key_spec->length);
  if (likely(rec0 == buf))
3387 3388
    result= part_info->get_partition_id(part_info, &part_spec->start_part,
                                        &func_value);
3389 3390 3391 3392
  else
  {
    Field **part_field_array= part_info->full_part_field_array;
    set_field_ptr(part_field_array, buf, rec0);
3393 3394
    result= part_info->get_partition_id(part_info, &part_spec->start_part,
                                        &func_value);
3395 3396 3397 3398 3399 3400 3401
    set_field_ptr(part_field_array, rec0, buf);
  }
  part_spec->end_part= part_spec->start_part;
  if (unlikely(result))
    part_spec->start_part++;
  DBUG_VOID_RETURN;
}
3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438

/*
  Prune the set of partitions to use in query 

  SYNOPSIS
    prune_partition_set()
    table         The table object
    out:part_spec Contains start part, end part 

  DESCRIPTION
    This function is called to prune the range of partitions to scan by
    checking the used_partitions bitmap.
    If start_part > end_part at return it means no partition needs to be
    scanned. If start_part == end_part it always means a single partition
    needs to be scanned.

  RETURN VALUE
    part_spec
*/
void prune_partition_set(const TABLE *table, part_id_range *part_spec)
{
  int last_partition= -1;
  uint i;
  partition_info *part_info= table->part_info;

  DBUG_ENTER("prune_partition_set");
  for (i= part_spec->start_part; i <= part_spec->end_part; i++)
  {
    if (bitmap_is_set(&(part_info->used_partitions), i))
    {
      DBUG_PRINT("info", ("Partition %d is set", i));
      if (last_partition == -1)
        /* First partition found in set and pruned bitmap */
        part_spec->start_part= i;
      last_partition= i;
    }
  }
mskold@mysql.com's avatar
mskold@mysql.com committed
3439 3440 3441 3442
  if (last_partition == -1)
    /* No partition found in pruned bitmap */
    part_spec->start_part= part_spec->end_part + 1;  
  else //if (last_partition != -1)
3443 3444 3445 3446 3447
    part_spec->end_part= last_partition;

  DBUG_VOID_RETURN;
}

3448 3449
/*
  Get the set of partitions to use in query.
3450

3451 3452 3453 3454 3455 3456
  SYNOPSIS
    get_partition_set()
    table         The table object
    buf           A buffer that can be used to evaluate the partition function
    index         The index of the key used, if MAX_KEY no index used
    key_spec      A key_range containing key and key length
3457
    out:part_spec Contains start part, end part and indicator if bitmap is
3458
                  used for which partitions to scan
3459

3460 3461 3462 3463 3464 3465 3466 3467 3468
  DESCRIPTION
    This function is called to discover which partitions to use in an index
    scan or a full table scan.
    It returns a range of partitions to scan. If there are holes in this
    range with partitions that are not needed to scan a bit array is used
    to signal which partitions to use and which not to use.
    If start_part > end_part at return it means no partition needs to be
    scanned. If start_part == end_part it always means a single partition
    needs to be scanned.
3469

3470 3471 3472 3473 3474 3475
  RETURN VALUE
    part_spec
*/
void get_partition_set(const TABLE *table, byte *buf, const uint index,
                       const key_range *key_spec, part_id_range *part_spec)
{
3476
  partition_info *part_info= table->part_info;
3477
  uint no_parts= part_info->get_tot_partitions();
3478
  uint i, part_id;
3479 3480
  uint sub_part= no_parts;
  uint32 part_part= no_parts;
3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510
  KEY *key_info= NULL;
  bool found_part_field= FALSE;
  DBUG_ENTER("get_partition_set");

  part_spec->start_part= 0;
  part_spec->end_part= no_parts - 1;
  if ((index < MAX_KEY) && 
       key_spec->flag == (uint)HA_READ_KEY_EXACT &&
       part_info->some_fields_in_PF.is_set(index))
  {
    key_info= table->key_info+index;
    /*
      The index can potentially provide at least one PF-field (field in the
      partition function). Thus it is interesting to continue our probe.
    */
    if (key_spec->length == key_info->key_length)
    {
      /*
        The entire key is set so we can check whether we can immediately
        derive either the complete PF or if we can derive either
        the top PF or the subpartitioning PF. This can be established by
        checking precalculated bits on each index.
      */
      if (part_info->all_fields_in_PF.is_set(index))
      {
        /*
          We can derive the exact partition to use, no more than this one
          is needed.
        */
        get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
3511 3512 3513 3514
        /*
          Check if range can be adjusted by looking in used_partitions
        */
        prune_partition_set(table, part_spec);
3515 3516
        DBUG_VOID_RETURN;
      }
3517
      else if (part_info->is_sub_partitioned())
3518 3519 3520 3521 3522
      {
        if (part_info->all_fields_in_SPF.is_set(index))
          sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
        else if (part_info->all_fields_in_PPF.is_set(index))
        {
3523 3524
          if (get_part_id_from_key(table,buf,key_info,
                                   key_spec,(uint32*)&part_part))
3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556
          {
            /*
              The value of the RANGE or LIST partitioning was outside of
              allowed values. Thus it is certain that the result of this
              scan will be empty.
            */
            part_spec->start_part= no_parts;
            DBUG_VOID_RETURN;
          }
        }
      }
    }
    else
    {
      /*
        Set an indicator on all partition fields that are bound.
        If at least one PF-field was bound it pays off to check whether
        the PF or PPF or SPF has been bound.
        (PF = Partition Function, SPF = Subpartition Function and
         PPF = Partition Function part of subpartitioning)
      */
      if ((found_part_field= set_PF_fields_in_key(key_info,
                                                  key_spec->length)))
      {
        if (check_part_func_bound(part_info->full_part_field_array))
        {
          /*
            We were able to bind all fields in the partition function even
            by using only a part of the key. Calculate the partition to use.
          */
          get_full_part_id_from_key(table,buf,key_info,key_spec,part_spec);
          clear_indicator_in_key_fields(key_info);
3557 3558 3559 3560
          /*
            Check if range can be adjusted by looking in used_partitions
          */
          prune_partition_set(table, part_spec);
3561 3562
          DBUG_VOID_RETURN; 
        }
3563
        else if (part_info->is_sub_partitioned())
3564
        {
3565 3566 3567
          if (check_part_func_bound(part_info->subpart_field_array))
            sub_part= get_sub_part_id_from_key(table, buf, key_info, key_spec);
          else if (check_part_func_bound(part_info->part_field_array))
3568
          {
3569 3570 3571 3572 3573 3574
            if (get_part_id_from_key(table,buf,key_info,key_spec,&part_part))
            {
              part_spec->start_part= no_parts;
              clear_indicator_in_key_fields(key_info);
              DBUG_VOID_RETURN;
            }
3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603
          }
        }
      }
    }
  }
  {
    /*
      The next step is to analyse the table condition to see whether any
      information about which partitions to scan can be derived from there.
      Currently not implemented.
    */
  }
  /*
    If we come here we have found a range of sorts we have either discovered
    nothing or we have discovered a range of partitions with possible holes
    in it. We need a bitvector to further the work here.
  */
  if (!(part_part == no_parts && sub_part == no_parts))
  {
    /*
      We can only arrive here if we are using subpartitioning.
    */
    if (part_part != no_parts)
    {
      /*
        We know the top partition and need to scan all underlying
        subpartitions. This is a range without holes.
      */
      DBUG_ASSERT(sub_part == no_parts);
3604
      part_spec->start_part= part_part * part_info->no_subparts;
3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619
      part_spec->end_part= part_spec->start_part+part_info->no_subparts - 1;
    }
    else
    {
      DBUG_ASSERT(sub_part != no_parts);
      part_spec->start_part= sub_part;
      part_spec->end_part=sub_part+
                           (part_info->no_subparts*(part_info->no_parts-1));
      for (i= 0, part_id= sub_part; i < part_info->no_parts;
           i++, part_id+= part_info->no_subparts)
        ; //Set bit part_id in bit array
    }
  }
  if (found_part_field)
    clear_indicator_in_key_fields(key_info);
3620 3621 3622 3623
  /*
    Check if range can be adjusted by looking in used_partitions
  */
  prune_partition_set(table, part_spec);
3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644
  DBUG_VOID_RETURN;
}

/*
   If the table is partitioned we will read the partition info into the
   .frm file here.
   -------------------------------
   |  Fileinfo     64 bytes      |
   -------------------------------
   | Formnames     7 bytes       |
   -------------------------------
   | Not used    4021 bytes      |
   -------------------------------
   | Keyinfo + record            |
   -------------------------------
   | Padded to next multiple     |
   | of IO_SIZE                  |
   -------------------------------
   | Forminfo     288 bytes      |
   -------------------------------
   | Screen buffer, to make      |
3645
   |field names readable        |
3646 3647
   -------------------------------
   | Packed field info           |
3648
   |17 + 1 + strlen(field_name) |
3649 3650 3651 3652 3653 3654 3655 3656
   | + 1 end of file character   |
   -------------------------------
   | Partition info              |
   -------------------------------
   We provide the length of partition length in Fileinfo[55-58].

   Read the partition syntax from the frm file and parse it to get the
   data structures of the partitioning.
3657

3658 3659 3660
   SYNOPSIS
     mysql_unpack_partition()
     thd                           Thread object
3661
     part_buf                      Partition info from frm file
3662 3663
     part_info_len                 Length of partition syntax
     table                         Table object of partitioned table
3664 3665 3666
     create_table_ind              Is it called from CREATE TABLE
     default_db_type               What is the default engine of the table

3667 3668 3669
   RETURN VALUE
     TRUE                          Error
     FALSE                         Sucess
3670

3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681
   DESCRIPTION
     Read the partition syntax from the current position in the frm file.
     Initiate a LEX object, save the list of item tree objects to free after
     the query is done. Set-up partition info object such that parser knows
     it is called from internally. Call parser to create data structures
     (best possible recreation of item trees and so forth since there is no
     serialisation of these objects other than in parseable text format).
     We need to save the text of the partition functions since it is not
     possible to retrace this given an item tree.
*/

3682 3683 3684
bool mysql_unpack_partition(THD *thd,
                            const char *part_buf, uint part_info_len,
                            const char *part_state, uint part_state_len,
3685
                            TABLE* table, bool is_create_table_ind,
3686
                            handlerton *default_db_type)
3687 3688 3689
{
  bool result= TRUE;
  partition_info *part_info;
3690
  CHARSET_INFO *old_character_set_client= thd->variables.character_set_client;
3691 3692
  LEX *old_lex= thd->lex;
  LEX lex;
3693
  DBUG_ENTER("mysql_unpack_partition");
3694

3695
  thd->lex= &lex;
3696
  thd->variables.character_set_client= system_charset_info;
3697 3698 3699 3700 3701

  Lex_input_stream lip(thd, part_buf, part_info_len);
  thd->m_lip= &lip;

  lex_start(thd);
3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718
  /*
    We need to use the current SELECT_LEX since I need to keep the
    Name_resolution_context object which is referenced from the
    Item_field objects.
    This is not a nice solution since if the parser uses current_select
    for anything else it will corrupt the current LEX object.
  */
  thd->lex->current_select= old_lex->current_select; 
  /*
    All Items created is put into a free list on the THD object. This list
    is used to free all Item objects after completing a query. We don't
    want that to happen with the Item tree created as part of the partition
    info. This should be attached to the table object and remain so until
    the table object is released.
    Thus we move away the current list temporarily and start a new list that
    we then save in the partition info structure.
  */
3719
  lex.part_info= new partition_info();/* Indicates MYSQLparse from this place */
3720 3721 3722 3723 3724 3725 3726 3727
  if (!lex.part_info)
  {
    mem_alloc_error(sizeof(partition_info));
    goto end;
  }
  lex.part_info->part_state= part_state;
  lex.part_info->part_state_len= part_state_len;
  DBUG_PRINT("info", ("Parse: %s", part_buf));
3728
  if (MYSQLparse((void*)thd) || thd->is_fatal_error)
3729
  {
3730
    thd->free_items();
3731 3732
    goto end;
  }
3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748
  /*
    The parsed syntax residing in the frm file can still contain defaults.
    The reason is that the frm file is sometimes saved outside of this
    MySQL Server and used in backup and restore of clusters or partitioned
    tables. It is not certain that the restore will restore exactly the
    same default partitioning.
    
    The easiest manner of handling this is to simply continue using the
    part_info we already built up during mysql_create_table if we are
    in the process of creating a table. If the table already exists we
    need to discover the number of partitions for the default parts. Since
    the handler object hasn't been created here yet we need to postpone this
    to the fix_partition_func method.
  */

  DBUG_PRINT("info", ("Successful parse"));
3749
  part_info= lex.part_info;
3750 3751 3752
  DBUG_PRINT("info", ("default engine = %d, default_db_type = %d",
             ha_legacy_type(part_info->default_engine_type),
             ha_legacy_type(default_db_type)));
3753
  if (is_create_table_ind && old_lex->sql_command == SQLCOM_CREATE_TABLE)
3754
  {
3755
    if (old_lex->create_info.options & HA_LEX_CREATE_TABLE_LIKE)
3756 3757
    {
      /*
3758 3759 3760
        This code is executed when we create table in CREATE TABLE t1 LIKE t2.
        old_lex->query_tables contains table list element for t2 and the table
        we are opening has name t1.
3761
      */
3762 3763
      if (partition_default_handling(table, part_info, FALSE,
                                     old_lex->query_tables->table->s->path.str))
3764
      {
3765 3766
        result= TRUE;
        goto end;
3767 3768 3769
      }
    }
    else
3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782
    {
      /*
        When we come here we are doing a create table. In this case we
        have already done some preparatory work on the old part_info
        object. We don't really need this new partition_info object.
        Thus we go back to the old partition info object.
        We need to free any memory objects allocated on item_free_list
        by the parser since we are keeping the old info from the first
        parser call in CREATE TABLE.
        We'll ensure that this object isn't put into table cache also
        just to ensure we don't get into strange situations with the
        item objects.
      */
3783
      thd->free_items();
3784
      part_info= thd->work_part_info;
3785 3786
      table->s->version= 0UL;
    }
3787
  }
3788
  table->part_info= part_info;
3789
  table->file->set_part_info(part_info);
3790
  if (!part_info->default_engine_type)
3791
    part_info->default_engine_type= default_db_type;
3792
  DBUG_ASSERT(part_info->default_engine_type == default_db_type);
3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805

  {
  /*
    This code part allocates memory for the serialised item information for
    the partition functions. In most cases this is not needed but if the
    table is used for SHOW CREATE TABLES or ALTER TABLE that modifies
    partition information it is needed and the info is lost if we don't
    save it here so unfortunately we have to do it here even if in most
    cases it is not needed. This is a consequence of that item trees are
    not serialisable.
  */
    uint part_func_len= part_info->part_func_len;
    uint subpart_func_len= part_info->subpart_func_len; 
3806 3807 3808 3809
    char *part_func_string= NULL;
    char *subpart_func_string= NULL;
    if ((part_func_len &&
        !((part_func_string= thd->alloc(part_func_len)))) ||
3810
        (subpart_func_len &&
3811
        !((subpart_func_string= thd->alloc(subpart_func_len)))))
3812
    {
3813
      mem_alloc_error(part_func_len);
3814
      thd->free_items();
3815 3816
      goto end;
    }
3817 3818
    if (part_func_len)
      memcpy(part_func_string, part_info->part_func_string, part_func_len);
3819 3820 3821 3822 3823 3824 3825 3826 3827
    if (subpart_func_len)
      memcpy(subpart_func_string, part_info->subpart_func_string,
             subpart_func_len);
    part_info->part_func_string= part_func_string;
    part_info->subpart_func_string= subpart_func_string;
  }

  result= FALSE;
end:
monty@mysql.com's avatar
monty@mysql.com committed
3828
  lex_end(thd->lex);
3829
  thd->lex= old_lex;
3830
  thd->variables.character_set_client= old_character_set_client;
3831 3832
  DBUG_RETURN(result);
}
3833

3834

3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870
/*
  Store one storage engine on every partition element object

  SYNOPSIS
    set_engine_all_partitions()
    part_info                  Partition info whose elements are updated
    engine_type                Handlerton reference to store in each element

  RETURN VALUES
    NONE

  DESCRIPTION
    Walks part_info->no_parts entries of part_info->partitions and, when
    the table is subpartitioned, part_info->no_subparts entries of each
    partition's subpartition list. Each list is advanced at least once,
    before the respective counter is compared.
*/

static void set_engine_all_partitions(partition_info *part_info,
                                      handlerton *engine_type)
{
  List_iterator<partition_element> partition_iter(part_info->partitions);
  uint part_no= 0;

  for (;;)
  {
    partition_element *partition= partition_iter++;

    partition->engine_type= engine_type;
    if (part_info->is_sub_partitioned())
    {
      List_iterator<partition_element>
        subpartition_iter(partition->subpartitions);
      uint subpart_no= 0;

      for (;;)
      {
        partition_element *subpartition= subpartition_iter++;

        subpartition->engine_type= engine_type;
        if (++subpart_no >= part_info->no_subparts)
          break;
      }
    }
    if (++part_no >= part_info->no_parts)
      break;
  }
}
3871 3872
/*
  SYNOPSIS
3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884
    fast_end_partition()
    thd                           Thread object
    out:copied                    Number of records copied
    out:deleted                   Number of records deleted
    table_list                    Table list with the one table in it
    empty                         Has nothing been done
    lpt                           Struct to be used by error handler

  RETURN VALUES
    FALSE                         Success
    TRUE                          Failure

3885
  DESCRIPTION
3886 3887
    Support routine to handle the successful cases for partition
    management.
3888 3889
*/

3890 3891
static int fast_end_partition(THD *thd, ulonglong copied,
                              ulonglong deleted,
3892
                              TABLE *table,
3893 3894 3895
                              TABLE_LIST *table_list, bool is_empty,
                              ALTER_PARTITION_PARAM_TYPE *lpt,
                              bool written_bin_log)
3896
{
3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916
  int error;
  DBUG_ENTER("fast_end_partition");

  thd->proc_info="end";
  if (!is_empty)
    query_cache_invalidate3(thd, table_list, 0);
  error= ha_commit_stmt(thd);
  if (ha_commit(thd))
    error= 1;
  if (!error || is_empty)
  {
    char tmp_name[80];
    if ((!is_empty) && (!written_bin_log) &&
        (!thd->lex->no_write_to_binlog))
      write_bin_log(thd, FALSE, thd->query, thd->query_length);
    close_thread_tables(thd);
    my_snprintf(tmp_name, sizeof(tmp_name), ER(ER_INSERT_INFO),
                (ulong) (copied + deleted),
                (ulong) deleted,
                (ulong) 0);
3917
    send_ok(thd, (ha_rows) (copied+deleted),0L,tmp_name);
3918 3919
    DBUG_RETURN(FALSE);
  }
3920
  table->file->print_error(error, MYF(0));
3921 3922 3923 3924
  DBUG_RETURN(TRUE);
}


3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952
/*
  Check engine mix that it is correct
  SYNOPSIS
    check_engine_condition()
    p_elem                   Partition element
    default_engine           Have user specified engine on table level
    inout::engine_type       Current engine used
    inout::first             Is it first partition
  RETURN VALUE
    TRUE                     Failed check
    FALSE                    Ok
  DESCRIPTION
    (specified partition handler ) specified table handler
    (NDB, NDB) NDB           OK
    (MYISAM, MYISAM) -       OK
    (MYISAM, -)      -       NOT OK
    (MYISAM, -)    MYISAM    OK
    (- , MYISAM)   -         NOT OK
    (- , -)        MYISAM    OK
    (-,-)          -         OK
    (NDB, MYISAM) *          NOT OK
*/

static bool check_engine_condition(partition_element *p_elem,
                                   bool default_engine,
                                   handlerton **engine_type,
                                   bool *first)
{
3953 3954 3955
  DBUG_ENTER("check_engine_condition");

  DBUG_PRINT("enter", ("def_eng = %u, first = %u", default_engine, *first));
3956
  if (*first && default_engine)
3957
  {
3958
    *engine_type= p_elem->engine_type;
3959
  }
3960 3961
  *first= FALSE;
  if ((!default_engine &&
3962 3963
      (p_elem->engine_type != (*engine_type) &&
       p_elem->engine_type)) ||
3964
      (default_engine &&
3965 3966 3967 3968
       p_elem->engine_type != (*engine_type)))
  {
    DBUG_RETURN(TRUE);
  }
3969
  else
3970 3971 3972
  {
    DBUG_RETURN(FALSE);
  }
3973 3974
}

3975 3976 3977
/*
  We need to check if engine used by all partitions can handle
  partitioning natively.
3978

3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002
  SYNOPSIS
    check_native_partitioned()
    create_info            Create info in CREATE TABLE
    out:ret_val            Return value
    part_info              Partition info
    thd                    Thread object

  RETURN VALUES
  Value returned in bool ret_value
    TRUE                   Native partitioning supported by engine
    FALSE                  Need to use partition handler

  Return value from function
    TRUE                   Error
    FALSE                  Success
*/

static bool check_native_partitioned(HA_CREATE_INFO *create_info,bool *ret_val,
                                     partition_info *part_info, THD *thd)
{
  List_iterator<partition_element> part_it(part_info->partitions);
  bool first= TRUE;
  bool default_engine;
  handlerton *engine_type= create_info->db_type;
4003
  handlerton *old_engine_type= engine_type;
4004
  uint i= 0;
4005
  uint no_parts= part_info->partitions.elements;
4006 4007
  DBUG_ENTER("check_native_partitioned");

4008 4009
  default_engine= (create_info->used_fields & HA_CREATE_USED_ENGINE) ?
                   FALSE : TRUE;
4010 4011 4012
  DBUG_PRINT("info", ("engine_type = %u, default = %u",
                       ha_legacy_type(engine_type),
                       default_engine));
4013
  if (no_parts)
4014
  {
4015
    do
4016
    {
4017
      partition_element *part_elem= part_it++;
4018
      if (part_info->is_sub_partitioned() &&
4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045
          part_elem->subpartitions.elements)
      {
        uint no_subparts= part_elem->subpartitions.elements;
        uint j= 0;
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        do
        {
          partition_element *sub_elem= sub_it++;
          if (check_engine_condition(sub_elem, default_engine,
                                     &engine_type, &first))
            goto error;
        } while (++j < no_subparts);
        /*
          In case of subpartitioning and defaults we allow that only
          subparts have specified engines, as long as the parts haven't
          specified the wrong engine it's ok.
        */
        if (check_engine_condition(part_elem, FALSE,
                                   &engine_type, &first))
          goto error;
      }
      else if (check_engine_condition(part_elem, default_engine,
                                      &engine_type, &first))
        goto error;
    } while (++i < no_parts);
  }

4046 4047 4048 4049
  /*
    All engines are of the same type. Check if this engine supports
    native partitioning.
  */
4050 4051 4052 4053 4054

  if (!engine_type)
    engine_type= old_engine_type;
  DBUG_PRINT("info", ("engine_type = %s",
              ha_resolve_storage_engine_name(engine_type)));
4055 4056 4057 4058 4059 4060 4061 4062
  if (engine_type->partition_flags &&
      (engine_type->partition_flags() & HA_CAN_PARTITION))
  {
    create_info->db_type= engine_type;
    DBUG_PRINT("info", ("Changed to native partitioning"));
    *ret_val= TRUE;
  }
  DBUG_RETURN(FALSE);
4063 4064 4065 4066 4067
error:
  /*
    Mixed engines not yet supported but when supported it will need
    the partition handler
  */
4068
  my_error(ER_MIX_HANDLER_ERROR, MYF(0));
4069 4070
  *ret_val= FALSE;
  DBUG_RETURN(TRUE);
4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103
}


/*
  Prepare for ALTER TABLE of partition structure

  SYNOPSIS
    prep_alter_part_table()
    thd                        Thread object
    table                      Table object
    inout:alter_info           Alter information
    inout:create_info          Create info for CREATE TABLE
    old_db_type                Old engine type
    out:partition_changed      Boolean indicating whether partition changed
    out:fast_alter_partition   Boolean indicating whether fast partition
                               change is requested

  RETURN VALUES
    TRUE                       Error
    FALSE                      Success
    partition_changed
    fast_alter_partition

  DESCRIPTION
    This method handles all preparations for ALTER TABLE for partitioned
    tables
    We need to handle both partition management command such as Add Partition
    and others here as well as an ALTER TABLE that completely changes the
    partitioning and yet others that don't change anything at all. We start
    by checking the partition management variants and then check the general
    change patterns.
*/

4104
uint prep_alter_part_table(THD *thd, TABLE *table, Alter_info *alter_info,
4105 4106 4107 4108 4109 4110 4111
                           HA_CREATE_INFO *create_info,
                           handlerton *old_db_type,
                           bool *partition_changed,
                           uint *fast_alter_partition)
{
  DBUG_ENTER("prep_alter_part_table");

4112 4113 4114 4115 4116 4117 4118 4119 4120
  /*
    We are going to manipulate the partition info on the table object
    so we need to ensure that the data structure of the table object
    is freed by setting version to 0. table->s->version= 0 forces a
    flush of the table object in close_thread_tables().
  */
  if (table->part_info)
    table->s->version= 0L;

4121 4122 4123
  thd->work_part_info= thd->lex->part_info;
  if (thd->work_part_info &&
      !(thd->work_part_info= thd->lex->part_info->get_clone()))
4124 4125
    DBUG_RETURN(TRUE);

4126 4127 4128 4129 4130 4131 4132 4133
  if (alter_info->flags &
      (ALTER_ADD_PARTITION | ALTER_DROP_PARTITION |
       ALTER_COALESCE_PARTITION | ALTER_REORGANIZE_PARTITION |
       ALTER_TABLE_REORG | ALTER_OPTIMIZE_PARTITION |
       ALTER_CHECK_PARTITION | ALTER_ANALYZE_PARTITION |
       ALTER_REPAIR_PARTITION | ALTER_REBUILD_PARTITION))
  {
    partition_info *tab_part_info= table->part_info;
4134
    partition_info *alt_part_info= thd->work_part_info;
4135
    uint flags= 0;
4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149
    if (!tab_part_info)
    {
      my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0));
      DBUG_RETURN(TRUE);
    }
    if (alter_info->flags == ALTER_TABLE_REORG)
    {
      uint new_part_no, curr_part_no;
      if (tab_part_info->part_type != HASH_PARTITION ||
          tab_part_info->use_default_no_partitions)
      {
        my_error(ER_REORG_NO_PARAM_ERROR, MYF(0));
        DBUG_RETURN(TRUE);
      }
4150
      new_part_no= table->file->get_default_no_partitions(create_info);
4151 4152 4153 4154 4155 4156 4157 4158
      curr_part_no= tab_part_info->no_parts;
      if (new_part_no == curr_part_no)
      {
        /*
          No change is needed, we will have the same number of partitions
          after the change as before. Thus we can reply ok immediately
          without any changes at all.
        */
4159 4160
        DBUG_RETURN(fast_end_partition(thd, ULL(0), ULL(0),
                                       table, NULL,
4161 4162 4163 4164 4165 4166 4167 4168 4169
                                       TRUE, NULL, FALSE));
      }
      else if (new_part_no > curr_part_no)
      {
        /*
          We will add more partitions, we use the ADD PARTITION without
          setting the flag for no default number of partitions
        */
        alter_info->flags|= ALTER_ADD_PARTITION;
4170
        thd->work_part_info->no_parts= new_part_no - curr_part_no;
4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181
      }
      else
      {
        /*
          We will remove hash partitions, we use the COALESCE PARTITION
          without setting the flag for no default number of partitions
        */
        alter_info->flags|= ALTER_COALESCE_PARTITION;
        alter_info->no_parts= curr_part_no - new_part_no;
      }
    }
antony@ppcg5.local's avatar
antony@ppcg5.local committed
4182 4183
    if (table->s->db_type()->alter_table_flags &&
        (!(flags= table->s->db_type()->alter_table_flags(alter_info->flags))))
4184 4185 4186 4187
    {
      my_error(ER_PARTITION_FUNCTION_FAILURE, MYF(0));
      DBUG_RETURN(1);
    }
4188 4189 4190 4191
    *fast_alter_partition=
      ((flags & (HA_FAST_CHANGE_PARTITION | HA_PARTITION_ONE_PHASE)) != 0);
    DBUG_PRINT("info", ("*fast_alter_partition: %d  flags: 0x%x",
                        *fast_alter_partition, flags));
4192 4193
    if (((alter_info->flags & ALTER_ADD_PARTITION) ||
         (alter_info->flags & ALTER_REORGANIZE_PARTITION)) &&
4194 4195
         (thd->work_part_info->part_type != tab_part_info->part_type) &&
         (thd->work_part_info->part_type != NOT_A_PARTITION))
4196
    {
4197
      if (thd->work_part_info->part_type == RANGE_PARTITION)
4198 4199 4200 4201
      {
        my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
                 "RANGE", "LESS THAN");
      }
4202
      else if (thd->work_part_info->part_type == LIST_PARTITION)
4203
      {
4204
        DBUG_ASSERT(thd->work_part_info->part_type == LIST_PARTITION);
4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220
        my_error(ER_PARTITION_WRONG_VALUES_ERROR, MYF(0),
                 "LIST", "IN");
      }
      else if (tab_part_info->part_type == RANGE_PARTITION)
      {
        my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
                 "RANGE", "LESS THAN");
      }
      else
      {
        DBUG_ASSERT(tab_part_info->part_type == LIST_PARTITION);
        my_error(ER_PARTITION_REQUIRES_VALUES_ERROR, MYF(0),
                 "LIST", "IN");
      }
      DBUG_RETURN(TRUE);
    }
4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247
    if (alter_info->flags & ALTER_ADD_PARTITION)
    {
      /*
        We start by moving the new partitions to the list of temporary
        partitions. We will then check that the new partitions fit in the
        partitioning scheme as currently set-up.
        Partitions are always added at the end in ADD PARTITION.
      */
      uint no_new_partitions= alt_part_info->no_parts;
      uint no_orig_partitions= tab_part_info->no_parts;
      uint check_total_partitions= no_new_partitions + no_orig_partitions;
      uint new_total_partitions= check_total_partitions;
      /*
        We allow quite a lot of values to be supplied by defaults, however we
        must know the number of new partitions in this case.
      */
      if (thd->lex->no_write_to_binlog &&
          tab_part_info->part_type != HASH_PARTITION)
      {
        my_error(ER_NO_BINLOG_ERROR, MYF(0));
        DBUG_RETURN(TRUE);
      } 
      if (no_new_partitions == 0)
      {
        my_error(ER_ADD_PARTITION_NO_NEW_PARTITION, MYF(0));
        DBUG_RETURN(TRUE);
      }
4248
      if (tab_part_info->is_sub_partitioned())
4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265
      {
        if (alt_part_info->no_subparts == 0)
          alt_part_info->no_subparts= tab_part_info->no_subparts;
        else if (alt_part_info->no_subparts != tab_part_info->no_subparts)
        {
          my_error(ER_ADD_PARTITION_SUBPART_ERROR, MYF(0));
          DBUG_RETURN(TRUE);
        }
        check_total_partitions= new_total_partitions*
                                alt_part_info->no_subparts;
      }
      if (check_total_partitions > MAX_PARTITIONS)
      {
        my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
        DBUG_RETURN(TRUE);
      }
      alt_part_info->part_type= tab_part_info->part_type;
4266
      alt_part_info->subpart_type= tab_part_info->subpart_type;
4267 4268 4269
      if (alt_part_info->set_up_defaults_for_partitioning(table->file,
                                                          ULL(0), 
                                                          tab_part_info->no_parts))
4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459
      {
        DBUG_RETURN(TRUE);
      }
/*
Handling of on-line cases:

ADD PARTITION for RANGE/LIST PARTITIONING:
------------------------------------------
For range and list partitions add partition is simply adding a
new empty partition to the table. If the handler supports this we
will use the simple method of doing this. The figure below shows
an example of this and the states involved in making this change.
            
Existing partitions                                     New added partitions
------       ------        ------        ------      |  ------    ------
|    |       |    |        |    |        |    |      |  |    |    |    |
| p0 |       | p1 |        | p2 |        | p3 |      |  | p4 |    | p5 |
------       ------        ------        ------      |  ------    ------
PART_NORMAL  PART_NORMAL   PART_NORMAL   PART_NORMAL    PART_TO_BE_ADDED*2
PART_NORMAL  PART_NORMAL   PART_NORMAL   PART_NORMAL    PART_IS_ADDED*2

The first line is the states before adding the new partitions and the 
second line is after the new partitions are added. All the partitions are
in the partitions list, no partitions are placed in the temp_partitions
list.

ADD PARTITION for HASH PARTITIONING
-----------------------------------
This little figure tries to show the various partitions involved when
adding two new partitions to a linear hash based partitioned table with
four partitions to start with, which lists are used and the states they
pass through. Adding partitions to a normal hash based is similar except
that it is always all the existing partitions that are reorganised not
only a subset of them.

Existing partitions                                     New added partitions
------       ------        ------        ------      |  ------    ------
|    |       |    |        |    |        |    |      |  |    |    |    |
| p0 |       | p1 |        | p2 |        | p3 |      |  | p4 |    | p5 |
------       ------        ------        ------      |  ------    ------
PART_CHANGED PART_CHANGED  PART_NORMAL   PART_NORMAL    PART_TO_BE_ADDED
PART_IS_CHANGED*2          PART_NORMAL   PART_NORMAL    PART_IS_ADDED
PART_NORMAL  PART_NORMAL   PART_NORMAL   PART_NORMAL    PART_IS_ADDED

Reorganised existing partitions
------      ------
|    |      |    |
| p0'|      | p1'|
------      ------

p0 - p5 will be in the partitions list of partitions.
p0' and p1' will actually not exist as separate objects, their presence can
be deduced from the state of the partition and also the names of those
partitions can be deduced this way.

After adding the partitions and copying the partition data to p0', p1',
p4 and p5 from p0 and p1 the states change to adapt for the new situation
where p0 and p1 are dropped and replaced by p0' and p1' and the new p4 and
p5 are in the table again.

The first line above shows the states of the partitions before we start
adding and copying partitions, the second after completing the adding
and copying and finally the third line after also dropping the partitions
that are reorganised.
*/
      if (*fast_alter_partition &&
          tab_part_info->part_type == HASH_PARTITION)
      {
        uint part_no= 0, start_part= 1, start_sec_part= 1;
        uint end_part= 0, end_sec_part= 0;
        uint upper_2n= tab_part_info->linear_hash_mask + 1;
        uint lower_2n= upper_2n >> 1;
        bool all_parts= TRUE;
        if (tab_part_info->linear_hash_ind &&
            no_new_partitions < upper_2n)
        {
          /*
            An analysis of which parts needs reorganisation shows that it is
            divided into two intervals. The first interval is those parts
            that are reorganised up until upper_2n - 1. From upper_2n and
            onwards it starts again from partition 0 and goes on until
            it reaches p(upper_2n - 1). If the last new partition reaches
            beyond upper_2n - 1 then the first interval will end with
            p(lower_2n - 1) and start with p(no_orig_partitions - lower_2n).
            If lower_2n partitions are added then p0 to p(lower_2n - 1) will
            be reorganised which means that the two intervals become one
            interval at this point. Thus only when adding less than
            lower_2n partitions and going beyond a total of upper_2n we
            actually get two intervals.

            To exemplify this assume we have 6 partitions to start with and
            add 1, 2, 3, 5, 6, 7, 8, 9 partitions.
            The first to add after p5 is p6 = 110 in bit numbers. Thus we
            can see that 10 = p2 will be partition to reorganise if only one
            partition.
            If 2 partitions are added we reorganise [p2, p3]. Those two
            cases are covered by the second if part below.
            If 3 partitions are added we reorganise [p2, p3] U [p0,p0]. This
            part is covered by the else part below.
            If 5 partitions are added we get [p2,p3] U [p0, p2] = [p0, p3].
            This is covered by the first if part where we need the max check
            to here use lower_2n - 1.
            If 7 partitions are added we get [p2,p3] U [p0, p4] = [p0, p4].
            This is covered by the first if part but here we use the first
            calculated end_part.
            Finally with 9 new partitions we would also reorganise p6 if we
            used the method below but we cannot reorganise more partitions
            than what we had from the start and thus we simply set all_parts
            to TRUE. In this case we don't get into this if-part at all.
          */
          all_parts= FALSE;
          if (no_new_partitions >= lower_2n)
          {
            /*
              In this case there is only one interval since the two intervals
              overlap and this starts from zero to last_part_no - upper_2n
            */
            start_part= 0;
            end_part= new_total_partitions - (upper_2n + 1);
            end_part= max(lower_2n - 1, end_part);
          }
          else if (new_total_partitions <= upper_2n)
          {
            /*
              Also in this case there is only one interval since we are not
              going over a 2**n boundary
            */
            start_part= no_orig_partitions - lower_2n;
            end_part= start_part + (no_new_partitions - 1);
          }
          else
          {
            /* We have two non-overlapping intervals since we are
               passing a 2**n border but do not have at least lower_2n
               new parts that would ensure that the intervals become
               overlapping.
            */
            start_part= no_orig_partitions - lower_2n;
            end_part= upper_2n - 1;
            start_sec_part= 0;
            end_sec_part= new_total_partitions - (upper_2n + 1);
          }
        }
        List_iterator<partition_element> tab_it(tab_part_info->partitions);
        part_no= 0;
        do
        {
          partition_element *p_elem= tab_it++;
          if (all_parts ||
              (part_no >= start_part && part_no <= end_part) ||
              (part_no >= start_sec_part && part_no <= end_sec_part))
          {
            p_elem->part_state= PART_CHANGED;
          }
        } while (++part_no < no_orig_partitions);
      }
      /*
        Need to concatenate the lists here to make it possible to check the
        partition info for correctness using check_partition_info.
        For on-line add partition we set the state of this partition to
        PART_TO_BE_ADDED to ensure that it is known that it is not yet
        usable (becomes usable when partition is created and the switch of
        partition configuration is made).
      */
      {
        List_iterator<partition_element> alt_it(alt_part_info->partitions);
        uint part_count= 0;
        do
        {
          partition_element *part_elem= alt_it++;
          if (*fast_alter_partition)
            part_elem->part_state= PART_TO_BE_ADDED;
          if (tab_part_info->partitions.push_back(part_elem))
          {
            mem_alloc_error(1);
            DBUG_RETURN(TRUE);
          }
        } while (++part_count < no_new_partitions);
        tab_part_info->no_parts+= no_new_partitions;
      }
      /*
        If we specify partitions explicitly we don't use defaults anymore.
        Using ADD PARTITION also means that we don't have the default number
        of partitions anymore. We use this code also for Table reorganisations
        and here we don't set any default flags to FALSE.
      */
      if (!(alter_info->flags & ALTER_TABLE_REORG))
      {
        if (!alt_part_info->use_default_partitions)
        {
4460
          DBUG_PRINT("info", ("part_info: 0x%lx", (long) tab_part_info));
4461 4462 4463
          tab_part_info->use_default_partitions= FALSE;
        }
        tab_part_info->use_default_no_partitions= FALSE;
4464
        tab_part_info->is_auto_partitioned= FALSE;
4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479
      }
    }
    else if (alter_info->flags == ALTER_DROP_PARTITION)
    {
      /*
        Drop a partition from a range partition and list partitioning is
        always safe and can be made more or less immediate. It is necessary
        however to ensure that the partition to be removed is safely removed
        and that REPAIR TABLE can remove the partition if for some reason the
        command to drop the partition failed in the middle.
      */
      uint part_count= 0;
      uint no_parts_dropped= alter_info->partition_names.elements;
      uint no_parts_found= 0;
      List_iterator<partition_element> part_it(tab_part_info->partitions);
4480 4481

      tab_part_info->is_auto_partitioned= FALSE;
4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515
      if (!(tab_part_info->part_type == RANGE_PARTITION ||
            tab_part_info->part_type == LIST_PARTITION))
      {
        my_error(ER_ONLY_ON_RANGE_LIST_PARTITION, MYF(0), "DROP");
        DBUG_RETURN(TRUE);
      }
      if (no_parts_dropped >= tab_part_info->no_parts)
      {
        my_error(ER_DROP_LAST_PARTITION, MYF(0));
        DBUG_RETURN(TRUE);
      }
      do
      {
        partition_element *part_elem= part_it++;
        if (is_name_in_list(part_elem->partition_name,
                            alter_info->partition_names))
        {
          /*
            Set state to indicate that the partition is to be dropped.
          */
          no_parts_found++;
          part_elem->part_state= PART_TO_BE_DROPPED;
        }
      } while (++part_count < tab_part_info->no_parts);
      if (no_parts_found != no_parts_dropped)
      {
        my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0), "DROP");
        DBUG_RETURN(TRUE);
      }
      if (table->file->is_fk_defined_on_table_or_index(MAX_KEY))
      {
        my_error(ER_ROW_IS_REFERENCED, MYF(0));
        DBUG_RETURN(TRUE);
      }
4516
      tab_part_info->no_parts-= no_parts_dropped;
4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560
    }
    else if ((alter_info->flags & ALTER_OPTIMIZE_PARTITION) ||
             (alter_info->flags & ALTER_ANALYZE_PARTITION) ||
             (alter_info->flags & ALTER_CHECK_PARTITION) ||
             (alter_info->flags & ALTER_REPAIR_PARTITION) ||
             (alter_info->flags & ALTER_REBUILD_PARTITION))
    {
      uint no_parts_opt= alter_info->partition_names.elements;
      uint part_count= 0;
      uint no_parts_found= 0;
      List_iterator<partition_element> part_it(tab_part_info->partitions);

      do
      {
        partition_element *part_elem= part_it++;
        if ((alter_info->flags & ALTER_ALL_PARTITION) ||
            (is_name_in_list(part_elem->partition_name,
                             alter_info->partition_names)))
        {
          /*
            Mark the partition as a partition to be "changed" by
            analyzing/optimizing/rebuilding/checking/repairing
          */
          no_parts_found++;
          part_elem->part_state= PART_CHANGED;
        }
      } while (++part_count < tab_part_info->no_parts);
      if (no_parts_found != no_parts_opt &&
          (!(alter_info->flags & ALTER_ALL_PARTITION)))
      {
        const char *ptr;
        if (alter_info->flags & ALTER_OPTIMIZE_PARTITION)
          ptr= "OPTIMIZE";
        else if (alter_info->flags & ALTER_ANALYZE_PARTITION)
          ptr= "ANALYZE";
        else if (alter_info->flags & ALTER_CHECK_PARTITION)
          ptr= "CHECK";
        else if (alter_info->flags & ALTER_REPAIR_PARTITION)
          ptr= "REPAIR";
        else
          ptr= "REBUILD";
        my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0), ptr);
        DBUG_RETURN(TRUE);
      }
4561 4562 4563 4564 4565
      if (!(*fast_alter_partition))
      {
        table->file->print_error(HA_ERR_WRONG_COMMAND, MYF(0));
        DBUG_RETURN(TRUE);
      }
4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665
    }
    else if (alter_info->flags & ALTER_COALESCE_PARTITION)
    {
      uint no_parts_coalesced= alter_info->no_parts;
      uint no_parts_remain= tab_part_info->no_parts - no_parts_coalesced;
      List_iterator<partition_element> part_it(tab_part_info->partitions);
      if (tab_part_info->part_type != HASH_PARTITION)
      {
        my_error(ER_COALESCE_ONLY_ON_HASH_PARTITION, MYF(0));
        DBUG_RETURN(TRUE);
      }
      if (no_parts_coalesced == 0)
      {
        my_error(ER_COALESCE_PARTITION_NO_PARTITION, MYF(0));
        DBUG_RETURN(TRUE);
      }
      if (no_parts_coalesced >= tab_part_info->no_parts)
      {
        my_error(ER_DROP_LAST_PARTITION, MYF(0));
        DBUG_RETURN(TRUE);
      }
/*
Online handling:
COALESCE PARTITION:
-------------------
The figure below shows the manner in which partitions are handled when
performing an on-line coalesce partition and which states they go through
at start, after adding and copying partitions and finally after dropping
the partitions to drop. The figure shows an example using four partitions
to start with, using linear hash and coalescing one partition (always the
last partition).

Using linear hash then all remaining partitions will have a new reorganised
part.

Existing partitions                     Coalesced partition 
------       ------              ------   |      ------
|    |       |    |              |    |   |      |    |
| p0 |       | p1 |              | p2 |   |      | p3 |
------       ------              ------   |      ------
PART_NORMAL  PART_CHANGED        PART_NORMAL     PART_REORGED_DROPPED
PART_NORMAL  PART_IS_CHANGED     PART_NORMAL     PART_TO_BE_DROPPED
PART_NORMAL  PART_NORMAL         PART_NORMAL     PART_IS_DROPPED

Reorganised existing partitions
            ------
            |    |
            | p1'|
            ------

p0 - p3 are in the partitions list.
The p1' partition will actually not be in any list; it is deduced from the
state of p1.
*/
      {
        uint part_count= 0, start_part= 1, start_sec_part= 1;
        uint end_part= 0, end_sec_part= 0;
        bool all_parts= TRUE;
        if (*fast_alter_partition &&
            tab_part_info->linear_hash_ind)
        {
          uint upper_2n= tab_part_info->linear_hash_mask + 1;
          uint lower_2n= upper_2n >> 1;
          all_parts= FALSE;
          if (no_parts_coalesced >= lower_2n)
          {
            all_parts= TRUE;
          }
          else if (no_parts_remain >= lower_2n)
          {
            end_part= tab_part_info->no_parts - (lower_2n + 1);
            start_part= no_parts_remain - lower_2n;
          }
          else
          {
            start_part= 0;
            end_part= tab_part_info->no_parts - (lower_2n + 1);
            end_sec_part= (lower_2n >> 1) - 1;
            start_sec_part= end_sec_part - (lower_2n - (no_parts_remain + 1));
          }
        }
        do
        {
          partition_element *p_elem= part_it++;
          if (*fast_alter_partition &&
              (all_parts ||
              (part_count >= start_part && part_count <= end_part) ||
              (part_count >= start_sec_part && part_count <= end_sec_part)))
            p_elem->part_state= PART_CHANGED;
          if (++part_count > no_parts_remain)
          {
            if (*fast_alter_partition)
              p_elem->part_state= PART_REORGED_DROPPED;
            else
              part_it.remove();
          }
        } while (part_count < tab_part_info->no_parts);
        tab_part_info->no_parts= no_parts_remain;
      }
      if (!(alter_info->flags & ALTER_TABLE_REORG))
4666
      {
4667
        tab_part_info->use_default_no_partitions= FALSE;
4668 4669
        tab_part_info->is_auto_partitioned= FALSE;
      }
4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684
    }
    else if (alter_info->flags == ALTER_REORGANIZE_PARTITION)
    {
      /*
        Reorganise partitions takes a number of partitions that are next
        to each other (at least for RANGE PARTITIONS) and then uses those
        to create a set of new partitions. So data is copied from those
        partitions into the new set of partitions. Those new partitions
        can have more values in the LIST value specifications or fewer; both
        are allowed. The ranges can be different but since they are 
        changing a set of consecutive partitions they must cover the same
        range as those changed from.
        This command can be used on RANGE and LIST partitions.
      */
      uint no_parts_reorged= alter_info->partition_names.elements;
4685 4686
      uint no_parts_new= thd->work_part_info->partitions.elements;
      partition_info *alt_part_info= thd->work_part_info;
4687
      uint check_total_partitions;
4688 4689

      tab_part_info->is_auto_partitioned= FALSE;
4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701
      if (no_parts_reorged > tab_part_info->no_parts)
      {
        my_error(ER_REORG_PARTITION_NOT_EXIST, MYF(0));
        DBUG_RETURN(TRUE);
      }
      if (!(tab_part_info->part_type == RANGE_PARTITION ||
            tab_part_info->part_type == LIST_PARTITION) &&
           (no_parts_new != no_parts_reorged))
      {
        my_error(ER_REORG_HASH_ONLY_ON_SAME_NO, MYF(0));
        DBUG_RETURN(TRUE);
      }
4702 4703 4704 4705 4706 4707 4708
      if (tab_part_info->is_sub_partitioned() &&
          alt_part_info->no_subparts &&
          alt_part_info->no_subparts != tab_part_info->no_subparts)
      {
        my_error(ER_PARTITION_WRONG_NO_SUBPART_ERROR, MYF(0));
        DBUG_RETURN(TRUE);
      }
4709 4710 4711 4712 4713 4714 4715
      check_total_partitions= tab_part_info->no_parts + no_parts_new;
      check_total_partitions-= no_parts_reorged;
      if (check_total_partitions > MAX_PARTITIONS)
      {
        my_error(ER_TOO_MANY_PARTITIONS_ERROR, MYF(0));
        DBUG_RETURN(TRUE);
      }
4716 4717
      alt_part_info->part_type= tab_part_info->part_type;
      alt_part_info->subpart_type= tab_part_info->subpart_type;
4718
      alt_part_info->no_subparts= tab_part_info->no_subparts;
4719 4720 4721 4722 4723 4724 4725
      DBUG_ASSERT(!alt_part_info->use_default_partitions);
      if (alt_part_info->set_up_defaults_for_partitioning(table->file,
                                                          ULL(0), 
                                                          0))
      {
        DBUG_RETURN(TRUE);
      }
4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850
/*
Online handling:
REORGANIZE PARTITION:
---------------------
The figure exemplifies the handling of partitions, their state changes and
how they are organised. It exemplifies four partitions where two of the
partitions are reorganised (p1 and p2) into two new partitions (p4 and p5).
The reason of this change could be to change range limits, change list
values or for hash partitions simply reorganise the partition which could
also involve moving them to new disks or new node groups (MySQL Cluster).

Existing partitions                                  
------       ------        ------        ------
|    |       |    |        |    |        |    |
| p0 |       | p1 |        | p2 |        | p3 |
------       ------        ------        ------
PART_NORMAL  PART_TO_BE_REORGED          PART_NORMAL
PART_NORMAL  PART_TO_BE_DROPPED          PART_NORMAL
PART_NORMAL  PART_IS_DROPPED             PART_NORMAL

Reorganised new partitions (replacing p1 and p2)
------      ------
|    |      |    |
| p4 |      | p5 |
------      ------
PART_TO_BE_ADDED
PART_IS_ADDED
PART_IS_ADDED

All unchanged partitions and the new partitions are in the partitions list
in the order they will have when the change is completed. The reorganised
partitions are placed in the temp_partitions list. PART_IS_ADDED is only a
temporary state not written in the frm file. It is used to ensure we write
the generated partition syntax in a correct manner.
*/
      {
        List_iterator<partition_element> tab_it(tab_part_info->partitions);
        uint part_count= 0;
        bool found_first= FALSE;
        bool found_last= FALSE;
        bool is_last_partition_reorged;
        uint drop_count= 0;
        longlong tab_max_range= 0, alt_max_range= 0;
        do
        {
          partition_element *part_elem= tab_it++;
          is_last_partition_reorged= FALSE;
          if (is_name_in_list(part_elem->partition_name,
                              alter_info->partition_names))
          {
            is_last_partition_reorged= TRUE;
            drop_count++;
            tab_max_range= part_elem->range_value;
            if (*fast_alter_partition &&
                tab_part_info->temp_partitions.push_back(part_elem))
            {
              mem_alloc_error(1);
              DBUG_RETURN(TRUE);
            }
            if (*fast_alter_partition)
              part_elem->part_state= PART_TO_BE_REORGED;
            if (!found_first)
            {
              uint alt_part_count= 0;
              found_first= TRUE;
              List_iterator<partition_element>
                                 alt_it(alt_part_info->partitions);
              do
              {
                partition_element *alt_part_elem= alt_it++;
                alt_max_range= alt_part_elem->range_value;
                if (*fast_alter_partition)
                  alt_part_elem->part_state= PART_TO_BE_ADDED;
                if (alt_part_count == 0)
                  tab_it.replace(alt_part_elem);
                else
                  tab_it.after(alt_part_elem);
              } while (++alt_part_count < no_parts_new);
            }
            else if (found_last)
            {
              my_error(ER_CONSECUTIVE_REORG_PARTITIONS, MYF(0));
              DBUG_RETURN(TRUE);
            }
            else
              tab_it.remove();
          }
          else
          {
            if (found_first)
              found_last= TRUE;
          }
        } while (++part_count < tab_part_info->no_parts);
        if (drop_count != no_parts_reorged)
        {
          my_error(ER_DROP_PARTITION_NON_EXISTENT, MYF(0), "REORGANIZE");
          DBUG_RETURN(TRUE);
        }
        if (tab_part_info->part_type == RANGE_PARTITION &&
            ((is_last_partition_reorged &&
               alt_max_range < tab_max_range) ||
              (!is_last_partition_reorged &&
               alt_max_range != tab_max_range)))
        {
          /*
            For range partitioning the total resulting range before and
            after the change must be the same except in one case. This is
            when the last partition is reorganised, in this case it is
            acceptable to increase the total range.
            The reason is that it is not allowed to have "holes" in the
            middle of the ranges and thus we should not allow to reorganise
            to create "holes". Also we should not allow using REORGANIZE
            to drop data.
          */
          my_error(ER_REORG_OUTSIDE_RANGE, MYF(0));
          DBUG_RETURN(TRUE);
        }
        tab_part_info->no_parts= check_total_partitions;
      }
    }
    else
    {
      DBUG_ASSERT(FALSE);
    }
    *partition_changed= TRUE;
4851
    thd->work_part_info= tab_part_info;
4852 4853 4854
    if (alter_info->flags == ALTER_ADD_PARTITION ||
        alter_info->flags == ALTER_REORGANIZE_PARTITION)
    {
4855
      if (tab_part_info->use_default_subpartitions &&
4856 4857 4858 4859 4860
          !alt_part_info->use_default_subpartitions)
      {
        tab_part_info->use_default_subpartitions= FALSE;
        tab_part_info->use_default_no_subpartitions= FALSE;
      }
4861
      if (tab_part_info->check_partition_info(thd, (handlerton**)NULL,
4862
                                              table->file, ULL(0), FALSE))
4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883
      {
        DBUG_RETURN(TRUE);
      }
    }
  }
  else
  {
    /*
     When thd->lex->part_info has a reference to a partition_info the
     ALTER TABLE contained a definition of a partitioning.

     Case I:
       If there was a partition before and there is a new one defined.
       We use the new partitioning. The new partitioning is already
       defined in the correct variable so no work is needed to
       accomplish this.
       We do however need to update partition_changed to ensure that not
       only the frm file is changed in the ALTER TABLE command.

     Case IIa:
       There was a partitioning before and there is no new one defined.
4884
       Also the user has not specified to remove partitioning explicitly.
4885 4886 4887 4888 4889 4890 4891 4892

       We use the old partitioning also for the new table. We do this
       by assigning the partition_info from the table loaded in
       open_ltable to the partition_info struct used by mysql_create_table
       later in this method.

     Case IIb:
       There was a partitioning before and there is no new one defined.
4893
       The user has specified explicitly to remove partitioning
4894

4895 4896 4897
       Since the user has specified explicitly to remove partitioning
       we override the old partitioning info and create a new table using
       the specified engine.
4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919
       In this case the partition also is changed.

     Case III:
       There was no partitioning before altering the table, there is
       partitioning defined in the altered table. Use the new partitioning.
       No work needed since the partitioning info is already in the
       correct variable.

       In this case we discover one case where the new partitioning is using
       the same partition function as the default (PARTITION BY KEY or
       PARTITION BY LINEAR KEY with the list of fields equal to the primary
       key fields OR PARTITION BY [LINEAR] KEY() for tables without primary
       key)
       Also here partition has changed and thus a new table must be
       created.

     Case IV:
       There was no partitioning before and no partitioning defined.
       Obviously no work needed.
    */
    if (table->part_info)
    {
4920
      if (alter_info->flags & ALTER_REMOVE_PARTITIONING)
4921 4922
      {
        DBUG_PRINT("info", ("Remove partitioning"));
4923
        if (!(create_info->used_fields & HA_CREATE_USED_ENGINE))
4924 4925 4926 4927
        {
          DBUG_PRINT("info", ("No explicit engine used"));
          create_info->db_type= table->part_info->default_engine_type;
        }
serg@sergbook.mysql.com's avatar
serg@sergbook.mysql.com committed
4928
        DBUG_PRINT("info", ("New engine type: %s",
antony@ppcg5.local's avatar
antony@ppcg5.local committed
4929
                   ha_resolve_storage_engine_name(create_info->db_type)));
4930
        thd->work_part_info= NULL;
4931 4932
        *partition_changed= TRUE;
      }
4933
      else if (!thd->work_part_info)
4934 4935 4936 4937 4938
      {
        /*
          Retain partitioning but possibly with a new storage engine
          beneath.
        */
4939
        thd->work_part_info= table->part_info;
4940
        if (create_info->used_fields & HA_CREATE_USED_ENGINE &&
4941 4942 4943 4944 4945
            create_info->db_type != table->part_info->default_engine_type)
        {
          /*
            Make sure change of engine happens to all partitions.
          */
4946
          DBUG_PRINT("info", ("partition changed"));
4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962
          if (table->part_info->is_auto_partitioned)
          {
            /*
              If the user originally didn't specify partitioning to be
              used we can remove it now.
            */
            thd->work_part_info= NULL;
          }
          else
          {
            /*
              Ensure that all partitions have the proper engine set-up
            */
            set_engine_all_partitions(thd->work_part_info,
                                      create_info->db_type);
          }
4963 4964 4965
          *partition_changed= TRUE;
        }
      }
4966
    }
4967
    if (thd->work_part_info)
4968
    {
4969
      partition_info *part_info= thd->work_part_info;
4970
      bool is_native_partitioned= FALSE;
4971 4972 4973 4974
      /*
        Need to cater for engine types that can handle partition without
        using the partition handler.
      */
4975
      if (thd->work_part_info != table->part_info)
4976 4977
      {
        DBUG_PRINT("info", ("partition changed"));
4978
        *partition_changed= TRUE;
4979
      }
4980
      if (create_info->db_type == partition_hton)
4981 4982 4983 4984 4985
        part_info->default_engine_type= table->part_info->default_engine_type;
      else
        part_info->default_engine_type= create_info->db_type;
      if (check_native_partitioned(create_info, &is_native_partitioned,
                                   part_info, thd))
4986
      {
4987
        DBUG_RETURN(TRUE);
4988
      }
4989
      if (!is_native_partitioned)
4990
      {
4991
        DBUG_ASSERT(create_info->db_type);
4992
        create_info->db_type= partition_hton;
4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028
      }
    }
  }
  DBUG_RETURN(FALSE);
}


/*
  Change partitions, used to implement ALTER TABLE ADD/REORGANIZE/COALESCE
  partitions. This method is used to implement both single-phase and multi-
  phase implementations of ADD/REORGANIZE/COALESCE partitions.

  SYNOPSIS
    mysql_change_partitions()
    lpt                        Struct containing parameters

  RETURN VALUES
    TRUE                          Failure
    FALSE                         Success

  DESCRIPTION
    Request handler to add partitions as set in states of the partition.
    The table path is built from db and table_name and handed to the
    change_partitions() method of the table's handler. On failure the
    error is reported here: ER_OUTOFMEMORY is flagged on the thread as a
    fatal error, every other error code is printed through the handler.

    Elements of the lpt parameters used:
    thd                        Thread object, flagged with fatal_error()
                               when the handler returns ER_OUTOFMEMORY
    table                      Table object whose handler (table->file)
                               performs the change
    create_info                Create information used to create partitions
    db                         Database name
    table_name                 Table name
    copied                     Output parameter where number of copied
                               records are added
    deleted                    Output parameter where number of deleted
                               records are added
    pack_frm_data              Passed through unchanged to the handler
    pack_frm_len               Passed through unchanged to the handler
*/

static bool mysql_change_partitions(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  char path[FN_REFLEN+1];
  int error;
  handler *file= lpt->table->file;
  DBUG_ENTER("mysql_change_partitions");

  /*
    Pass one byte less than the buffer size so that a terminating NUL
    written after the last permitted character still fits inside path.
    NOTE(review): assumes the length argument counts characters excluding
    the terminator; confirm against build_table_filename().
  */
  build_table_filename(path, sizeof(path) - 1, lpt->db, lpt->table_name,
                       "", 0);
  if ((error= file->change_partitions(lpt->create_info, path, &lpt->copied,
                                      &lpt->deleted, lpt->pack_frm_data,
                                      lpt->pack_frm_len)))
  {
    /* Out of memory is escalated to a fatal error instead of printed */
    if (error != ER_OUTOFMEMORY)
      file->print_error(error, MYF(0));
    else
      lpt->thd->fatal_error();
    DBUG_RETURN(TRUE);
  }
  DBUG_RETURN(FALSE);
}


/*
  Rename partitions in an ALTER TABLE of partitions

  SYNOPSIS
    mysql_rename_partitions()
    lpt                        Struct containing parameters

  RETURN VALUES
    TRUE                          Failure
    FALSE                         Success

  DESCRIPTION
    Builds the table path and asks the handler to rename partitions through
    handler::rename_partitions(). A failure is reported through
    handler::print_error() unless the handler returned 1.
    NOTE(review): presumably an error code of 1 means the handler has
    already reported the problem itself; confirm against the handler.

    Parameters used:
    table                      Table object
    db                         Database name
    table_name                 Table name
*/

static bool mysql_rename_partitions(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  char table_path[FN_REFLEN+1];
  int rename_error;
  DBUG_ENTER("mysql_rename_partitions");

  build_table_filename(table_path, sizeof(table_path), lpt->db,
                       lpt->table_name, "", 0);
  rename_error= lpt->table->file->rename_partitions(table_path);
  if (!rename_error)
  {
    DBUG_RETURN(FALSE);
  }

  if (rename_error != 1)
    lpt->table->file->print_error(rename_error, MYF(0));
  DBUG_RETURN(TRUE);
}


/*
  Drop partitions in an ALTER TABLE of partitions

  SYNOPSIS
    mysql_drop_partitions()
    lpt                        Struct containing parameters

  RETURN VALUES
    TRUE                          Failure
    FALSE                         Success
  DESCRIPTION
    Asks the handler to drop partitions through handler::drop_partitions().
    Afterwards every element of the partition list that is in state
    PART_IS_DROPPED is removed from the list and no_parts is decreased by
    the number of removed elements.

    Parameters used:
    table                       Table object
    db                          Database name
    table_name                  Table name
*/

static bool mysql_drop_partitions(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  char table_path[FN_REFLEN+1];
  partition_info *part_info= lpt->table->part_info;
  List_iterator<partition_element> part_it(part_info->partitions);
  uint visited= 0;
  uint dropped_parts= 0;
  int drop_error;
  DBUG_ENTER("mysql_drop_partitions");

  build_table_filename(table_path, sizeof(table_path), lpt->db,
                       lpt->table_name, "", 0);
  drop_error= lpt->table->file->drop_partitions(table_path);
  if (drop_error)
  {
    lpt->table->file->print_error(drop_error, MYF(0));
    DBUG_RETURN(TRUE);
  }

  /* no_parts is only adjusted after the walk, so it bounds the whole list */
  do
  {
    partition_element *elem= part_it++;
    if (elem->part_state == PART_IS_DROPPED)
    {
      part_it.remove();
      dropped_parts++;
    }
    visited++;
  } while (visited < part_info->no_parts);

  part_info->no_parts-= dropped_parts;
  DBUG_RETURN(FALSE);
}


5134 5135 5136 5137 5138 5139 5140 5141 5142
/*
  Insert log entry into list
  SYNOPSIS
    insert_part_info_log_entry_list()
    log_entry
  RETURN VALUES
    NONE
*/

5143 5144
static void insert_part_info_log_entry_list(partition_info *part_info,
                                            DDL_LOG_MEMORY_ENTRY *log_entry)
5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159
{
  log_entry->next_active_log_entry= part_info->first_log_entry;
  part_info->first_log_entry= log_entry;
}


/*
  Release all log entries for this partition info struct
  SYNOPSIS
    release_part_info_log_entries()
    first_log_entry                 First log entry in list to release
  RETURN VALUES
    NONE
*/

5160
static void release_part_info_log_entries(DDL_LOG_MEMORY_ENTRY *log_entry)
5161 5162 5163 5164 5165
{
  DBUG_ENTER("release_part_info_log_entries");

  while (log_entry)
  {
5166
    release_ddl_log_memory_entry(log_entry);
5167
    log_entry= log_entry->next_active_log_entry;
5168 5169 5170 5171 5172
  }
  DBUG_VOID_RETURN;
}


5173
/*
5174
  Log an delete/rename frm file
5175
  SYNOPSIS
5176
    write_log_replace_delete_frm()
5177 5178
    lpt                            Struct for parameters
    next_entry                     Next reference to use in log record
5179 5180 5181
    from_path                      Name to rename from
    to_path                        Name to rename to
    replace_flag                   TRUE if replace, else delete
5182
  RETURN VALUES
5183 5184
    TRUE                           Error
    FALSE                          Success
5185
  DESCRIPTION
5186
    Support routine that writes a replace or delete of an frm file into the
5187
    ddl log. It also inserts an entry that keeps track of used space into
5188
    the partition info object
5189 5190
*/

5191 5192 5193 5194 5195
static bool write_log_replace_delete_frm(ALTER_PARTITION_PARAM_TYPE *lpt,
                                         uint next_entry,
                                         const char *from_path,
                                         const char *to_path,
                                         bool replace_flag)
5196
{
5197 5198
  DDL_LOG_ENTRY ddl_log_entry;
  DDL_LOG_MEMORY_ENTRY *log_entry;
5199
  DBUG_ENTER("write_log_replace_delete_frm");
5200

5201
  if (replace_flag)
5202
    ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION;
5203
  else
5204 5205
    ddl_log_entry.action_type= DDL_LOG_DELETE_ACTION;
  ddl_log_entry.next_entry= next_entry;
5206
  ddl_log_entry.handler_name= reg_ext;
5207
  ddl_log_entry.name= to_path;
5208
  if (replace_flag)
5209 5210
    ddl_log_entry.from_name= from_path;
  if (write_ddl_log_entry(&ddl_log_entry, &log_entry))
5211 5212 5213
  {
    DBUG_RETURN(TRUE);
  }
5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226
  insert_part_info_log_entry_list(lpt->part_info, log_entry);
  DBUG_RETURN(FALSE);
}


/*
  Log final partition changes in change partition
  SYNOPSIS
    write_log_changed_partitions()
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238
  DESCRIPTION
    This code is used to perform safe ADD PARTITION for HASH partitions
    and COALESCE for HASH partitions and REORGANIZE for any type of
    partitions.
    We prepare entries for all partitions except the reorganised partitions
    in REORGANIZE partition, those are handled by
    write_log_dropped_partitions. For those partitions that are replaced
    special care is needed to ensure that this is performed correctly and
    this requires a two-phased approach with this log as a helper for this.

    This code is closely intertwined with the code in rename_partitions in
    the partition handler.
5239 5240
*/

5241 5242
static bool write_log_changed_partitions(ALTER_PARTITION_PARAM_TYPE *lpt,
                                         uint *next_entry, const char *path)
5243
{
5244
  DDL_LOG_ENTRY ddl_log_entry;
5245
  partition_info *part_info= lpt->part_info;
5246
  DDL_LOG_MEMORY_ENTRY *log_entry;
5247 5248 5249 5250 5251 5252
  char tmp_path[FN_LEN];
  char normal_path[FN_LEN];
  List_iterator<partition_element> part_it(part_info->partitions);
  uint temp_partitions= part_info->temp_partitions.elements;
  uint no_elements= part_info->partitions.elements;
  uint i= 0;
5253
  DBUG_ENTER("write_log_changed_partitions");
5254 5255 5256 5257 5258 5259 5260

  do
  {
    partition_element *part_elem= part_it++;
    if (part_elem->part_state == PART_IS_CHANGED ||
        (part_elem->part_state == PART_IS_ADDED && temp_partitions))
    {
5261
      if (part_info->is_sub_partitioned())
5262 5263 5264 5265 5266 5267 5268
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint no_subparts= part_info->no_subparts;
        uint j= 0;
        do
        {
          partition_element *sub_elem= sub_it++;
5269 5270
          ddl_log_entry.next_entry= *next_entry;
          ddl_log_entry.handler_name=
5271 5272 5273 5274 5275 5276 5277 5278 5279
               ha_resolve_storage_engine_name(sub_elem->engine_type);
          create_subpartition_name(tmp_path, path,
                                   part_elem->partition_name,
                                   sub_elem->partition_name,
                                   TEMP_PART_NAME);
          create_subpartition_name(normal_path, path,
                                   part_elem->partition_name,
                                   sub_elem->partition_name,
                                   NORMAL_PART_NAME);
5280 5281
          ddl_log_entry.name= normal_path;
          ddl_log_entry.from_name= tmp_path;
5282
          if (part_elem->part_state == PART_IS_CHANGED)
5283
            ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION;
5284
          else
5285 5286
            ddl_log_entry.action_type= DDL_LOG_RENAME_ACTION;
          if (write_ddl_log_entry(&ddl_log_entry, &log_entry))
5287 5288 5289 5290 5291 5292 5293 5294 5295 5296
          {
            DBUG_RETURN(TRUE);
          }
          *next_entry= log_entry->entry_pos;
          sub_elem->log_entry= log_entry;
          insert_part_info_log_entry_list(part_info, log_entry);
        } while (++j < no_subparts);
      }
      else
      {
5297 5298
        ddl_log_entry.next_entry= *next_entry;
        ddl_log_entry.handler_name=
5299 5300 5301 5302 5303 5304 5305
               ha_resolve_storage_engine_name(part_elem->engine_type);
        create_partition_name(tmp_path, path,
                              part_elem->partition_name,
                              TEMP_PART_NAME, TRUE);
        create_partition_name(normal_path, path,
                              part_elem->partition_name,
                              NORMAL_PART_NAME, TRUE);
5306 5307
        ddl_log_entry.name= normal_path;
        ddl_log_entry.from_name= tmp_path;
5308
        if (part_elem->part_state == PART_IS_CHANGED)
5309
          ddl_log_entry.action_type= DDL_LOG_REPLACE_ACTION;
5310
        else
5311 5312
          ddl_log_entry.action_type= DDL_LOG_RENAME_ACTION;
        if (write_ddl_log_entry(&ddl_log_entry, &log_entry))
5313 5314 5315 5316
        {
          DBUG_RETURN(TRUE);
        }
        *next_entry= log_entry->entry_pos;
5317
        part_elem->log_entry= log_entry;
5318 5319 5320
        insert_part_info_log_entry_list(part_info, log_entry);
      }
    }
5321
  } while (++i < no_elements);
5322
  DBUG_RETURN(FALSE);
5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335
}


/*
  Log dropped partitions
  SYNOPSIS
    write_log_dropped_partitions()
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
*/

5336 5337 5338 5339
static bool write_log_dropped_partitions(ALTER_PARTITION_PARAM_TYPE *lpt,
                                         uint *next_entry,
                                         const char *path,
                                         bool temp_list)
5340
{
5341
  DDL_LOG_ENTRY ddl_log_entry;
5342
  partition_info *part_info= lpt->part_info;
5343
  DDL_LOG_MEMORY_ENTRY *log_entry;
5344 5345
  char tmp_path[FN_LEN];
  List_iterator<partition_element> part_it(part_info->partitions);
5346 5347
  List_iterator<partition_element> temp_it(part_info->temp_partitions);
  uint no_temp_partitions= part_info->temp_partitions.elements;
5348 5349 5350
  uint no_elements= part_info->partitions.elements;
  DBUG_ENTER("write_log_dropped_partitions");

5351
  ddl_log_entry.action_type= DDL_LOG_DELETE_ACTION;
5352 5353 5354
  if (temp_list)
    no_elements= no_temp_partitions;
  while (no_elements--)
5355
  {
5356 5357 5358 5359 5360
    partition_element *part_elem;
    if (temp_list)
      part_elem= temp_it++;
    else
      part_elem= part_it++;
5361
    if (part_elem->part_state == PART_TO_BE_DROPPED ||
5362 5363
        part_elem->part_state == PART_TO_BE_ADDED ||
        part_elem->part_state == PART_CHANGED)
5364
    {
5365 5366 5367 5368 5369 5370 5371
      uint name_variant;
      if (part_elem->part_state == PART_CHANGED ||
          (part_elem->part_state == PART_TO_BE_ADDED &&
           no_temp_partitions))
        name_variant= TEMP_PART_NAME;
      else
        name_variant= NORMAL_PART_NAME;
5372
      if (part_info->is_sub_partitioned())
5373 5374 5375
      {
        List_iterator<partition_element> sub_it(part_elem->subpartitions);
        uint no_subparts= part_info->no_subparts;
5376
        uint j= 0;
5377 5378 5379
        do
        {
          partition_element *sub_elem= sub_it++;
5380 5381
          ddl_log_entry.next_entry= *next_entry;
          ddl_log_entry.handler_name=
5382
               ha_resolve_storage_engine_name(sub_elem->engine_type);
5383 5384 5385
          create_subpartition_name(tmp_path, path,
                                   part_elem->partition_name,
                                   sub_elem->partition_name,
5386
                                   name_variant);
5387 5388
          ddl_log_entry.name= tmp_path;
          if (write_ddl_log_entry(&ddl_log_entry, &log_entry))
5389 5390 5391 5392
          {
            DBUG_RETURN(TRUE);
          }
          *next_entry= log_entry->entry_pos;
5393
          sub_elem->log_entry= log_entry;
5394
          insert_part_info_log_entry_list(part_info, log_entry);
5395
        } while (++j < no_subparts);
5396 5397 5398
      }
      else
      {
5399 5400
        ddl_log_entry.next_entry= *next_entry;
        ddl_log_entry.handler_name=
5401 5402 5403
               ha_resolve_storage_engine_name(part_elem->engine_type);
        create_partition_name(tmp_path, path,
                              part_elem->partition_name,
5404
                              name_variant, TRUE);
5405 5406
        ddl_log_entry.name= tmp_path;
        if (write_ddl_log_entry(&ddl_log_entry, &log_entry))
5407 5408 5409 5410
        {
          DBUG_RETURN(TRUE);
        }
        *next_entry= log_entry->entry_pos;
5411
        part_elem->log_entry= log_entry;
5412 5413 5414
        insert_part_info_log_entry_list(part_info, log_entry);
      }
    }
5415
  }
5416 5417 5418 5419
  DBUG_RETURN(FALSE);
}


5420
/*
5421
  Set execute log entry in ddl log for this partitioned table
5422 5423 5424 5425 5426 5427 5428 5429
  SYNOPSIS
    set_part_info_exec_log_entry()
    part_info                      Partition info object
    exec_log_entry                 Log entry
  RETURN VALUES
    NONE
*/

5430 5431
static void set_part_info_exec_log_entry(partition_info *part_info,
                                         DDL_LOG_MEMORY_ENTRY *exec_log_entry)
5432 5433 5434 5435 5436 5437
{
  part_info->exec_log_entry= exec_log_entry;
  exec_log_entry->next_active_log_entry= NULL;
}


5438
/*
5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449
  Write the log entry to ensure that the shadow frm file is removed at
  crash.
  SYNOPSIS
    write_log_drop_shadow_frm()
    lpt                      Struct containing parameters
    install_frm              Should we log action to install shadow frm or should
                             the action be to remove the shadow frm file.
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
  DESCRIPTION
5450
    Prepare an entry to the ddl log indicating a drop/install of the shadow frm
5451 5452 5453
    file and its corresponding handler file.
*/

5454
static bool write_log_drop_shadow_frm(ALTER_PARTITION_PARAM_TYPE *lpt)
5455 5456
{
  partition_info *part_info= lpt->part_info;
5457 5458
  DDL_LOG_MEMORY_ENTRY *log_entry;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= NULL;
5459 5460
  char shadow_path[FN_LEN];
  DBUG_ENTER("write_log_drop_shadow_frm");
5461

5462
  build_table_filename(shadow_path, sizeof(shadow_path), lpt->db,
5463
                       lpt->table_name, "#", 0);
5464
  pthread_mutex_lock(&LOCK_gdl);
5465 5466 5467 5468 5469 5470 5471
  if (write_log_replace_delete_frm(lpt, 0UL, NULL,
                                  (const char*)shadow_path, FALSE))
    goto error;
  log_entry= part_info->first_log_entry;
  if (write_execute_ddl_log_entry(log_entry->entry_pos,
                                    FALSE, &exec_log_entry))
    goto error;
5472
  pthread_mutex_unlock(&LOCK_gdl);
5473 5474 5475 5476
  set_part_info_exec_log_entry(part_info, exec_log_entry);
  DBUG_RETURN(FALSE);

error:
5477
  release_part_info_log_entries(part_info->first_log_entry);
5478
  pthread_mutex_unlock(&LOCK_gdl);
5479
  part_info->first_log_entry= NULL;
5480
  my_error(ER_DDL_LOG_ERROR, MYF(0));
5481 5482 5483 5484 5485 5486
  DBUG_RETURN(TRUE);
}


/*
  Log renaming of shadow frm to real frm name and dropping of old frm
5487
  SYNOPSIS
5488
    write_log_rename_frm()
5489 5490 5491 5492 5493
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
  DESCRIPTION
5494 5495
    Prepare an entry to ensure that we complete the renaming of the frm
    file if failure occurs in the middle of the rename process.
5496 5497
*/

5498
static bool write_log_rename_frm(ALTER_PARTITION_PARAM_TYPE *lpt)
5499
{
5500
  partition_info *part_info= lpt->part_info;
5501 5502
  DDL_LOG_MEMORY_ENTRY *log_entry;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->exec_log_entry;
5503
  char path[FN_LEN];
5504
  char shadow_path[FN_LEN];
5505
  DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->first_log_entry;
5506
  DBUG_ENTER("write_log_rename_frm");
5507

5508 5509
  part_info->first_log_entry= NULL;
  build_table_filename(path, sizeof(path), lpt->db,
5510
                       lpt->table_name, "", 0);
5511
  build_table_filename(shadow_path, sizeof(shadow_path), lpt->db,
5512
                       lpt->table_name, "#", 0);
5513
  pthread_mutex_lock(&LOCK_gdl);
5514
  if (write_log_replace_delete_frm(lpt, 0UL, shadow_path, path, TRUE))
5515 5516 5517 5518 5519 5520 5521
    goto error;
  log_entry= part_info->first_log_entry;
  part_info->frm_log_entry= log_entry;
  if (write_execute_ddl_log_entry(log_entry->entry_pos,
                                    FALSE, &exec_log_entry))
    goto error;
  release_part_info_log_entries(old_first_log_entry);
5522
  pthread_mutex_unlock(&LOCK_gdl);
5523 5524 5525
  DBUG_RETURN(FALSE);

error:
5526
  release_part_info_log_entries(part_info->first_log_entry);
5527
  pthread_mutex_unlock(&LOCK_gdl);
5528
  part_info->first_log_entry= old_first_log_entry;
5529
  part_info->frm_log_entry= NULL;
5530
  my_error(ER_DDL_LOG_ERROR, MYF(0));
5531
  DBUG_RETURN(TRUE);
5532 5533 5534 5535
}


/*
5536 5537
  Write the log entries to ensure that the drop partition command is completed
  even in the presence of a crash.
5538 5539

  SYNOPSIS
5540
    write_log_drop_partition()
5541 5542 5543 5544 5545
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
  DESCRIPTION
5546
    Prepare entries to the ddl log indicating all partitions to drop and to
5547
    install the shadow frm file and remove the old frm file.
5548 5549
*/

5550
static bool write_log_drop_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
5551
{
5552
  partition_info *part_info= lpt->part_info;
5553 5554
  DDL_LOG_MEMORY_ENTRY *log_entry;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->exec_log_entry;
5555 5556
  char tmp_path[FN_LEN];
  char path[FN_LEN];
5557
  uint next_entry= 0;
5558
  DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->first_log_entry;
5559
  DBUG_ENTER("write_log_drop_partition");
5560

5561 5562
  part_info->first_log_entry= NULL;
  build_table_filename(path, sizeof(path), lpt->db,
5563
                       lpt->table_name, "", 0);
5564
  build_table_filename(tmp_path, sizeof(tmp_path), lpt->db,
5565
                       lpt->table_name, "#", 0);
5566
  pthread_mutex_lock(&LOCK_gdl);
5567 5568 5569
  if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
                                   FALSE))
    goto error;
5570 5571
  if (write_log_replace_delete_frm(lpt, next_entry, (const char*)tmp_path,
                                  (const char*)path, TRUE))
5572 5573 5574 5575 5576 5577 5578
    goto error;
  log_entry= part_info->first_log_entry;
  part_info->frm_log_entry= log_entry;
  if (write_execute_ddl_log_entry(log_entry->entry_pos,
                                    FALSE, &exec_log_entry))
    goto error;
  release_part_info_log_entries(old_first_log_entry);
5579
  pthread_mutex_unlock(&LOCK_gdl);
5580 5581 5582
  DBUG_RETURN(FALSE);

error:
5583
  release_part_info_log_entries(part_info->first_log_entry);
5584
  pthread_mutex_unlock(&LOCK_gdl);
5585
  part_info->first_log_entry= old_first_log_entry;
5586
  part_info->frm_log_entry= NULL;
5587
  my_error(ER_DDL_LOG_ERROR, MYF(0));
5588
  DBUG_RETURN(TRUE);
5589 5590 5591 5592
}


/*
5593 5594 5595
  Write the log entries to ensure that the add partition command is not
  executed at all if a crash before it has completed

5596
  SYNOPSIS
5597
    write_log_add_change_partition()
5598 5599 5600 5601 5602
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
  DESCRIPTION
5603
    Prepare entries to the ddl log indicating all partitions to drop and to
5604
    remove the shadow frm file.
5605
    We always inject entries backwards in the list in the ddl log since we
5606
    don't know the entry position until we have written it.
5607 5608
*/

5609
static bool write_log_add_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
5610
{
5611
  partition_info *part_info= lpt->part_info;
5612 5613
  DDL_LOG_MEMORY_ENTRY *log_entry;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= NULL;
5614 5615 5616 5617
  char tmp_path[FN_LEN];
  char path[FN_LEN];
  uint next_entry= 0;
  DBUG_ENTER("write_log_add_change_partition");
5618

5619
  build_table_filename(path, sizeof(path), lpt->db,
5620
                       lpt->table_name, "", 0);
5621
  build_table_filename(tmp_path, sizeof(tmp_path), lpt->db,
5622
                       lpt->table_name, "#", 0);
5623
  pthread_mutex_lock(&LOCK_gdl);
5624 5625 5626 5627 5628 5629 5630 5631 5632 5633
  if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
                                   FALSE))
    goto error;
  if (write_log_replace_delete_frm(lpt, next_entry, NULL, tmp_path,
                                  FALSE))
    goto error;
  log_entry= part_info->first_log_entry;
  if (write_execute_ddl_log_entry(log_entry->entry_pos,
                                    FALSE, &exec_log_entry))
    goto error;
5634
  pthread_mutex_unlock(&LOCK_gdl);
5635 5636 5637 5638
  set_part_info_exec_log_entry(part_info, exec_log_entry);
  DBUG_RETURN(FALSE);

error:
5639
  release_part_info_log_entries(part_info->first_log_entry);
5640
  pthread_mutex_unlock(&LOCK_gdl);
5641
  part_info->first_log_entry= NULL;
5642
  my_error(ER_DDL_LOG_ERROR, MYF(0));
5643
  DBUG_RETURN(TRUE);
5644 5645 5646 5647 5648 5649 5650 5651
}


/*
  Write description of how to complete the operation after first phase of
  change partitions.

  SYNOPSIS
5652
    write_log_final_change_partition()
5653 5654 5655 5656 5657 5658 5659 5660 5661 5662
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
  DESCRIPTION
    We will write log entries that specify to remove all partitions reorganised,
    to rename others to reflect the new naming scheme and to install the shadow
    frm file.
*/

5663
static bool write_log_final_change_partition(ALTER_PARTITION_PARAM_TYPE *lpt)
5664
{
5665
  partition_info *part_info= lpt->part_info;
5666 5667
  DDL_LOG_MEMORY_ENTRY *log_entry;
  DDL_LOG_MEMORY_ENTRY *exec_log_entry= part_info->exec_log_entry;
5668
  char path[FN_LEN];
5669
  char shadow_path[FN_LEN];
5670
  DDL_LOG_MEMORY_ENTRY *old_first_log_entry= part_info->first_log_entry;
5671 5672
  uint next_entry= 0;
  DBUG_ENTER("write_log_final_change_partition");
5673

5674 5675
  part_info->first_log_entry= NULL;
  build_table_filename(path, sizeof(path), lpt->db,
5676
                       lpt->table_name, "", 0);
5677
  build_table_filename(shadow_path, sizeof(shadow_path), lpt->db,
5678
                       lpt->table_name, "#", 0);
5679
  pthread_mutex_lock(&LOCK_gdl);
5680
  if (write_log_dropped_partitions(lpt, &next_entry, (const char*)path,
5681
                      lpt->alter_info->flags & ALTER_REORGANIZE_PARTITION))
5682 5683 5684
    goto error;
  if (write_log_changed_partitions(lpt, &next_entry, (const char*)path))
    goto error;
5685
  if (write_log_replace_delete_frm(lpt, 0UL, shadow_path, path, TRUE))
5686 5687 5688 5689 5690 5691 5692
    goto error;
  log_entry= part_info->first_log_entry;
  part_info->frm_log_entry= log_entry;
  if (write_execute_ddl_log_entry(log_entry->entry_pos,
                                    FALSE, &exec_log_entry))
    goto error;
  release_part_info_log_entries(old_first_log_entry);
5693
  pthread_mutex_unlock(&LOCK_gdl);
5694 5695 5696
  DBUG_RETURN(FALSE);

error:
5697
  release_part_info_log_entries(part_info->first_log_entry);
5698
  pthread_mutex_unlock(&LOCK_gdl);
5699
  part_info->first_log_entry= old_first_log_entry;
5700
  part_info->frm_log_entry= NULL;
5701
  my_error(ER_DDL_LOG_ERROR, MYF(0));
5702
  DBUG_RETURN(TRUE);
5703 5704 5705
}


5706
/*
5707
  Remove entry from ddl log and release resources for others to use
5708 5709 5710 5711 5712 5713 5714 5715

  SYNOPSIS
    write_log_completed()
    lpt                      Struct containing parameters
  RETURN VALUES
    TRUE                     Error
    FALSE                    Success
*/
5716

5717 5718
static void write_log_completed(ALTER_PARTITION_PARAM_TYPE *lpt,
                                bool dont_crash)
5719
{
5720
  partition_info *part_info= lpt->part_info;
5721
  DDL_LOG_MEMORY_ENTRY *log_entry= part_info->exec_log_entry;
5722
  DBUG_ENTER("write_log_completed");
5723

5724
  DBUG_ASSERT(log_entry);
5725
  pthread_mutex_lock(&LOCK_gdl);
5726
  if (write_execute_ddl_log_entry(0UL, TRUE, &log_entry))
5727 5728
  {
    /*
5729
      Failed to write, Bad...
5730 5731
      We have completed the operation but have log records to REMOVE
      stuff that shouldn't be removed. What clever things could one do
5732 5733
      here? An error output was written to the error output by the
      above method so we don't do anything here.
5734
    */
5735
    ;
5736 5737 5738
  }
  release_part_info_log_entries(part_info->first_log_entry);
  release_part_info_log_entries(part_info->exec_log_entry);
5739
  pthread_mutex_unlock(&LOCK_gdl);
5740 5741
  part_info->exec_log_entry= NULL;
  part_info->first_log_entry= NULL;
5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754
  DBUG_VOID_RETURN;
}


/*
   Release all log entries
   SYNOPSIS
     release_log_entries()
     part_info                  Partition info struct
   RETURN VALUES
     NONE
*/

5755
static void release_log_entries(partition_info *part_info)
5756
{
5757
  pthread_mutex_lock(&LOCK_gdl);
5758 5759
  release_part_info_log_entries(part_info->first_log_entry);
  release_part_info_log_entries(part_info->exec_log_entry);
5760
  pthread_mutex_unlock(&LOCK_gdl);
5761 5762
  part_info->first_log_entry= NULL;
  part_info->exec_log_entry= NULL;
5763 5764 5765
}


5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782
/*
  Get a lock on table name to avoid that anyone can open the table in
  a critical part of the ALTER TABLE.
  SYNOPSIS
    get_name_lock()
    lpt                        Struct carrying parameters
  RETURN VALUES
    FALSE                      Success
    TRUE                       Failure
  DESCRIPTION
    Fills in lpt->table_list with database, table and table name and
    returns the result of lock_table_name(), which is called while
    holding LOCK_open.
*/

static int get_name_lock(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  int lock_result= 0;
  DBUG_ENTER("get_name_lock");

  /* Start from a zeroed table list element, only three members are set */
  bzero(&lpt->table_list, sizeof(lpt->table_list));
  lpt->table_list.table= lpt->table;
  lpt->table_list.db= (char*)lpt->db;
  lpt->table_list.table_name= (char*)lpt->table_name;

  pthread_mutex_lock(&LOCK_open);
  lock_result= lock_table_name(lpt->thd, &lpt->table_list, FALSE);
  pthread_mutex_unlock(&LOCK_open);

  DBUG_RETURN(lock_result);
}


/*
  Unlock and close table before renaming and dropping partitions
  SYNOPSIS
    alter_close_tables()
    lpt                        Struct carrying parameters
  RETURN VALUES
    0
  DESCRIPTION
    Unlocks the tables locked by the thread and closes the handler of the
    table.
*/

static int alter_close_tables(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  DBUG_ENTER("alter_close_tables");

  /*
    Tables are unlocked and all handlers closed here.
    Clearing the lock ensures the unlock isn't done twice.
  */
  mysql_unlock_tables(lpt->thd, lpt->thd->lock);
  lpt->thd->lock= 0;

  /* Clearing db_stat ensures the handler isn't closed twice */
  lpt->table->file->close();
  lpt->table->db_stat= 0;

  DBUG_RETURN(0);
}


/*
  Release a name lock
  SYNOPSIS
    release_name_lock()
    lpt                        Struct carrying parameters
  RETURN VALUES
    0
  DESCRIPTION
    Calls unlock_table_name() for lpt->table_list while holding LOCK_open.
*/

static int release_name_lock(ALTER_PARTITION_PARAM_TYPE *lpt)
{
  THD *thd= lpt->thd;
  DBUG_ENTER("release_name_lock");

  pthread_mutex_lock(&LOCK_open);
  unlock_table_name(thd, &lpt->table_list);
  pthread_mutex_unlock(&LOCK_open);

  DBUG_RETURN(0);
}


5839 5840 5841 5842 5843 5844 5845 5846 5847 5848
/*
  Handle errors for ALTER TABLE for partitioning
  SYNOPSIS
    handle_alter_part_error()
    lpt                        Struct carrying parameters
    not_completed              Was request in complete phase when error occurred
  RETURN VALUES
    NONE
*/

5849 5850 5851 5852
void handle_alter_part_error(ALTER_PARTITION_PARAM_TYPE *lpt,
                             bool not_completed,
                             bool drop_partition,
                             bool frm_install)
5853 5854 5855 5856 5857
{
  partition_info *part_info= lpt->part_info;
  DBUG_ENTER("handle_alter_part_error");

  if (!part_info->first_log_entry &&
5858 5859
      execute_ddl_log_entry(current_thd,
                            part_info->first_log_entry->entry_pos))
5860 5861
  {
    /*
5862 5863
      We couldn't recover from error, most likely manual interaction
      is required.
5864
    */
5865 5866
    write_log_completed(lpt, FALSE);
    release_log_entries(part_info);
5867 5868 5869 5870 5871
    if (not_completed)
    {
      if (drop_partition)
      {
        /* Table is still ok, but we left a shadow frm file behind. */
5872
        push_warning_printf(lpt->thd, MYSQL_ERROR::WARN_LEVEL_WARN, 1,
5873 5874 5875
                            "%s %s",
           "Operation was unsuccessful, table is still intact,",
           "but it is possible that a shadow frm file was left behind");
5876 5877 5878 5879
      }
      else
      {
        push_warning_printf(lpt->thd, MYSQL_ERROR::WARN_LEVEL_WARN, 1,
5880 5881 5882 5883 5884
                            "%s %s %s %s",
           "Operation was unsuccessful, table is still intact,",
           "but it is possible that a shadow frm file was left behind.",
           "It is also possible that temporary partitions are left behind,",
           "these could be empty or more or less filled with records");
5885 5886 5887 5888
      }
    }
    else
    {
5889
      if (frm_install)
5890 5891 5892 5893 5894
      {
        /*
           Failed during install of shadow frm file, table isn't intact
           and dropped partitions are still there
        */
5895
        push_warning_printf(lpt->thd, MYSQL_ERROR::WARN_LEVEL_WARN, 1,
5896 5897 5898 5899
                            "%s %s %s",
          "Failed during alter of partitions, table is no longer intact.",
          "The frm file is in an unknown state, and a backup",
          "is required.");
5900 5901 5902 5903
      }
      else if (drop_partition)
      {
        /*
5904 5905 5906 5907
          Table is ok, we have switched to new table but left dropped
          partitions still in their places. We remove the log records and
          ask the user to perform the action manually. We remove the log
          records and ask the user to perform the action manually.
5908
        */
5909
        push_warning_printf(lpt->thd, MYSQL_ERROR::WARN_LEVEL_WARN, 1,
5910 5911 5912
                            "%s %s",
              "Failed during drop of partitions, table is intact.",
              "Manual drop of remaining partitions is required");
5913
      }
5914
      else
5915
      {
5916
        /*
5917 5918 5919
          We failed during renaming of partitions. The table is most
          certainly in a very bad state so we give user warning and disable
          the table by writing an ancient frm version into it.
5920
        */
5921
        push_warning_printf(lpt->thd, MYSQL_ERROR::WARN_LEVEL_WARN, 1,
5922 5923 5924 5925
                            "%s %s %s",
           "Failed during renaming of partitions. We are now in a position",
           "where table is not reusable",
           "Table is disabled by writing ancient frm file version into it");
5926 5927
      }
    }
5928 5929 5930
  }
  else
  {
5931
    release_log_entries(part_info);
5932 5933 5934 5935
    if (not_completed)
    {
      /*
        We hit an error before things were completed but managed
5936 5937
        to recover from the error. An error occurred and we have
        restored things to original so no need for further action.
5938
      */
5939
      ;
5940 5941 5942 5943 5944 5945
    }
    else
    {
      /*
        We hit an error after we had completed most of the operation
        and were successful in a second attempt so the operation
5946 5947 5948
        actually is successful now. We need to issue a warning that
        even though we reported an error the operation was successfully
        completed.
5949
      */
5950 5951 5952
      push_warning_printf(lpt->thd, MYSQL_ERROR::WARN_LEVEL_WARN, 1,"%s %s",
         "Operation was successfully completed by failure handling,",
         "after failure of normal operation");
5953 5954 5955 5956 5957 5958
    }
  }
  DBUG_VOID_RETURN;
}


5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982
/*
  Actually perform the change requested by ALTER TABLE of partitions
  previously prepared.

  SYNOPSIS
    fast_alter_partition_table()
    thd                           Thread object
    table                         Table object
    alter_info                    ALTER TABLE info
    create_info                   Create info for CREATE TABLE
    table_list                    List of the table involved
    db                            Database name of new table
    table_name                    Table name of new table

  RETURN VALUES
    TRUE                          Error
    FALSE                         Success

  DESCRIPTION
    Perform all ALTER TABLE operations for partitioned tables that can be
    performed fast without a full copy of the original table.
*/

uint fast_alter_partition_table(THD *thd, TABLE *table,
5983
                                Alter_info *alter_info,
5984 5985
                                HA_CREATE_INFO *create_info,
                                TABLE_LIST *table_list,
5986
                                char *db,
5987 5988 5989 5990 5991 5992 5993 5994
                                const char *table_name,
                                uint fast_alter_partition)
{
  /* Set-up struct used to write frm files */
  partition_info *part_info= table->part_info;
  ALTER_PARTITION_PARAM_TYPE lpt_obj;
  ALTER_PARTITION_PARAM_TYPE *lpt= &lpt_obj;
  bool written_bin_log= TRUE;
5995 5996
  bool not_completed= TRUE;
  bool frm_install= FALSE;
5997 5998 5999
  DBUG_ENTER("fast_alter_partition_table");

  lpt->thd= thd;
6000
  lpt->part_info= part_info;
6001
  lpt->alter_info= alter_info;
6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014
  lpt->create_info= create_info;
  lpt->db_options= create_info->table_options;
  if (create_info->row_type == ROW_TYPE_DYNAMIC)
    lpt->db_options|= HA_OPTION_PACK_RECORD;
  lpt->table= table;
  lpt->key_info_buffer= 0;
  lpt->key_count= 0;
  lpt->db= db;
  lpt->table_name= table_name;
  lpt->copied= 0;
  lpt->deleted= 0;
  lpt->pack_frm_data= NULL;
  lpt->pack_frm_len= 0;
6015
  thd->work_part_info= part_info;
6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030

  if (alter_info->flags & ALTER_OPTIMIZE_PARTITION ||
      alter_info->flags & ALTER_ANALYZE_PARTITION ||
      alter_info->flags & ALTER_CHECK_PARTITION ||
      alter_info->flags & ALTER_REPAIR_PARTITION)
  {
    /*
      In this case the user has specified that he wants a set of partitions
      to be optimised and the partition engine can handle optimising
      partitions natively without requiring a full rebuild of the
      partitions.

      In this case it is enough to call optimise_partitions, there is no
      need to change frm files or anything else.
    */
6031
    int error;
6032 6033
    written_bin_log= FALSE;
    if (((alter_info->flags & ALTER_OPTIMIZE_PARTITION) &&
6034
         (error= table->file->optimize_partitions(thd))) ||
6035
        ((alter_info->flags & ALTER_ANALYZE_PARTITION) &&
6036
         (error= table->file->analyze_partitions(thd))) ||
6037
        ((alter_info->flags & ALTER_CHECK_PARTITION) &&
6038
         (error= table->file->check_partitions(thd))) ||
6039
        ((alter_info->flags & ALTER_REPAIR_PARTITION) &&
6040
         (error= table->file->repair_partitions(thd))))
6041
    {
6042
      table->file->print_error(error, MYF(0));
6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079
      DBUG_RETURN(TRUE);
    }
  }
  else if (fast_alter_partition & HA_PARTITION_ONE_PHASE)
  {
    /*
      In the case where the engine supports one phase online partition
      changes it is not necessary to have any exclusive locks. The
      correctness is upheld instead by transactions being aborted if they
      access the table after its partition definition has changed (if they
      are still using the old partition definition).

      The handler is in this case responsible to ensure that all users
      start using the new frm file after it has changed. To implement
      one phase it is necessary for the handler to have the master copy
      of the frm file and use discovery mechanisms to renew it. Thus
      write frm will write the frm, pack the new frm and finally
      the frm is deleted and the discovery mechanisms will either restore
      back to the old or installing the new after the change is activated.

      Thus all open tables will be discovered that they are old, if not
      earlier as soon as they try an operation using the old table. One
      should ensure that this is checked already when opening a table,
      even if it is found in the cache of open tables.

      change_partitions will perform all operations and it is the duty of
      the handler to ensure that the frm files in the system gets updated
      in synch with the changes made and if an error occurs that a proper
      error handling is done.

      If the MySQL Server crashes at this moment but the handler succeeds
      in performing the change then the binlog is not written for the
      change. There is no way to solve this as long as the binlog is not
      transactional and even then it is hard to solve it completely.
 
      The first approach here was to downgrade locks. Now a different approach
      is decided upon. The idea is that the handler will have access to the
6080
      Alter_info when store_lock arrives with TL_WRITE_ALLOW_READ. So if the
6081 6082 6083 6084 6085 6086
      handler knows that this functionality can be handled with a lower lock
      level it will set the lock level to TL_WRITE_ALLOW_WRITE immediately.
      Thus the need to downgrade the lock disappears.
      1) Write the new frm, pack it and then delete it
      2) Perform the change within the handler
    */
6087 6088
    if (mysql_write_frm(lpt, WFRM_WRITE_SHADOW | WFRM_PACK_FRM) ||
        mysql_change_partitions(lpt))
6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116
    {
      DBUG_RETURN(TRUE);
    }
  }
  else if (alter_info->flags == ALTER_DROP_PARTITION)
  {
    /*
      Now after all checks and setting state on dropped partitions we can
      start the actual dropping of the partitions.

      Drop partition is actually two things happening. The first is that
      a lot of records are deleted. The second is that the behaviour of
      subsequent updates and writes and deletes will change. The delete
      part can be handled without any particular high lock level by
      transactional engines whereas non-transactional engines need to
      ensure that this change is done with an exclusive lock on the table.
      The second part, the change of partitioning does however require
      an exclusive lock to install the new partitioning as one atomic
      operation. If this is not the case, it is possible for two
      transactions to see the change in a different order than their
      serialisation order. Thus we need an exclusive lock for both
      transactional and non-transactional engines.

      For LIST partitions it could be possible to avoid the exclusive lock
      (and for RANGE partitions if they didn't rearrange range definitions
      after a DROP PARTITION) if one ensured that failed accesses to the
      dropped partitions was aborted for sure (thus only possible for
      transactional engines).
6117 6118 6119

      0) Write an entry that removes the shadow frm file if crash occurs 
      1) Write the new frm file as a shadow frm
6120
      2) Write the ddl log to ensure that the operation is completed
6121 6122
         even in the presence of a MySQL Server crash
      3) Lock the table in TL_WRITE_ONLY to ensure all other accesses to
6123 6124 6125 6126 6127 6128 6129
         the table have completed. This ensures that other threads can not
         execute on the table in parallel.
      4) Get a name lock on the table. This ensures that we can release all
         locks on the table and since no one can open the table, there can
         be no new threads accessing the table. They will be hanging on the
         name lock.
      5) Close all tables that have already been opened but didn't stumble on
6130 6131
         the abort locked previously. This is done as part of the
         get_name_lock call.
6132 6133
      6) We are now ready to release all locks we got in this thread.
      7) Write the bin log
6134 6135 6136 6137 6138 6139
         Unfortunately the writing of the binlog is not synchronised with
         other logging activities. So no matter in which order the binlog
         is written compared to other activities there will always be cases
         where crashes make strange things occur. In this placement it can
         happen that the ALTER TABLE DROP PARTITION gets performed in the
         master but not in the slaves if we have a crash, after writing the
6140 6141
         ddl log but before writing the binlog. A solution to this would
         require writing the statement first in the ddl log and then
6142 6143
         when recovering from the crash read the binlog and insert it into
         the binlog if not written already.
6144 6145 6146 6147 6148 6149 6150
      8) Install the previously written shadow frm file
      9) Prepare handlers for drop of partitions
      10) Drop the partitions
      11) Remove entries from ddl log
      12) Release name lock so that all other threads can access the table
          again.
      13) Complete query
6151 6152 6153

      We insert Error injections at all places where it could be interesting
      to test if recovery is properly done.
6154
    */
6155
    if (write_log_drop_shadow_frm(lpt) ||
6156
        ERROR_INJECT_CRASH("crash_drop_partition_1") ||
6157
        mysql_write_frm(lpt, WFRM_WRITE_SHADOW) ||
6158
        ERROR_INJECT_CRASH("crash_drop_partition_2") ||
6159
        write_log_drop_partition(lpt) ||
6160
        ERROR_INJECT_CRASH("crash_drop_partition_3") ||
6161 6162
        (not_completed= FALSE) ||
        abort_and_upgrade_lock(lpt) || /* Always returns 0 */
6163
        ERROR_INJECT_CRASH("crash_drop_partition_4") ||
6164 6165 6166 6167
        get_name_lock(lpt) ||
        ERROR_INJECT_CRASH("crash_drop_partition_5") ||
        alter_close_tables(lpt) ||
        ERROR_INJECT_CRASH("crash_drop_partition_6") ||
6168 6169
        ((!thd->lex->no_write_to_binlog) &&
         (write_bin_log(thd, FALSE,
6170
                        thd->query, thd->query_length), FALSE)) ||
6171
        ERROR_INJECT_CRASH("crash_drop_partition_7") ||
6172
        ((frm_install= TRUE), FALSE) ||
6173
        mysql_write_frm(lpt, WFRM_INSTALL_SHADOW) ||
6174
        ((frm_install= FALSE), FALSE) ||
6175
        ERROR_INJECT_CRASH("crash_drop_partition_8") ||
6176
        mysql_drop_partitions(lpt) ||
6177
        ERROR_INJECT_CRASH("crash_drop_partition_9") ||
6178
        (write_log_completed(lpt, FALSE), FALSE) ||
6179
        ERROR_INJECT_CRASH("crash_drop_partition_10") ||
6180
        (release_name_lock(lpt), FALSE)) 
6181
    {
6182
      handle_alter_part_error(lpt, not_completed, TRUE, frm_install);
6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198
      DBUG_RETURN(TRUE);
    }
  }
  else if ((alter_info->flags & ALTER_ADD_PARTITION) &&
           (part_info->part_type == RANGE_PARTITION ||
            part_info->part_type == LIST_PARTITION))
  {
    /*
      ADD RANGE/LIST PARTITIONS
      In this case there are no tuples removed and no tuples are added.
      Thus the operation is merely adding a new partition. Thus it is
      necessary to perform the change as an atomic operation. Otherwise
      someone reading without seeing the new partition could potentially
      miss updates made by a transaction serialised before it that are
      inserted into the new partition.

6199 6200
      0) Write an entry that removes the shadow frm file if crash occurs 
      1) Write the new frm file as a shadow frm file
6201
      2) Log the changes to happen in ddl log
6202 6203 6204 6205
      2) Add the new partitions
      3) Lock all partitions in TL_WRITE_ONLY to ensure that no users
         are still using the old partitioning scheme. Wait until all
         ongoing users have completed before progressing.
6206 6207 6208 6209 6210 6211 6212 6213 6214 6215
      4) Get a name lock on the table. This ensures that we can release all
         locks on the table and since no one can open the table, there can
         be no new threads accessing the table. They will be hanging on the
         name lock.
      5) Close all tables that have already been opened but didn't stumble on
         the abort locked previously. This is done as part of the
         get_name_lock call.
      6) Close all table handlers and unlock all handlers but retain name lock
      7) Write binlog
      8) Now the change is completed except for the installation of the
6216 6217
         new frm file. We thus write an action in the log to change to
         the shadow frm file
6218
      9) Install the new frm file of the table where the partitions are
6219
         added to the table.
6220 6221 6222 6223
      10)Wait until all accesses using the old frm file has completed
      11)Remove entries from ddl log
      12)Release name lock
      13)Complete query
6224
    */
6225
    if (write_log_add_change_partition(lpt) ||
6226
        ERROR_INJECT_CRASH("crash_add_partition_1") ||
6227
        mysql_write_frm(lpt, WFRM_WRITE_SHADOW) ||
6228
        ERROR_INJECT_CRASH("crash_add_partition_2") ||
6229
        mysql_change_partitions(lpt) ||
6230
        ERROR_INJECT_CRASH("crash_add_partition_3") ||
6231
        abort_and_upgrade_lock(lpt) || /* Always returns 0 */
6232 6233 6234 6235 6236
        ERROR_INJECT_CRASH("crash_add_partition_3") ||
        get_name_lock(lpt) ||
        ERROR_INJECT_CRASH("crash_add_partition_4") ||
        alter_close_tables(lpt) ||
        ERROR_INJECT_CRASH("crash_add_partition_5") ||
6237 6238 6239
        ((!thd->lex->no_write_to_binlog) &&
         (write_bin_log(thd, FALSE,
                        thd->query, thd->query_length), FALSE)) ||
6240
        ERROR_INJECT_CRASH("crash_add_partition_6") ||
6241
        write_log_rename_frm(lpt) ||
6242
        (not_completed= FALSE) ||
6243
        ERROR_INJECT_CRASH("crash_add_partition_7") ||
6244
        ((frm_install= TRUE), FALSE) ||
6245
        mysql_write_frm(lpt, WFRM_INSTALL_SHADOW) ||
6246
        ERROR_INJECT_CRASH("crash_add_partition_8") ||
6247
        (write_log_completed(lpt, FALSE), FALSE) ||
6248 6249
        ERROR_INJECT_CRASH("crash_add_partition_9") ||
        (release_name_lock(lpt), FALSE)) 
6250
    {
6251
      handle_alter_part_error(lpt, not_completed, FALSE, frm_install);
6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287
      DBUG_RETURN(TRUE);
    }
  }
  else
  {
    /*
      ADD HASH PARTITION/
      COALESCE PARTITION/
      REBUILD PARTITION/
      REORGANIZE PARTITION
 
      In this case all records are still around after the change although
      possibly organised into new partitions, thus by ensuring that all
      updates go to both the old and the new partitioning scheme we can
      actually perform this operation lock-free. The only exception to
      this is when REORGANIZE PARTITION adds/drops ranges. In this case
      there needs to be an exclusive lock during the time when the range
      changes occur.
      This is only possible if the handler can ensure double-write for a
      period. The double write will ensure that it doesn't matter where the
      data is read from since both places are updated for writes. If such
      double writing is not performed then it is necessary to perform the
      change with the usual exclusive lock. With double writes it is even
      possible to perform writes in parallel with the reorganisation of
      partitions.

      Without double write procedure we get the following procedure.
      The only difference with using double write is that we can downgrade
      the lock to TL_WRITE_ALLOW_WRITE. Double write in this case only
      double writes from old to new. If we had double writing in both
      directions we could perform the change completely without exclusive
      lock for HASH partitions.
      Handlers that perform double writing during the copy phase can actually
      use a lower lock level. This can be handled inside store_lock in the
      respective handler.

6288 6289 6290 6291 6292
      0) Write an entry that removes the shadow frm file if crash occurs 
      1) Write the shadow frm file of new partitioning
      2) Log such that temporary partitions added in change phase are
         removed in a crash situation
      3) Add the new partitions
6293
         Copy from the reorganised partitions to the new partitions
6294 6295 6296
      4) Log that operation is completed and log all complete actions
         needed to complete operation from here
      5) Lock all partitions in TL_WRITE_ONLY to ensure that no users
6297 6298
         are still using the old partitioning scheme. Wait until all
         ongoing users have completed before progressing.
6299 6300 6301 6302
      6) Get a name lock of the table
      7) Close all tables opened but not yet locked, after this call we are
         certain that no other thread is in the lock wait queue or has
         opened the table. The name lock will ensure that they are blocked
6303
         on the open call. This is achieved also by get_name_lock call.
6304 6305 6306 6307 6308 6309 6310 6311 6312
      8) Close all partitions opened by this thread, but retain name lock.
      9) Write bin log
      10) Prepare handlers for rename and delete of partitions
      11) Rename and drop the reorged partitions such that they are no
          longer used and rename those added to their real new names.
      12) Install the shadow frm file
      13) Release the name lock to enable other threads to start using the
          table again.
      14) Complete query
6313
    */
6314
    if (write_log_add_change_partition(lpt) ||
6315
        ERROR_INJECT_CRASH("crash_change_partition_1") ||
6316
        mysql_write_frm(lpt, WFRM_WRITE_SHADOW) ||
6317
        ERROR_INJECT_CRASH("crash_change_partition_2") ||
6318
        mysql_change_partitions(lpt) ||
6319 6320
        ERROR_INJECT_CRASH("crash_change_partition_3") ||
        write_log_final_change_partition(lpt) ||
6321
        ERROR_INJECT_CRASH("crash_change_partition_4") ||
6322 6323
        (not_completed= FALSE) ||
        abort_and_upgrade_lock(lpt) || /* Always returns 0 */
6324
        ERROR_INJECT_CRASH("crash_change_partition_5") ||
6325
        get_name_lock(lpt) ||
6326
        ERROR_INJECT_CRASH("crash_change_partition_6") ||
6327
        alter_close_tables(lpt) ||
6328
        ERROR_INJECT_CRASH("crash_change_partition_7") ||
6329 6330 6331
        ((!thd->lex->no_write_to_binlog) &&
         (write_bin_log(thd, FALSE,
                        thd->query, thd->query_length), FALSE)) ||
6332
        ERROR_INJECT_CRASH("crash_change_partition_8") ||
6333
        mysql_write_frm(lpt, WFRM_INSTALL_SHADOW) ||
6334
        ERROR_INJECT_CRASH("crash_change_partition_9") ||
6335
        mysql_drop_partitions(lpt) ||
6336
        ERROR_INJECT_CRASH("crash_change_partition_10") ||
6337
        mysql_rename_partitions(lpt) ||
6338
        ((frm_install= TRUE), FALSE) ||
6339
        ERROR_INJECT_CRASH("crash_change_partition_11") ||
6340
        (write_log_completed(lpt, FALSE), FALSE) ||
6341
        ERROR_INJECT_CRASH("crash_change_partition_12") ||
6342
        (release_name_lock(lpt), FALSE))
6343
    {
6344
      handle_alter_part_error(lpt, not_completed, FALSE, frm_install);
6345
      DBUG_RETURN(TRUE);
6346 6347 6348 6349 6350 6351 6352
    }
  }
  /*
    A final step is to write the query to the binlog and send ok to the
    user
  */
  DBUG_RETURN(fast_end_partition(thd, lpt->copied, lpt->deleted,
6353
                                 table, table_list, FALSE, NULL,
6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446
                                 written_bin_log));
}
#endif


/*
  Move every field in a field array from one record buffer to another,
  as preparation for calling val_int on the partition function.

  SYNOPSIS
    set_field_ptr()
    ptr                 Array of fields to change ptr, ended by a NULL entry
    new_buf             New record pointer
    old_buf             Old record pointer

  DESCRIPTION
    Each field is shifted by the distance (new_buf - old_buf), so fields
    that referred to the old_buf record refer to the new_buf record
    afterwards. Used before calling val_int and after it is used to restore
    pointers to table->record[0].
    The first array entry is always processed, so the array must hold at
    least one field.
    This routine is placed outside of partition code since it can be useful
    also for other programs.
*/

void set_field_ptr(Field **ptr, const byte *new_buf,
                   const byte *old_buf)
{
  Field **cursor= ptr;
  my_ptrdiff_t offset= (new_buf - old_buf);
  DBUG_ENTER("set_field_ptr");

  for (;;)
  {
    (*cursor)->move_field_offset(offset);
    cursor++;
    /* A NULL entry marks the end of the array */
    if (!*cursor)
      break;
  }
  DBUG_VOID_RETURN;
}


/*
  Move every field of a key from one record buffer to another, as
  preparation for calling val_int on the partition function.
  This variant works on a key_part reference.
  It is not required that all fields are NOT NULL fields.

  SYNOPSIS
    set_key_field_ptr()
    key_info            key info with a set of fields to change ptr
    new_buf             New record pointer
    old_buf             Old record pointer

  DESCRIPTION
    The field of each key part is shifted by the distance
    (new_buf - old_buf), so fields that referred to the old_buf record
    refer to the new_buf record afterwards. Used before calling val_int and
    after it is used to restore pointers to table->record[0].
    The first key part is always processed, whatever key_parts says.
    This routine is placed outside of partition code since it can be useful
    also for other programs.
*/

void set_key_field_ptr(KEY *key_info, const byte *new_buf,
                       const byte *old_buf)
{
  KEY_PART_INFO *part= key_info->key_part;
  KEY_PART_INFO *const parts_end= part + key_info->key_parts;
  my_ptrdiff_t offset= (new_buf - old_buf);
  DBUG_ENTER("set_key_field_ptr");

  do
  {
    part->field->move_field_offset(offset);
  } while (++part < parts_end);
  DBUG_VOID_RETURN;
}


/*
  Report an out of memory error

  SYNOPSIS
    mem_alloc_error()
    size                Size of memory attempted to allocate

  RETURN VALUES
    None

  DESCRIPTION
    A routine to use for all the many places in the code where memory
    allocation error can happen, a tremendous amount of them, needs
    simple routine that signals this error.
*/

void mem_alloc_error(size_t size)
{
  /*
    The size travels through the variable argument list of my_error().
    It is converted to int explicitly so that the width of the argument
    does not depend on the platform's size_t.
    NOTE(review): assumes the ER_OUTOFMEMORY message formats the size
    with %d - confirm against the error message file.
  */
  my_error(ER_OUTOFMEMORY, MYF(0), (int) size);
}
#ifdef WITH_PARTITION_STORAGE_ENGINE
/*
  Return comma-separated list of used partitions in the provided string

  SYNOPSIS
    make_used_partitions_str()
      part_info  IN  Partitioning info
      parts_str  OUT The string to fill

  DESCRIPTION
    Generate a list of used partitions (from bits in part_info->used_partitions
    bitmap), and store it into the provided String object. Any previous
    content of the string is discarded.

    Partitions are numbered in list order starting from 0. For a
    subpartitioned table every subpartition has its own number and is
    written as <partition name>_<subpartition name>.

  NOTE
    The produced string must not be longer than MAX_PARTITIONS * (1 + FN_LEN).
*/

void make_used_partitions_str(partition_info *part_info, String *parts_str)
{
  parts_str->length(0);
  partition_element *pe;
  uint partition_id= 0;
  List_iterator<partition_element> it(part_info->partitions);

  if (part_info->is_sub_partitioned())
  {
    partition_element *head_pe;
    while ((head_pe= it++))
    {
      List_iterator<partition_element> it2(head_pe->subpartitions);
      while ((pe= it2++))
      {
        if (bitmap_is_set(&part_info->used_partitions, partition_id))
        {
          /* Separate from the previous entry, if there is one */
          if (parts_str->length())
            parts_str->append(',');
          parts_str->append(head_pe->partition_name,
                           strlen(head_pe->partition_name),
                           system_charset_info);
          parts_str->append('_');
          parts_str->append(pe->partition_name,
                           strlen(pe->partition_name),
                           system_charset_info);
        }
        /* The id advances for every subpartition, used or not */
        partition_id++;
      }
    }
  }
  else
  {
    while ((pe= it++))
    {
      if (bitmap_is_set(&part_info->used_partitions, partition_id))
      {
        /* Separate from the previous entry, if there is one */
        if (parts_str->length())
          parts_str->append(',');
        parts_str->append(pe->partition_name, strlen(pe->partition_name),
                         system_charset_info);
      }
      /* The id advances for every partition, used or not */
      partition_id++;
    }
  }
}
#endif
/****************************************************************************
 * Partition interval analysis support
 ***************************************************************************/

/*
  Setup partition_info::* members related to partitioning range analysis

  SYNOPSIS
    set_up_range_analysis_info()
      part_info  Partitioning info structure

  DESCRIPTION
    Assuming that passed partition_info structure already has correct values
    for members that specify [sub]partitioning type, table fields, and
    functions, set up partition_info::* members that are related to
    Partitioning Interval Analysis (see get_partitions_in_range_iter for its
    definition)

  IMPLEMENTATION
    There are two available interval analyzer functions:
    (1) get_part_iter_for_interval_via_mapping
    (2) get_part_iter_for_interval_via_walking

    They both have limited applicability:
    (1) is applicable for "PARTITION BY <RANGE|LIST>(func(t.field))", where
    func is a monotonic function.

    (2) is applicable for
      "[SUB]PARTITION BY <any-partitioning-type>(any_func(t.integer_field))"

    If both are applicable, (1) is preferred over (2).

    This function sets part_info::get_part_iter_for_interval according to
    these criteria, and also sets some auxiliary fields that the function
    uses.
*/
#ifdef WITH_PARTITION_STORAGE_ENGINE
6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579
static void set_up_range_analysis_info(partition_info *part_info)
{
  enum_monotonicity_info minfo;

  /* Set the catch-all default: no interval analyzer is available */
  part_info->get_part_iter_for_interval= NULL;
  part_info->get_subpart_iter_for_interval= NULL;

  /*
    Check if get_part_iter_for_interval_via_mapping() can be used for
    partitioning
  */
  switch (part_info->part_type) {
  case RANGE_PARTITION:
  case LIST_PARTITION:
    minfo= part_info->part_expr->get_monotonicity_info();
    if (minfo != NON_MONOTONIC)
    {
      part_info->range_analysis_include_bounds=
        test(minfo == MONOTONIC_INCREASING);
      part_info->get_part_iter_for_interval=
        get_part_iter_for_interval_via_mapping;
      goto setup_subparts;
    }
    /*
      The partitioning function is not monotonic: fall through and check
      whether the "walking" analyzer can be used instead.
    */
  default:
    ;
  }

  /*
    Check if get_part_iter_for_interval_via_walking() can be used for
    partitioning: this requires a single partitioning field of an integer
    type.
  */
  if (part_info->no_part_fields == 1)
  {
    Field *field= part_info->part_field_array[0];
    switch (field->type()) {
    case MYSQL_TYPE_TINY:
    case MYSQL_TYPE_SHORT:
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_LONG:
    case MYSQL_TYPE_LONGLONG:
      part_info->get_part_iter_for_interval=
        get_part_iter_for_interval_via_walking;
      break;
    default:
      ;
    }
  }

setup_subparts:
  /*
    Check if get_part_iter_for_interval_via_walking() can be used for
    subpartitioning. The set of accepted integer types is kept the same as
    for partitioning above (including MYSQL_TYPE_INT24).
  */
  if (part_info->no_subpart_fields == 1)
  {
    Field *field= part_info->subpart_field_array[0];
    switch (field->type()) {
    case MYSQL_TYPE_TINY:
    case MYSQL_TYPE_SHORT:
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_LONG:
    case MYSQL_TYPE_LONGLONG:
      part_info->get_subpart_iter_for_interval=
        get_part_iter_for_interval_via_walking;
      break;
    default:
      ;
    }
  }
}


/*
  Type of the functions that get_part_iter_for_interval_via_mapping() picks
  to translate an interval edge into an index.

  Such a function is called with the partition_info, a flag that is 1 for
  the left (minimum) edge and 0 for the right (maximum) edge, and a flag
  named include_endpoint (presumably: whether the edge value itself belongs
  to the interval). The returned uint32 is stored as the start or the end of
  the iterator's part_nums range.
*/
typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint,
                                    bool include_endpoint);

/*
  Partitioning Interval Analysis: Initialize the iterator for "mapping" case

  SYNOPSIS
    get_part_iter_for_interval_via_mapping()
      part_info   Partition info
      is_subpart  TRUE  - act for subpartitioning
                  FALSE - act for partitioning
      min_value   minimum field value, in opt_range key format.
      max_value   minimum field value, in opt_range key format.
      flags       Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
                  NO_MAX_RANGE.
      part_iter   Iterator structure to be initialized

  DESCRIPTION
    Initialize partition set iterator to walk over the interval in
6642 6643
    ordered-array-of-partitions (for RANGE partitioning) or 
    ordered-array-of-list-constants (for LIST partitioning) space.
6644 6645

  IMPLEMENTATION
6646
    This function is used when partitioning is done by
6647 6648 6649 6650 6651 6652 6653 6654
    <RANGE|LIST>(ascending_func(t.field)), and we can map an interval in
    t.field space into a sub-array of partition_info::range_int_array or
    partition_info::list_array (see get_partition_id_range_for_endpoint,
    get_list_array_idx_for_endpoint for details).
    
    The function performs this interval mapping, and sets the iterator to
    traverse the sub-array and return appropriate partitions.
    
6655
  RETURN
6656 6657 6658 6659 6660 6661 6662
    0 - No matching partitions (iterator not initialized)
    1 - Ok, iterator intialized for traversal of matching partitions.
   -1 - All partitions would match (iterator not initialized)
*/

int get_part_iter_for_interval_via_mapping(partition_info *part_info,
                                           bool is_subpart,
6663
                                           char *min_value, char *max_value,
6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674
                                           uint flags,
                                           PARTITION_ITERATOR *part_iter)
{
  DBUG_ASSERT(!is_subpart);
  Field *field= part_info->part_field_array[0];
  uint32             max_endpoint_val;
  get_endpoint_func  get_endpoint;
  uint field_len= field->pack_length_in_rec();

  if (part_info->part_type == RANGE_PARTITION)
  {
6675
    if (part_info->part_charset_field_array)
6676 6677 6678
      get_endpoint=        get_partition_id_range_for_endpoint_charset;
    else
      get_endpoint=        get_partition_id_range_for_endpoint;
6679 6680 6681 6682 6683
    max_endpoint_val=    part_info->no_parts;
    part_iter->get_next= get_next_partition_id_range;
  }
  else if (part_info->part_type == LIST_PARTITION)
  {
6684

6685
    if (part_info->part_charset_field_array)
6686 6687 6688
      get_endpoint=        get_list_array_idx_for_endpoint_charset;
    else
      get_endpoint=        get_list_array_idx_for_endpoint;
6689 6690 6691
    max_endpoint_val=    part_info->no_list_values;
    part_iter->get_next= get_next_partition_id_list;
    part_iter->part_info= part_info;
6692
    part_iter->ret_null_part= part_iter->ret_null_part_orig= FALSE;
6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704
    if (max_endpoint_val == 0)
    {
      /*
        We handle this special case without optimisations since it is
        of little practical value but causes a great number of complex
        checks later in the code.
      */
      part_iter->part_nums.start= part_iter->part_nums.end= 0;
      part_iter->part_nums.cur= 0;
      part_iter->ret_null_part= part_iter->ret_null_part_orig= TRUE;
      return -1;
    }
6705 6706
  }
  else
6707
    assert(0);
6708

6709 6710 6711 6712 6713 6714
  /* 
    Find minimum: Do special handling if the interval has left bound in form
     " NULL <= X ":
  */
  if (field->real_maybe_null() && part_info->has_null_value && 
      !(flags & (NO_MIN_RANGE | NEAR_MIN)) && *min_value)
6715
  {
6716 6717 6718
    part_iter->ret_null_part= part_iter->ret_null_part_orig= TRUE;
    part_iter->part_nums.start= part_iter->part_nums.cur= 0;
    if (*max_value && !(flags & NO_MAX_RANGE))
6719
    {
6720 6721 6722
      /* The right bound is X <= NULL, i.e. it is a "X IS NULL" interval */
      part_iter->part_nums.end= 0;
      return 1;
6723 6724
    }
  }
6725 6726
  else
  {
6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744
    if (flags & NO_MIN_RANGE)
      part_iter->part_nums.start= part_iter->part_nums.cur= 0;
    else
    {
      /*
        Store the interval edge in the record buffer, and call the
        function that maps the edge in table-field space to an edge
        in ordered-set-of-partitions (for RANGE partitioning) or 
        index-in-ordered-array-of-list-constants (for LIST) space.
      */
      store_key_image_to_rec(field, min_value, field_len);
      bool include_endp= part_info->range_analysis_include_bounds ||
                         !test(flags & NEAR_MIN);
      part_iter->part_nums.start= get_endpoint(part_info, 1, include_endp);
      part_iter->part_nums.cur= part_iter->part_nums.start;
      if (part_iter->part_nums.start == max_endpoint_val)
        return 0; /* No partitions */
    }
6745 6746 6747 6748
  }

  /* Find maximum, do the same as above but for right interval bound */
  if (flags & NO_MAX_RANGE)
6749
    part_iter->part_nums.end= max_endpoint_val;
6750 6751 6752 6753 6754
  else
  {
    store_key_image_to_rec(field, max_value, field_len);
    bool include_endp= part_info->range_analysis_include_bounds ||
                       !test(flags & NEAR_MAX);
6755
    part_iter->part_nums.end= get_endpoint(part_info, 0, include_endp);
6756 6757
    if (part_iter->part_nums.start == part_iter->part_nums.end &&
        !part_iter->ret_null_part)
6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768
      return 0; /* No partitions */
  }
  return 1; /* Ok, iterator initialized */
}


/*
  Upper limit on the number of field values in an interval that
  get_part_iter_for_interval_via_walking agrees to enumerate: for an
  interval with more values it returns -1. See that function for the
  rationale.
*/
#define MAX_RANGE_TO_WALK 10


/*
6769
  Partitioning Interval Analysis: Initialize iterator to walk field interval
6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784

  SYNOPSIS
    get_part_iter_for_interval_via_walking()
      part_info   Partition info
      is_subpart  TRUE  - act for subpartitioning
                  FALSE - act for partitioning
      min_value   minimum field value, in opt_range key format.
      max_value   minimum field value, in opt_range key format.
      flags       Some combination of NEAR_MIN, NEAR_MAX, NO_MIN_RANGE,
                  NO_MAX_RANGE.
      part_iter   Iterator structure to be initialized

  DESCRIPTION
    Initialize partition set iterator to walk over interval in integer field
    space. That is, for "const1 <=? t.field <=? const2" interval, initialize 
6785 6786
    the iterator to return a set of [sub]partitions obtained with the
    following procedure:
6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799
      get partition id for t.field = const1,   return it
      get partition id for t.field = const1+1, return it
       ...                 t.field = const1+2, ...
       ...                           ...       ...
       ...                 t.field = const2    ...

  IMPLEMENTATION
    See get_partitions_in_range_iter for general description of interval
    analysis. We support walking over the following intervals: 
      "t.field IS NULL" 
      "c1 <=? t.field <=? c2", where c1 and c2 are finite. 
    Intervals with +inf/-inf, and [NULL, c1] interval can be processed but
    that is more tricky and I don't have time to do it right now.
6800

6801 6802 6803 6804 6805 6806 6807 6808
    Additionally we have these requirements:
    * number of values in the interval must be less then number of
      [sub]partitions, and 
    * Number of values in the interval must be less then MAX_RANGE_TO_WALK.
    
    The rationale behind these requirements is that if they are not met
    we're likely to hit most of the partitions and traversing the interval
    will only add overhead. So it's better return "all partitions used" in
6809
    that case.
6810 6811 6812 6813 6814 6815 6816 6817 6818

  RETURN
    0 - No matching partitions, iterator not initialized
    1 - Some partitions would match, iterator intialized for traversing them
   -1 - All partitions would match, iterator not initialized
*/

int get_part_iter_for_interval_via_walking(partition_info *part_info,
                                           bool is_subpart,
6819
                                           char *min_value, char *max_value,
6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857
                                           uint flags,
                                           PARTITION_ITERATOR *part_iter)
{
  Field *field;
  uint total_parts;
  partition_iter_func get_next_func;
  if (is_subpart)
  {
    field= part_info->subpart_field_array[0];
    total_parts= part_info->no_subparts;
    get_next_func=  get_next_subpartition_via_walking;
  }
  else
  {
    field= part_info->part_field_array[0];
    total_parts= part_info->no_parts;
    get_next_func=  get_next_partition_via_walking;
  }

  /* Handle the "t.field IS NULL" interval, it is a special case */
  if (field->real_maybe_null() && !(flags & (NO_MIN_RANGE | NO_MAX_RANGE)) &&
      *min_value && *max_value)
  {
    /* 
      We don't have a part_iter->get_next() function that would find which
      partition "t.field IS NULL" belongs to, so find partition that contains 
      NULL right here, and return an iterator over singleton set.
    */
    uint32 part_id;
    field->set_null();
    if (is_subpart)
    {
      part_id= part_info->get_subpartition_id(part_info);
      init_single_partition_iterator(part_id, part_iter);
      return 1; /* Ok, iterator initialized */
    }
    else
    {
sergefp@mysql.com's avatar
sergefp@mysql.com committed
6858
      longlong dummy;
6859 6860 6861 6862 6863
      int res= part_info->is_sub_partitioned() ?
                  part_info->get_part_partition_id(part_info, &part_id,
                                                   &dummy):
                  part_info->get_partition_id(part_info, &part_id, &dummy);
      if (!res)
6864 6865 6866 6867 6868 6869 6870 6871
      {
        init_single_partition_iterator(part_id, part_iter);
        return 1; /* Ok, iterator initialized */
      }
    }
    return 0; /* No partitions match */
  }

6872 6873 6874 6875 6876
  if ((field->real_maybe_null() && 
       ((!(flags & NO_MIN_RANGE) && *min_value) ||  // NULL <? X
        (!(flags & NO_MAX_RANGE) && *max_value))) ||  // X <? NULL
      (flags & (NO_MIN_RANGE | NO_MAX_RANGE)))    // -inf at any bound
  {
6877
    return -1; /* Can't handle this interval, have to use all partitions */
6878
  }
6879 6880 6881 6882 6883 6884 6885 6886 6887
  
  /* Get integers for left and right interval bound */
  longlong a, b;
  uint len= field->pack_length_in_rec();
  store_key_image_to_rec(field, min_value, len);
  a= field->val_int();
  
  store_key_image_to_rec(field, max_value, len);
  b= field->val_int();
6888 6889 6890 6891 6892 6893 6894 6895 6896
  
  /* 
    Handle a special case where the distance between interval bounds is 
    exactly 4G-1. This interval is too big for range walking, and if it is an
    (x,y]-type interval then the following "b +=..." code will convert it to 
    an empty interval by "wrapping around" a + 4G-1 + 1 = a. 
  */
  if ((ulonglong)b - (ulonglong)a == ~0ULL)
    return -1;
6897 6898 6899

  a += test(flags & NEAR_MIN);
  b += test(!(flags & NEAR_MAX));
6900
  ulonglong n_values= b - a;
6901 6902 6903 6904
  
  if (n_values > total_parts || n_values > MAX_RANGE_TO_WALK)
    return -1;

6905
  part_iter->field_vals.start= part_iter->field_vals.cur= a;
6906
  part_iter->field_vals.end=   b;
6907 6908 6909 6910 6911 6912 6913 6914 6915 6916
  part_iter->part_info= part_info;
  part_iter->get_next=  get_next_func;
  return 1;
}


/*
  PARTITION_ITERATOR::get_next implementation: enumerate partitions in range

  SYNOPSIS
    get_next_partition_id_range()
      part_iter  Partition set iterator structure

  DESCRIPTION
    Return the [sub]partition ids part_nums.cur, part_nums.cur + 1, ... one
    per call, stopping when part_nums.end is reached (part_nums.end itself
    is not returned). When the end is reached the iterator is rewound to
    part_nums.start, so it can be walked again.
    The function conforms to partition_iter_func type.

  RETURN
    partition id
    NOT_A_PARTITION_ID if there are no more partitions
*/

uint32 get_next_partition_id_range(PARTITION_ITERATOR* part_iter)
{
  if (part_iter->part_nums.cur != part_iter->part_nums.end)
    return part_iter->part_nums.cur++;

  /* Exhausted: rewind for the next traversal */
  part_iter->part_nums.cur= part_iter->part_nums.start;
  return NOT_A_PARTITION_ID;
}


/*
  PARTITION_ITERATOR::get_next implementation for LIST partitioning

  SYNOPSIS
    get_next_partition_id_list()
      part_iter  Partition set iterator structure

  DESCRIPTION
    This implementation of PARTITION_ITERATOR::get_next() is special for
    LIST partitioning: it returns part_info->list_array[i].partition_id for
    i running from part_nums.cur up to (not including) part_nums.end. After
    that, if ret_null_part is set, part_info->has_null_part_id is returned
    once. Finally the iterator is rewound: part_nums.cur is reset to
    part_nums.start and ret_null_part to ret_null_part_orig.
    The function conforms to partition_iter_func type.

  RETURN
    partition id
    NOT_A_PARTITION_ID if there are no more partitions
*/

uint32 get_next_partition_id_list(PARTITION_ITERATOR *part_iter)
{
  partition_info *part_info= part_iter->part_info;

  /* Still inside the sub-array of list constants */
  if (part_iter->part_nums.cur != part_iter->part_nums.end)
    return part_info->list_array[part_iter->part_nums.cur++].partition_id;

  /* The sub-array is done; hand out the NULL partition once if requested */
  if (part_iter->ret_null_part)
  {
    part_iter->ret_null_part= FALSE;
    return part_info->has_null_part_id;
  }

  /* Nothing left: restore the initial state for the next traversal */
  part_iter->part_nums.cur= part_iter->part_nums.start;
  part_iter->ret_null_part= part_iter->ret_null_part_orig;
  return NOT_A_PARTITION_ID;
}


/*
  PARTITION_ITERATOR::get_next implementation: walk over field-space interval

  SYNOPSIS
    get_next_partition_via_walking()
      part_iter  Partitioning iterator

  DESCRIPTION
    This implementation of PARTITION_ITERATOR::get_next() returns ids of
    partitions that contain records with partitioning field value within
    the interval from field_vals.cur up to (not including) field_vals.end.
    Each value is stored into the partitioning field and the partition id
    is computed for it; values for which no partition id can be found are
    skipped. When the interval is exhausted the iterator is rewound to
    field_vals.start.
    The function conforms to partition_iter_func type.

  RETURN
    partition id
    NOT_A_PARTITION_ID if there are no more partitions.
*/

static uint32 get_next_partition_via_walking(PARTITION_ITERATOR *part_iter)
{
  uint32 part_id;
  partition_info *part_info= part_iter->part_info;
  Field *field= part_info->part_field_array[0];
  while (part_iter->field_vals.cur != part_iter->field_vals.end)
  {
    longlong dummy;
    field->store(part_iter->field_vals.cur++,
                 ((Field_num*)field)->unsigned_flag);
    /*
      Pick exactly one id function depending on whether the table is
      sub-partitioned, the same way get_part_iter_for_interval_via_walking
      does for the "IS NULL" interval. A zero result means part_id was found.
    */
    int res= part_info->is_sub_partitioned() ?
                part_info->get_part_partition_id(part_info, &part_id,
                                                 &dummy):
                part_info->get_partition_id(part_info, &part_id, &dummy);
    if (!res)
      return part_id;
  }
  part_iter->field_vals.cur= part_iter->field_vals.start;
  return NOT_A_PARTITION_ID;
}


/*
  PARTITION_ITERATOR::get_next implementation for subpartitions: walk over
  field-space interval.

  Stores the next value of the interval (field_vals.cur, up to but not
  including field_vals.end) into the first subpartitioning field and returns
  the subpartition id computed for it. When the interval is exhausted the
  iterator is rewound to field_vals.start and NOT_A_PARTITION_ID is returned.
*/

static uint32 get_next_subpartition_via_walking(PARTITION_ITERATOR *part_iter)
{
  partition_info *part_info= part_iter->part_info;

  if (part_iter->field_vals.cur != part_iter->field_vals.end)
  {
    Field *subpart_field= part_info->subpart_field_array[0];
    subpart_field->store(part_iter->field_vals.cur++, FALSE);
    return part_info->get_subpartition_id(part_info);
  }

  /* Exhausted: rewind for the next traversal */
  part_iter->field_vals.cur= part_iter->field_vals.start;
  return NOT_A_PARTITION_ID;
}
7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050


/*
  Create partition names

  SYNOPSIS
    create_partition_name()
    out:out                   Created partition name string
    in1                       First part
    in2                       Second part
    name_variant              Normal, temporary or renamed partition name

  RETURN VALUE
    NONE

  DESCRIPTION
    This method is used to calculate the partition name, service routine to
    the del_ren_cre_table method.
*/

7051 7052 7053
void create_partition_name(char *out, const char *in1,
                           const char *in2, uint name_variant,
                           bool translate)
7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092
{
  char transl_part_name[FN_REFLEN];
  const char *transl_part;

  if (translate)
  {
    tablename_to_filename(in2, transl_part_name, FN_REFLEN);
    transl_part= transl_part_name;
  }
  else
    transl_part= in2;
  if (name_variant == NORMAL_PART_NAME)
    strxmov(out, in1, "#P#", transl_part, NullS);
  else if (name_variant == TEMP_PART_NAME)
    strxmov(out, in1, "#P#", transl_part, "#TMP#", NullS);
  else if (name_variant == RENAMED_PART_NAME)
    strxmov(out, in1, "#P#", transl_part, "#REN#", NullS);
}


/*
  Create subpartition name

  SYNOPSIS
    create_subpartition_name()
    out:out                   Created partition name string
    in1                       First part
    in2                       Second part
    in3                       Third part
    name_variant              Normal, temporary or renamed partition name

  RETURN VALUE
    NONE

  DESCRIPTION
  This method is used to calculate the subpartition name, service routine to
  the del_ren_cre_table method.
*/

7093 7094 7095
void create_subpartition_name(char *out, const char *in1,
                              const char *in2, const char *in3,
                              uint name_variant)
7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110
{
  char transl_part_name[FN_REFLEN], transl_subpart_name[FN_REFLEN];

  tablename_to_filename(in2, transl_part_name, FN_REFLEN);
  tablename_to_filename(in3, transl_subpart_name, FN_REFLEN);
  if (name_variant == NORMAL_PART_NAME)
    strxmov(out, in1, "#P#", transl_part_name,
            "#SP#", transl_subpart_name, NullS);
  else if (name_variant == TEMP_PART_NAME)
    strxmov(out, in1, "#P#", transl_part_name,
            "#SP#", transl_subpart_name, "#TMP#", NullS);
  else if (name_variant == RENAMED_PART_NAME)
    strxmov(out, in1, "#P#", transl_part_name,
            "#SP#", transl_subpart_name, "#REN#", NullS);
}
7111
#endif
7112