/* Copyright (C) 2000-2003 MySQL AB

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

/*
  This file defines the NDB Cluster handler: the interface between MySQL and
  NDB Cluster
*/

23
#ifdef USE_PRAGMA_IMPLEMENTATION
24
#pragma implementation				// gcc: Class implementation
25 26 27 28 29 30 31 32 33 34
#endif

#include "mysql_priv.h"

#ifdef HAVE_NDBCLUSTER_DB
#include <my_dir.h>
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
#include <ndbapi/NdbScanFilter.hpp>

35 36 37 38
// Options defined in mysqld.cc
extern my_bool opt_ndb_optimized_node_selection;
extern const char *opt_ndbcluster_connectstring;

39
/* Default value for parallelism */
static const int parallelism= 0;

/*
  Default value for the maximum number of transactions
  that can be created against NDB from this handler
*/
static const int max_transactions= 256;

/*
  The ".ndb" extension string.
  NOTE(review): presumably the extension of handler-specific files;
  confirm at the use sites, which are not visible here.
*/
static const char *ha_ndb_ext=".ndb";

48 49 50 51 52
/*
  Forward declarations of the callbacks that are installed in the
  ndbcluster_hton handlerton initializer
*/
static int ndbcluster_close_connection(THD *thd);
static int ndbcluster_commit(THD *thd, bool all);
static int ndbcluster_rollback(THD *thd, bool all);

/*
  Handlerton describing the NDB Cluster engine.

  Only close_connection, commit and rollback are provided; the
  savepoint, prepare and XA recovery entries are left as NULL.
  The slot member is used as the index into thd->ha_data by
  get_thd_ndb()/set_thd_ndb().
*/
static handlerton ndbcluster_hton = {
  "ndbcluster",
  0,                            /* slot */
  0,                            /* savepoint size */
  ndbcluster_close_connection,
  NULL,                         /* savepoint_set */
  NULL,                         /* savepoint_rollback */
  NULL,                         /* savepoint_release */
  ndbcluster_commit,
  ndbcluster_rollback,
  NULL,                         /* prepare */
  NULL,                         /* recover */
  NULL,                         /* commit_by_xid */
  NULL                          /* rollback_by_xid */
};

68
/* Length passed to NdbOperation::equal() for the hidden primary key */
#define NDB_HIDDEN_PRIMARY_KEY_LENGTH 8

/*
  All-ones Uint64 value. Parenthesized so that the expansion stays a
  single operand wherever the macro is used.
*/
#define NDB_FAILED_AUTO_INCREMENT (~(Uint64)0)
#define NDB_AUTO_INCREMENT_RETRIES 10

/*
  Trace the code and message of an NdbError.
  The argument is parenthesized so any expression yielding an NdbError
  can be passed; note that it is evaluated twice.
*/
#define ERR_PRINT(err) \
  DBUG_PRINT("error", ("%d  message: %s", (err).code, (err).message))

/*
  Trace an NdbError and return from the enclosing function with the
  error mapped through ndb_to_mysql_error().
  The argument is bound once to a const reference, so it is evaluated
  only once. Must be used inside a function that has done DBUG_ENTER.
*/
#define ERR_RETURN(err)                  \
{                                        \
  const NdbError& tmp= (err);            \
  ERR_PRINT(tmp);                        \
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
}

/* Short aliases for the long NdbDictionary type names */
typedef NdbDictionary::Column      NDBCOL;
typedef NdbDictionary::Table       NDBTAB;
typedef NdbDictionary::Index       NDBINDEX;
typedef NdbDictionary::Dictionary  NDBDICT;

89
bool ndbcluster_inited= FALSE;
90

91
static Ndb* g_ndb= NULL;
92
static Ndb_cluster_connection* g_ndb_cluster_connection= NULL;
93

94 95 96 97 98 99 100 101 102 103 104 105 106
// Handler synchronization
pthread_mutex_t ndbcluster_mutex;

// Table lock handling
static HASH ndbcluster_open_tables;

static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                my_bool not_used __attribute__((unused)));
static NDB_SHARE *get_share(const char *table_name);
static void free_share(NDB_SHARE *share);

static int packfrm(const void *data, uint len, const void **pack_data, uint *pack_len);
static int unpackfrm(const void **data, uint *len,
107
                     const void* pack_data);
108

109
static int ndb_get_table_statistics(Ndb*, const char *, 
110
                                    struct Ndb_statistics *);
111

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
112 113 114 115 116 117
// Util thread variables
// Thread handle of the util thread
static pthread_t ndb_util_thread;
// NOTE(review): presumably the mutex/condition pair used to wake up or
// stop the util thread; their use is not visible in this part of the file
pthread_mutex_t LOCK_ndb_util_thread;
pthread_cond_t COND_ndb_util_thread;
// Entry point of the util thread, declared with C linkage
extern "C" pthread_handler_decl(ndb_util_thread_func, arg);
// NOTE(review): unit and consumer of this value are not visible here
ulong ndb_cache_check_time;
118

119 120 121 122
/*
  Dummy buffer to read zero pack_length fields
  which are mapped to 1 char
*/
123
static uint32 dummy_buf;
124

125 126 127 128 129 130 131 132 133 134 135
/*
  Stats that can be retrieved from ndb
*/

struct Ndb_statistics {
  Uint64 row_count;        // copied into the local 'records' statistic
  Uint64 commit_count;
  Uint64 row_size;         // copied into mean_rec_length
  Uint64 fragment_memory;  // copied into data_file_length
};

136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
/* Status variables shown with 'show status like 'Ndb%' */

// Backing storage for the entries of ndb_status_variables[];
// refreshed by update_status_variables()
static long ndb_cluster_node_id= 0;
static const char * ndb_connected_host= 0;
static long ndb_connected_port= 0;
// Always written as 0 by update_status_variables(); its status entry
// is commented out
static long ndb_number_of_replicas= 0;
static long ndb_number_of_storage_nodes= 0;

/*
  Refresh the status variables from the given cluster connection

  RETURN
    0   Always
*/
static int update_status_variables(Ndb_cluster_connection *c)
{
  // Identity of this node and of the host it is connected to
  ndb_cluster_node_id= c->node_id();
  ndb_connected_port= c->get_connected_port();
  ndb_connected_host= c->get_connected_host();

  // The replica count is not fetched; it is simply reset
  ndb_number_of_replicas= 0;
  ndb_number_of_storage_nodes= c->no_db_nodes();

  return 0;
}

// Table of status variables; each entry points at one of the static
// variables refreshed by update_status_variables(). The list is
// terminated by the NullS entry.
struct show_var_st ndb_status_variables[]= {
  {"cluster_node_id",        (char*) &ndb_cluster_node_id,         SHOW_LONG},
  {"connected_host",         (char*) &ndb_connected_host,      SHOW_CHAR_PTR},
  {"connected_port",         (char*) &ndb_connected_port,          SHOW_LONG},
//  {"number_of_replicas",     (char*) &ndb_number_of_replicas,      SHOW_LONG},
  {"number_of_storage_nodes",(char*) &ndb_number_of_storage_nodes, SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};

163 164 165 166 167 168 169 170
/*
  Error handling functions
*/

struct err_code_mapping
{
  int ndb_err;
  int my_err;
171
  int show_warning;
172 173 174 175
};

static const err_code_mapping err_map[]= 
{
176 177
  { 626, HA_ERR_KEY_NOT_FOUND, 0 },
  { 630, HA_ERR_FOUND_DUPP_KEY, 0 },
178
  { 893, HA_ERR_FOUND_DUPP_KEY, 0 },
179 180 181
  { 721, HA_ERR_TABLE_EXIST, 1 },
  { 4244, HA_ERR_TABLE_EXIST, 1 },

182
  { 709, HA_ERR_NO_SUCH_TABLE, 0 },
183 184 185 186 187 188 189 190 191 192 193 194 195 196

  { 266, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 274, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 296, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 297, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 237, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },

  { 623, HA_ERR_RECORD_FILE_FULL, 1 },
  { 624, HA_ERR_RECORD_FILE_FULL, 1 },
  { 625, HA_ERR_RECORD_FILE_FULL, 1 },
  { 826, HA_ERR_RECORD_FILE_FULL, 1 },
  { 827, HA_ERR_RECORD_FILE_FULL, 1 },
  { 832, HA_ERR_RECORD_FILE_FULL, 1 },

197 198
  { 284, HA_ERR_TABLE_DEF_CHANGED, 0 },

199 200 201
  { 0, 1, 0 },

  { -1, -1, 1 }
202 203 204 205 206 207
};


static int ndb_to_mysql_error(const NdbError *err)
{
  uint i;
208 209
  for (i=0; err_map[i].ndb_err != err->code && err_map[i].my_err != -1; i++);
  if (err_map[i].show_warning)
210
  {
211 212
    // Push the NDB error message as warning
    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
213 214
                        ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
                        err->code, err->message, "NDB");
215
  }
216 217
  if (err_map[i].my_err == -1)
    return err->code;
218 219 220 221
  return err_map[i].my_err;
}


222 223

inline
224
int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans)
225
{
226
#ifdef NOT_USED
227
  int m_batch_execute= 0;
228
  if (m_batch_execute)
229
    return 0;
230
#endif
231
  return trans->execute(NdbTransaction::NoCommit,
232 233
                        NdbTransaction::AbortOnError,
                        h->m_force_send);
234 235 236
}

inline
237
int execute_commit(ha_ndbcluster *h, NdbTransaction *trans)
238
{
239
#ifdef NOT_USED
240
  int m_batch_execute= 0;
241
  if (m_batch_execute)
242
    return 0;
243
#endif
244
  return trans->execute(NdbTransaction::Commit,
245 246
                        NdbTransaction::AbortOnError,
                        h->m_force_send);
247 248 249
}

inline
250
int execute_commit(THD *thd, NdbTransaction *trans)
251 252
{
#ifdef NOT_USED
253
  int m_batch_execute= 0;
254 255 256
  if (m_batch_execute)
    return 0;
#endif
257
  return trans->execute(NdbTransaction::Commit,
258 259
                        NdbTransaction::AbortOnError,
                        thd->variables.ndb_force_send);
260 261 262
}

inline
263
int execute_no_commit_ie(ha_ndbcluster *h, NdbTransaction *trans)
264
{
265
#ifdef NOT_USED
266
  int m_batch_execute= 0;
267
  if (m_batch_execute)
268
    return 0;
269
#endif
270
  return trans->execute(NdbTransaction::NoCommit,
271 272
                        NdbTransaction::AO_IgnoreError,
                        h->m_force_send);
273 274
}

275 276 277
/*
  Place holder for ha_ndbcluster thread specific data
*/
278 279
Thd_ndb::Thd_ndb()
{
  // Counters and the error flag start out cleared
  error= 0;
  count= 0;
  lock_count= 0;

  // No transaction is attached yet
  stmt= NULL;
  all= NULL;

  // Each Thd_ndb owns an Ndb object on the global cluster connection
  ndb= new Ndb(g_ndb_cluster_connection, "");
}

Thd_ndb::~Thd_ndb()
{
  // Release the owned Ndb object; deleting a NULL pointer is a no-op
  delete ndb;
  ndb= NULL;
  changed_tables.empty();
}

296 297 298 299 300 301 302 303
/* Fetch the Thd_ndb stored in the thread's ha_data slot for this engine */
inline
Thd_ndb *
get_thd_ndb(THD *thd)
{
  return (Thd_ndb *) thd->ha_data[ndbcluster_hton.slot];
}

/* Store a Thd_ndb in the thread's ha_data slot for this engine */
inline
void
set_thd_ndb(THD *thd, Thd_ndb *thd_ndb)
{
  thd->ha_data[ndbcluster_hton.slot]= thd_ndb;
}

304 305 306
/* Return the Ndb object of the current thread */
inline
Ndb *ha_ndbcluster::get_ndb()
{
  Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
  return thd_ndb->ndb;
}

/*
 * manage uncommitted inserts/deletes during a transaction to get the record count correct
 */

314
struct Ndb_local_table_statistics {
315
  int no_uncommitted_rows_count;
316
  ulong last_count;
317 318 319
  ha_rows records;
};

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
320 321 322
void ha_ndbcluster::set_rec_per_key()
{
  DBUG_ENTER("ha_ndbcluster::get_status_const");
323
  for (uint i=0 ; i < table->s->keys ; i++)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
324 325 326 327 328 329
  {
    table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= 1;
  }
  DBUG_VOID_RETURN;
}

330 331
void ha_ndbcluster::records_update()
{
332 333
  if (m_ha_not_exact_count)
    return;
334
  DBUG_ENTER("ha_ndbcluster::records_update");
335 336
  struct Ndb_local_table_statistics *info= 
    (struct Ndb_local_table_statistics *)m_table_info;
337
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
338 339
                      ((const NDBTAB *)m_table)->getTableId(),
                      info->no_uncommitted_rows_count));
340
  //  if (info->records == ~(ha_rows)0)
341
  {
342
    Ndb *ndb= get_ndb();
343 344 345 346 347
    struct Ndb_statistics stat;
    if(ndb_get_table_statistics(ndb, m_tabname, &stat) == 0){
      mean_rec_length= stat.row_size;
      data_file_length= stat.fragment_memory;
      info->records= stat.row_count;
348 349
    }
  }
350 351
  {
    THD *thd= current_thd;
352
    if (get_thd_ndb(thd)->error)
353 354
      info->no_uncommitted_rows_count= 0;
  }
355 356 357 358
  records= info->records+ info->no_uncommitted_rows_count;
  DBUG_VOID_RETURN;
}

359 360
void ha_ndbcluster::no_uncommitted_rows_execute_failure()
{
361 362
  if (m_ha_not_exact_count)
    return;
363
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
364
  get_thd_ndb(current_thd)->error= 1;
365 366 367
  DBUG_VOID_RETURN;
}

368 369
/*
  Reset the local table statistics if Thd_ndb::count has changed since
  they were last initialized
*/
void ha_ndbcluster::no_uncommitted_rows_init(THD *thd)
{
  if (m_ha_not_exact_count)
    return;
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_init");
  struct Ndb_local_table_statistics *local_stat=
    (struct Ndb_local_table_statistics *)m_table_info;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);

  // Counter unchanged: the statistics are still valid
  if (local_stat->last_count == thd_ndb->count)
    DBUG_VOID_RETURN;

  local_stat->last_count= thd_ndb->count;
  local_stat->no_uncommitted_rows_count= 0;
  local_stat->records= ~(ha_rows)0;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
                      ((const NDBTAB *)m_table)->getTableId(),
                      local_stat->no_uncommitted_rows_count));
  DBUG_VOID_RETURN;
}

void ha_ndbcluster::no_uncommitted_rows_update(int c)
{
390 391
  if (m_ha_not_exact_count)
    return;
392
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
393 394
  struct Ndb_local_table_statistics *info=
    (struct Ndb_local_table_statistics *)m_table_info;
395 396
  info->no_uncommitted_rows_count+= c;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
397 398
                      ((const NDBTAB *)m_table)->getTableId(),
                      info->no_uncommitted_rows_count));
399 400 401 402 403
  DBUG_VOID_RETURN;
}

/*
  Bump Thd_ndb::count, so that the next no_uncommitted_rows_init()
  resets the local statistics, and clear the error flag
*/
void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
{
  if (m_ha_not_exact_count)
    return;
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  thd_ndb->error= 0;
  thd_ndb->count++;
  DBUG_VOID_RETURN;
}

413 414
/*
  Take care of the error that occurred in NDB

  RETURN
    0   No error
    #   The mapped error code
*/

421
void ha_ndbcluster::invalidate_dictionary_cache(bool global)
422 423
{
  NDBDICT *dict= get_ndb()->getDictionary();
424
  DBUG_ENTER("invalidate_dictionary_cache");
425
  DBUG_PRINT("info", ("invalidating %s", m_tabname));
426

427
  if (global)
428
  {
429 430 431 432
    const NDBTAB *tab= dict->getTable(m_tabname);
    if (!tab)
      DBUG_VOID_RETURN;
    if (tab->getObjectStatus() == NdbDictionary::Object::Invalid)
433 434 435 436 437 438 439 440
    {
      // Global cache has already been invalidated
      dict->removeCachedTable(m_tabname);
      global= FALSE;
    }
    else
      dict->invalidateTable(m_tabname);
  }
441 442
  else
    dict->removeCachedTable(m_tabname);
443
  table->s->version=0L;			/* Free when thread is ready */
444
  /* Invalidate indexes */
445
  for (uint i= 0; i < table->s->keys; i++)
446 447 448 449 450
  {
    NDBINDEX *index = (NDBINDEX *) m_index[i].index;
    NDBINDEX *unique_index = (NDBINDEX *) m_index[i].unique_index;
    NDB_INDEX_TYPE idx_type= m_index[i].type;

451
    switch(idx_type) {
452 453
    case(PRIMARY_KEY_ORDERED_INDEX):
    case(ORDERED_INDEX):
454 455 456 457
      if (global)
        dict->invalidateIndex(index->getName(), m_tabname);
      else
        dict->removeCachedIndex(index->getName(), m_tabname);
serg@serg.mylan's avatar
serg@serg.mylan committed
458
      break;
459
    case(UNIQUE_ORDERED_INDEX):
460 461 462 463
      if (global)
        dict->invalidateIndex(index->getName(), m_tabname);
      else
        dict->removeCachedIndex(index->getName(), m_tabname);
464
    case(UNIQUE_INDEX):
465 466 467 468
      if (global)
        dict->invalidateIndex(unique_index->getName(), m_tabname);
      else
        dict->removeCachedIndex(unique_index->getName(), m_tabname);
469 470 471 472 473 474
      break;
    case(PRIMARY_KEY_INDEX):
    case(UNDEFINED_INDEX):
      break;
    }
  }
475
  DBUG_VOID_RETURN;
476
}
477

478
/*
  Handle the error reported by a transaction

  Schema errors invalidate the dictionary cache of the table. For NDB
  error 284 the table is looked up again to find out whether it is
  gone or has only changed. A duplicate key error sets m_dupkey to
  the primary key.

  RETURN
    1   The table lookup after error 284 failed with an error other
        than 709
    #   The error code mapped by ndb_to_mysql_error()
*/
int ha_ndbcluster::ndb_err(NdbTransaction *trans)
{
  int mysql_err;
  NdbError err= trans->getNdbError();
  DBUG_ENTER("ndb_err");

  ERR_PRINT(err);
  if (err.classification == NdbError::SchemaError)
  {
    invalidate_dictionary_cache(TRUE);

    if (err.code==284)
    {
      /*
         Check if the table is _really_ gone or if the table has
         been altered and thus changed table id
       */
      NDBDICT *dict= get_ndb()->getDictionary();
      DBUG_PRINT("info", ("Check if table %s is really gone", m_tabname));
      if (!(dict->getTable(m_tabname)))
      {
        // From here on the dictionary error replaces the transaction error
        err= dict->getNdbError();
        DBUG_PRINT("info", ("Table not found, error: %d", err.code));
        if (err.code != 709)
          DBUG_RETURN(1);
      }
      DBUG_PRINT("info", ("Table exists but must have changed"));
    }
  }

  mysql_err= ndb_to_mysql_error(&err);
  DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d", 
                      err.code, mysql_err));
  if (mysql_err == HA_ERR_FOUND_DUPP_KEY)
    m_dupkey= table->s->primary_key;

  DBUG_RETURN(mysql_err);
}


520
/*
521
  Override the default get_error_message in order to add the 
522 523 524
  error message of NDB 
 */

525
bool ha_ndbcluster::get_error_message(int error, 
526
                                      String *buf)
527
{
528
  DBUG_ENTER("ha_ndbcluster::get_error_message");
529
  DBUG_PRINT("enter", ("error: %d", error));
530

531
  Ndb *ndb= get_ndb();
532
  if (!ndb)
533
    DBUG_RETURN(FALSE);
534

535
  const NdbError err= ndb->getNdbError(error);
536 537 538 539
  bool temporary= err.status==NdbError::TemporaryError;
  buf->set(err.message, strlen(err.message), &my_charset_bin);
  DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
  DBUG_RETURN(temporary);
540 541 542
}


tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
543
#ifndef DBUG_OFF
pekka@mysql.com's avatar
pekka@mysql.com committed
544 545 546 547
/*
  Check if type is supported by NDB.
*/

tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
548
static bool ndb_supported_type(enum_field_types type)
pekka@mysql.com's avatar
pekka@mysql.com committed
549 550
{
  switch (type) {
pekka@mysql.com's avatar
pekka@mysql.com committed
551 552 553 554 555 556 557
  case MYSQL_TYPE_TINY:        
  case MYSQL_TYPE_SHORT:
  case MYSQL_TYPE_LONG:
  case MYSQL_TYPE_INT24:       
  case MYSQL_TYPE_LONGLONG:
  case MYSQL_TYPE_FLOAT:
  case MYSQL_TYPE_DOUBLE:
558 559
  case MYSQL_TYPE_DECIMAL:    
  case MYSQL_TYPE_NEWDECIMAL:
pekka@mysql.com's avatar
pekka@mysql.com committed
560 561 562 563 564 565 566 567
  case MYSQL_TYPE_TIMESTAMP:
  case MYSQL_TYPE_DATETIME:    
  case MYSQL_TYPE_DATE:
  case MYSQL_TYPE_NEWDATE:
  case MYSQL_TYPE_TIME:        
  case MYSQL_TYPE_YEAR:        
  case MYSQL_TYPE_STRING:      
  case MYSQL_TYPE_VAR_STRING:
pekka@mysql.com's avatar
pekka@mysql.com committed
568
  case MYSQL_TYPE_VARCHAR:
pekka@mysql.com's avatar
pekka@mysql.com committed
569 570 571 572 573 574
  case MYSQL_TYPE_TINY_BLOB:
  case MYSQL_TYPE_BLOB:    
  case MYSQL_TYPE_MEDIUM_BLOB:   
  case MYSQL_TYPE_LONG_BLOB:  
  case MYSQL_TYPE_ENUM:
  case MYSQL_TYPE_SET:         
575
  case MYSQL_TYPE_BIT:
576
    return TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
577 578
  case MYSQL_TYPE_NULL:   
  case MYSQL_TYPE_GEOMETRY:
pekka@mysql.com's avatar
pekka@mysql.com committed
579
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
580
  }
581
  return FALSE;
pekka@mysql.com's avatar
pekka@mysql.com committed
582
}
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
583
#endif /* !DBUG_OFF */
pekka@mysql.com's avatar
pekka@mysql.com committed
584 585


586 587 588 589 590
/*
  Instruct NDB to set the value of the hidden primary key
*/

bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op,
591
                                   uint fieldnr, const byte *field_ptr)
592 593 594
{
  DBUG_ENTER("set_hidden_key");
  DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr,
595
                            NDB_HIDDEN_PRIMARY_KEY_LENGTH) != 0);
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
}


/*
  Instruct NDB to set the value of one primary key attribute
*/

/*
  Add an equality condition on one key attribute to the operation,
  using pack_length() bytes from field_ptr. Blob fields are not
  allowed here (asserted).
  Returns non-zero when NdbOperation::equal() reports failure.
*/
int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field,
                               uint fieldnr, const byte *field_ptr)
{
  uint32 key_len= field->pack_length();
  DBUG_ENTER("set_ndb_key");
  DBUG_PRINT("enter", ("%d: %s, ndb_type: %u, len=%d", 
                       fieldnr, field->field_name, field->type(),
                       key_len));
  DBUG_DUMP("key", (char*)field_ptr, key_len);

  DBUG_ASSERT(ndb_supported_type(field->type()));
  DBUG_ASSERT(! (field->flags & BLOB_FLAG));

  // Common implementation for most field types
  const bool failed= (ndb_op->equal(fieldnr, (char*) field_ptr, key_len) != 0);
  DBUG_RETURN(failed);
}


/*
 Instruct NDB to set the value of one attribute
*/

int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, 
625
                                 uint fieldnr, bool *set_blob_value)
626 627 628 629 630 631 632 633
{
  const byte* field_ptr= field->ptr;
  uint32 pack_len=  field->pack_length();
  DBUG_ENTER("set_ndb_value");
  DBUG_PRINT("enter", ("%d: %s, type: %u, len=%d, is_null=%s", 
                       fieldnr, field->field_name, field->type(), 
                       pack_len, field->is_null()?"Y":"N"));
  DBUG_DUMP("value", (char*) field_ptr, pack_len);
pekka@mysql.com's avatar
pekka@mysql.com committed
634

tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
635
  DBUG_ASSERT(ndb_supported_type(field->type()));
636
  {
637
    // ndb currently does not support size 0
638
    uint32 empty_field;
639 640
    if (pack_len == 0)
    {
641 642 643
      pack_len= sizeof(empty_field);
      field_ptr= (byte *)&empty_field;
      if (field->is_null())
644
        empty_field= 0;
645
      else
646
        empty_field= 1;
647
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
648 649
    if (! (field->flags & BLOB_FLAG))
    {
650 651
      if (field->type() != MYSQL_TYPE_BIT)
      {
652 653 654 655 656 657 658
        if (field->is_null())
          // Set value to NULL
          DBUG_RETURN((ndb_op->setValue(fieldnr, 
                                        (char*)NULL, pack_len) != 0));
        // Common implementation for most field types
        DBUG_RETURN(ndb_op->setValue(fieldnr, 
                                     (char*)field_ptr, pack_len) != 0);
659 660 661
      }
      else // if (field->type() == MYSQL_TYPE_BIT)
      {
662
        longlong bits= field->val_int();
663
 
664 665
        // Round up bit field length to nearest word boundry
        pack_len= ((pack_len + 3) >> 2) << 2;
666 667 668 669 670
        DBUG_ASSERT(pack_len <= 8);
        if (field->is_null())
          // Set value to NULL
          DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL, pack_len) != 0));
        DBUG_PRINT("info", ("bit field"));
671
        DBUG_DUMP("value", (char*)&bits, pack_len);
672
#ifdef WORDS_BIGENDIAN
673 674 675 676 677
        if (pack_len < 5)
        {
          DBUG_RETURN(ndb_op->setValue(fieldnr, 
                                       ((char*)&bits)+4, pack_len) != 0);
        }
678
#endif
679
        DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits, pack_len) != 0);
680
      }
pekka@mysql.com's avatar
pekka@mysql.com committed
681 682
    }
    // Blob type
683
    NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
pekka@mysql.com's avatar
pekka@mysql.com committed
684 685 686 687 688 689 690 691 692 693 694 695
    if (ndb_blob != NULL)
    {
      if (field->is_null())
        DBUG_RETURN(ndb_blob->setNull() != 0);

      Field_blob *field_blob= (Field_blob*)field;

      // Get length and pointer to data
      uint32 blob_len= field_blob->get_length(field_ptr);
      char* blob_ptr= NULL;
      field_blob->get_ptr(&blob_ptr);

696 697 698
      // Looks like NULL ptr signals length 0 blob
      if (blob_ptr == NULL) {
        DBUG_ASSERT(blob_len == 0);
699
        blob_ptr= (char*)"";
700
      }
pekka@mysql.com's avatar
pekka@mysql.com committed
701 702 703 704 705

      DBUG_PRINT("value", ("set blob ptr=%x len=%u",
                           (unsigned)blob_ptr, blob_len));
      DBUG_DUMP("value", (char*)blob_ptr, min(blob_len, 26));

706
      if (set_blob_value)
707
        *set_blob_value= TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
708 709 710 711
      // No callback needed to write value
      DBUG_RETURN(ndb_blob->setValue(blob_ptr, blob_len) != 0);
    }
    DBUG_RETURN(1);
712
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727
}


/*
  Callback to read all blob values.
  - not done in unpack_record because unpack_record is valid
    after execute(Commit) but reading blobs is not
  - may only generate read operations; they have to be executed
    somewhere before the data is available
  - due to single buffer for all blobs, we let the last blob
    process all blobs (last so that all are active)
  - null bit is still set in unpack_record
  - TODO allocate blob part aligned buffers
*/

728
NdbBlob::ActiveHook g_get_ndb_blobs_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
729

730
int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
pekka@mysql.com's avatar
pekka@mysql.com committed
731
{
732
  DBUG_ENTER("g_get_ndb_blobs_value");
pekka@mysql.com's avatar
pekka@mysql.com committed
733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
  if (ndb_blob->blobsNextBlob() != NULL)
    DBUG_RETURN(0);
  ha_ndbcluster *ha= (ha_ndbcluster *)arg;
  DBUG_RETURN(ha->get_ndb_blobs_value(ndb_blob));
}

/*
  Read the values of all blob fields that have a blob handle in
  m_value into the shared buffer m_blobs_buffer

  The fields are walked twice: the first pass sums up the sizes, each
  rounded up to a multiple of 8, and grows the buffer if it is too
  small; the second pass reads every blob into its slice of the buffer
  and points the field at it.

  RETURN
    0    Ok
    -1   Getting a length, reading data or allocating the buffer failed
*/
int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob)
{
  DBUG_ENTER("get_ndb_blobs_value");

  // Field has no field number so cannot use TABLE blob_field
  // Loop twice, first only counting total buffer size
  for (int loop= 0; loop <= 1; loop++)
  {
    uint32 offset= 0;
    for (uint i= 0; i < table->s->fields; i++)
    {
      Field *field= table->field[i];
      NdbValue value= m_value[i];
      if (value.ptr != NULL && (field->flags & BLOB_FLAG))
      {
        Field_blob *field_blob= (Field_blob *)field;
        NdbBlob *ndb_blob= value.blob;
        Uint64 blob_len= 0;
        if (ndb_blob->getLength(blob_len) != 0)
          DBUG_RETURN(-1);
        // Align to Uint64
        uint32 blob_size= blob_len;
        if (blob_size % 8 != 0)
          blob_size+= 8 - blob_size % 8;
        if (loop == 1)
        {
          char *buf= m_blobs_buffer + offset;
          uint32 len= 0xffffffff;  // Max uint32
          // Print the pointer with %p; %x takes an unsigned int and
          // does not match a pointer sized argument
          DBUG_PRINT("value", ("read blob ptr=%p len=%u",
                               buf, (uint)blob_len));
          if (ndb_blob->readData(buf, len) != 0)
            DBUG_RETURN(-1);
          DBUG_ASSERT(len == blob_len);
          field_blob->set_ptr(len, buf);
        }
        offset+= blob_size;
      }
    }
    if (loop == 0 && offset > m_blobs_buffer_size)
    {
      // Buffer too small: release it and allocate the required size.
      // The size is reset first so that it stays consistent with the
      // buffer if the allocation fails.
      my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
      m_blobs_buffer_size= 0;
      DBUG_PRINT("value", ("allocate blobs buffer size %u", offset));
      m_blobs_buffer= my_malloc(offset, MYF(MY_WME));
      if (m_blobs_buffer == NULL)
        DBUG_RETURN(-1);
      m_blobs_buffer_size= offset;
    }
  }
  DBUG_RETURN(0);
}


/*
  Instruct NDB to fetch one field
pekka@mysql.com's avatar
pekka@mysql.com committed
794 795
  - data is read directly into buffer provided by field
    if field is NULL, data is read into memory provided by NDBAPI
796 797
*/

pekka@mysql.com's avatar
pekka@mysql.com committed
798
int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field,
799
                                 uint fieldnr, byte* buf)
800 801
{
  DBUG_ENTER("get_ndb_value");
pekka@mysql.com's avatar
pekka@mysql.com committed
802 803 804 805 806
  DBUG_PRINT("enter", ("fieldnr: %d flags: %o", fieldnr,
                       (int)(field != NULL ? field->flags : 0)));

  if (field != NULL)
  {
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
807 808
      DBUG_ASSERT(buf);
      DBUG_ASSERT(ndb_supported_type(field->type()));
pekka@mysql.com's avatar
pekka@mysql.com committed
809 810
      DBUG_ASSERT(field->ptr != NULL);
      if (! (field->flags & BLOB_FLAG))
811
      { 
812 813
        if (field->type() != MYSQL_TYPE_BIT)
        {
814 815 816 817 818 819 820 821
          byte *field_buf;
          if (field->pack_length() != 0)
            field_buf= buf + (field->ptr - table->record[0]);
          else
            field_buf= (byte *)&dummy_buf;
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr, 
                                                 field_buf);
        }
822 823 824 825
        else // if (field->type() == MYSQL_TYPE_BIT)
        {
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr);
        }
pekka@mysql.com's avatar
pekka@mysql.com committed
826 827 828 829 830 831 832 833 834 835
        DBUG_RETURN(m_value[fieldnr].rec == NULL);
      }

      // Blob type
      NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
      m_value[fieldnr].blob= ndb_blob;
      if (ndb_blob != NULL)
      {
        // Set callback
        void *arg= (void *)this;
836
        DBUG_RETURN(ndb_blob->setActiveHook(g_get_ndb_blobs_value, arg) != 0);
pekka@mysql.com's avatar
pekka@mysql.com committed
837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
      }
      DBUG_RETURN(1);
  }

  // Used for hidden key only
  m_value[fieldnr].rec= ndb_op->getValue(fieldnr, NULL);
  DBUG_RETURN(m_value[fieldnr].rec == NULL);
}


/*
  Check if any set or get of blob value in current query.
*/
bool ha_ndbcluster::uses_blob_value(bool all_fields)
{
852
  if (table->s->blob_fields == 0)
853
    return FALSE;
pekka@mysql.com's avatar
pekka@mysql.com committed
854
  if (all_fields)
855
    return TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
856
  {
857
    uint no_fields= table->s->fields;
pekka@mysql.com's avatar
pekka@mysql.com committed
858
    int i;
859
    THD *thd= current_thd;
pekka@mysql.com's avatar
pekka@mysql.com committed
860 861 862 863 864 865
    // They always put blobs at the end..
    for (i= no_fields - 1; i >= 0; i--)
    {
      Field *field= table->field[i];
      if (thd->query_id == field->query_id)
      {
866
        return TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
867 868 869
      }
    }
  }
870
  return FALSE;
871 872 873 874 875 876 877 878 879 880 881 882 883
}


/*
  Get metadata for this table from NDB 

  IMPLEMENTATION
    - check that frm-file on disk is equal to frm-file
      of table accessed in NDB
*/

int ha_ndbcluster::get_metadata(const char *path)
{
884 885
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
886 887
  const NDBTAB *tab;
  int error;
888
  bool invalidating_ndb_table= FALSE;
889

890 891 892
  DBUG_ENTER("get_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));

893 894 895 896 897 898
  do {
    const void *data, *pack_data;
    uint length, pack_length;

    if (!(tab= dict->getTable(m_tabname)))
      ERR_RETURN(dict->getNdbError());
899
    // Check if thread has stale local cache
900 901 902 903 904 905 906
    if (tab->getObjectStatus() == NdbDictionary::Object::Invalid)
    {
      invalidate_dictionary_cache(FALSE);
      if (!(tab= dict->getTable(m_tabname)))
         ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("Table schema version: %d", tab->getObjectVersion()));
    }
907 908 909 910 911
    /*
      Compare FrmData in NDB with frm file from disk.
    */
    error= 0;
    if (readfrm(path, &data, &length) ||
912
        packfrm(data, length, &pack_data, &pack_length))
913 914 915 916 917
    {
      my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
      my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
      DBUG_RETURN(1);
    }
918
    
919
    if ((pack_length != tab->getFrmLength()) || 
920
        (memcmp(pack_data, tab->getFrmData(), pack_length)))
921 922 923
    {
      if (!invalidating_ndb_table)
      {
924
        DBUG_PRINT("info", ("Invalidating table"));
925
        invalidate_dictionary_cache(TRUE);
926
        invalidating_ndb_table= TRUE;
927 928 929
      }
      else
      {
930 931 932 933 934 935 936 937
        DBUG_PRINT("error", 
                   ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", 
                    pack_length, tab->getFrmLength(),
                    memcmp(pack_data, tab->getFrmData(), pack_length)));      
        DBUG_DUMP("pack_data", (char*)pack_data, pack_length);
        DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength());
        error= 3;
        invalidating_ndb_table= FALSE;
938 939 940 941
      }
    }
    else
    {
942
      invalidating_ndb_table= FALSE;
943 944 945 946 947
    }
    my_free((char*)data, MYF(0));
    my_free((char*)pack_data, MYF(0));
  } while (invalidating_ndb_table);

948 949
  if (error)
    DBUG_RETURN(error);
950
  
951
  m_table_version= tab->getObjectVersion();
952 953 954 955
  m_table= (void *)tab; 
  m_table_info= NULL; // Set in external lock
  
  DBUG_RETURN(build_index_list(ndb, table, ILBP_OPEN));
956
}
957

958
static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
959 960
                                       const NDBINDEX *index,
                                       KEY *key_info)
961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979
{
  DBUG_ENTER("fix_unique_index_attr_order");
  unsigned sz= index->getNoOfIndexColumns();

  if (data.unique_index_attrid_map)
    my_free((char*)data.unique_index_attrid_map, MYF(0));
  data.unique_index_attrid_map= (unsigned char*)my_malloc(sz,MYF(MY_WME));

  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
  DBUG_ASSERT(key_info->key_parts == sz);
  for (unsigned i= 0; key_part != end; key_part++, i++) 
  {
    const char *field_name= key_part->field->field_name;
#ifndef DBUG_OFF
   data.unique_index_attrid_map[i]= 255;
#endif
    for (unsigned j= 0; j < sz; j++)
    {
980
      const NDBCOL *c= index->getColumn(j);
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
981
      if (strcmp(field_name, c->getName()) == 0)
982
      {
983 984
        data.unique_index_attrid_map[i]= j;
        break;
985 986 987 988 989 990
      }
    }
    DBUG_ASSERT(data.unique_index_attrid_map[i] != 255);
  }
  DBUG_RETURN(0);
}
991

992
/*
  Record the type of every index of the table and fetch handles to the
  corresponding NDB index objects

  SYNOPSIS
    build_index_list()
    ndb     Ndb object whose dictionary is used to look up indexes
    tab     MySQL table whose keys are walked
    phase   ILBP_CREATE also creates the secondary indexes in NDB;
            any other value only fetches handles

  RETURN
    0      ok
    1      an index could not be found in the NDB dictionary
    other  error from index creation, check_index_fields_not_null() or
           fix_unique_index_attr_order()
*/
int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase)
{
  uint i;
  int error= 0;
  const char *index_name;
  char unique_index_name[FN_LEN];
  static const char* unique_suffix= "$unique";
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  NDBDICT *dict= ndb->getDictionary();
  DBUG_ENTER("ha_ndbcluster::build_index_list");
  
  // Save information about all known indexes
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    index_name= *key_name;
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
    {
      /*
        strxnmov() stores the terminating '\0' at dst[len] when the
        source is truncated, so len must leave one byte of the buffer free.
      */
      strxnmov(unique_index_name, sizeof(unique_index_name) - 1,
               index_name, unique_suffix, NullS);
      DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
                          unique_index_name, i));
    }
    // Create secondary indexes if in create phase
    if (phase == ILBP_CREATE)
    {
      DBUG_PRINT("info", ("Creating index %u: %s", i, index_name));      
      switch (idx_type){
        
      case PRIMARY_KEY_INDEX:
        // Do nothing, already created
        break;
      case PRIMARY_KEY_ORDERED_INDEX:
        error= create_ordered_index(index_name, key_info);
        break;
      case UNIQUE_ORDERED_INDEX:
        if (!(error= create_ordered_index(index_name, key_info)))
          error= create_unique_index(unique_index_name, key_info);
        break;
      case UNIQUE_INDEX:
        if (!(error= check_index_fields_not_null(i)))
          error= create_unique_index(unique_index_name, key_info);
        break;
      case ORDERED_INDEX:
        error= create_ordered_index(index_name, key_info);
        break;
      default:
        DBUG_ASSERT(FALSE);
        break;
      }
      if (error)
      {
        DBUG_PRINT("error", ("Failed to create index %u", i));
        drop_table();
        break;
      }
    }
    // Add handles to index objects
    if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
    {
      DBUG_PRINT("info", ("Get handle to index %s", index_name));
      const NDBINDEX *index= dict->getIndex(index_name, m_tabname);
      if (!index) DBUG_RETURN(1);
      m_index[i].index= (void *) index;
    }
    if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
    {
      DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
      const NDBINDEX *index= dict->getIndex(unique_index_name, m_tabname);
      if (!index) DBUG_RETURN(1);
      m_index[i].unique_index= (void *) index;
      /*
        Stop at the first failure; otherwise the next iteration would
        overwrite the error and it would never reach the caller.
      */
      if ((error= fix_unique_index_attr_order(m_index[i], index, key_info)))
        break;
    }
  }
  
  DBUG_RETURN(error);
}

1071

1072 1073 1074 1075
/*
  Decode the type of an index from information 
  provided in table object
*/
1076
NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
1077
{
1078
  bool is_hash_index=  (table->key_info[inx].algorithm == HA_KEY_ALG_HASH);
1079
  if (inx == table->s->primary_key)
1080
    return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
1081 1082 1083 1084

  return ((table->key_info[inx].flags & HA_NOSAME) ? 
          (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
          ORDERED_INDEX);
1085
} 
1086

1087 1088 1089 1090 1091
/*
  Verify that no column of index number inx can be NULL

  RETURN
    0                        every key part field is NOT NULL
    ER_NULL_COLUMN_IN_INDEX  a nullable field was found; the error has
                             been reported with my_printf_error()
*/
int ha_ndbcluster::check_index_fields_not_null(uint inx)
{
  DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
  const KEY *key= table->key_info + inx;

  for (uint part_no= 0; part_no < key->key_parts; part_no++)
  {
    Field *field= key->key_part[part_no].field;
    if (!field->maybe_null())
      continue;

    my_printf_error(ER_NULL_COLUMN_IN_INDEX,ER(ER_NULL_COLUMN_IN_INDEX),
                    MYF(0),field->field_name);
    DBUG_RETURN(ER_NULL_COLUMN_IN_INDEX);
  }

  DBUG_RETURN(0);
}
1107 1108 1109

void ha_ndbcluster::release_metadata()
{
1110
  uint i;
1111

1112 1113 1114 1115
  DBUG_ENTER("release_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));

  m_table= NULL;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1116
  m_table_info= NULL;
1117

1118
  // Release index list 
1119 1120
  for (i= 0; i < MAX_KEY; i++)
  {
1121 1122
    m_index[i].unique_index= NULL;      
    m_index[i].index= NULL;      
1123 1124 1125 1126 1127
    if (m_index[i].unique_index_attrid_map)
    {
      my_free((char *)m_index[i].unique_index_attrid_map, MYF(0));
      m_index[i].unique_index_attrid_map= NULL;
    }
1128 1129
  }

1130 1131 1132
  DBUG_VOID_RETURN;
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1133
int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type)
1134
{
1135
  if (type >= TL_WRITE_ALLOW_WRITE)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1136
    return NdbOperation::LM_Exclusive;
1137
  else if (uses_blob_value(m_retrieve_all_fields))
1138
    return NdbOperation::LM_Read;
pekka@mysql.com's avatar
pekka@mysql.com committed
1139
  else
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1140
    return NdbOperation::LM_CommittedRead;
1141 1142
}

1143 1144 1145 1146 1147 1148
/*
  Handler index flags, one entry per index type in the order named in
  the comments on each line.

  HA_KEYREAD_ONLY is not set for PRIMARY_KEY_ORDERED_INDEX: enable it
  when "sorted" indexes are supported, so that ORDER BY clauses can be
  optimized by reading directly through the index.
*/
static const ulong index_type_flags[]=
{
  0,                                                              /* UNDEFINED_INDEX */
  HA_ONLY_WHOLE_INDEX,                                            /* PRIMARY_KEY_INDEX */
  HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER,    /* PRIMARY_KEY_ORDERED_INDEX */
  HA_ONLY_WHOLE_INDEX,                                            /* UNIQUE_INDEX */
  HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER,    /* UNIQUE_ORDERED_INDEX */
  HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_READ_ORDER     /* ORDERED_INDEX */
};

// Number of entries in index_type_flags
static const int index_flags_size=
  sizeof(index_type_flags)/sizeof(ulong);

/*
  Return the index type stored in m_index for index number idx_no
*/
inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
{
  DBUG_ASSERT(idx_no < MAX_KEY);
  const NDB_INDEX_DATA &entry= m_index[idx_no];
  return entry.type;
}


/*
  Get the flags for an index

  RETURN
    flags depending on the type of the index.
*/

1195 1196
inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
                                        bool all_parts) const 
1197
{ 
1198
  DBUG_ENTER("ha_ndbcluster::index_flags");
1199
  DBUG_PRINT("info", ("idx_no: %d", idx_no));
1200 1201 1202 1203
  DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size);
  DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)]);
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1204 1205 1206 1207
/*
  Convert a VARCHAR key value with a 2-byte length prefix to the
  1-byte length format when the field uses one length byte

  SYNOPSIS
    shrink_varchar()
    field   field the value belongs to
    ptr     in: key value, may be NULL (callers pass NULL for a NULL
                bound value)
            out: points to buf if the value was converted, otherwise
                 left unchanged
    buf     scratch buffer that receives the converted value; must hold
            at least field->pack_length() bytes (asserted to be <= 256)

  NOTE
    Nothing is done unless ptr is non-NULL, the field type is
    MYSQL_TYPE_VARCHAR and its length_bytes is 1.
*/
static void shrink_varchar(Field* field, const byte* & ptr, char* buf)
{
  // A NULL value has no length bytes to read, leave it alone
  if (ptr != NULL && field->type() == MYSQL_TYPE_VARCHAR) {
    Field_varstring* f= (Field_varstring*)field;
    if (f->length_bytes == 1) {
      uint pack_len= field->pack_length();
      DBUG_ASSERT(1 <= pack_len && pack_len <= 256);
      if (ptr[1] == 0) {
        buf[0]= ptr[0];
      } else {
        // High length byte set: does not fit in one byte, clamp to 255
        DBUG_ASSERT(FALSE);
        buf[0]= 255;
      }
      // Data follows the 2 length bytes in the source and 1 in the target
      memmove(buf + 1, ptr + 2, pack_len - 1);
      ptr= buf;
    }
  }
}
1222 1223 1224

/*
  Set the primary key of an operation from a packed key buffer

  Each key part is read from key at the running offset, which advances
  by the part's store_length; VARCHAR parts go through shrink_varchar()
  first.

  RETURN
    0      ok
    other  value produced by ERR_RETURN() when set_ndb_key() fails
*/
int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key)
{
  DBUG_ENTER("set_primary_key");
  const KEY *pk_info= table->key_info + table->s->primary_key;
  const byte *key_pos= key;

  for (uint part_no= 0; part_no < pk_info->key_parts; part_no++)
  {
    KEY_PART_INFO *part= pk_info->key_part + part_no;
    Field *field= part->field;
    const byte *value= key_pos;
    char varchar_buf[256];

    shrink_varchar(field, value, varchar_buf);
    if (set_ndb_key(op, field, part->fieldnr-1, value))
      ERR_RETURN(op->getNdbError());
    key_pos+= part->store_length;
  }
  DBUG_RETURN(0);
}


1245 1246
/*
  Set the primary key of an operation from a row image

  The value of each key part is taken from old_data at the key part's
  offset.

  RETURN
    0      ok
    other  value produced by ERR_RETURN() when set_ndb_key() fails
*/
int ha_ndbcluster::set_primary_key_from_old_data(NdbOperation *op, const byte *old_data)
{
  DBUG_ENTER("set_primary_key_from_old_data");
  const KEY *pk_info= table->key_info + table->s->primary_key;

  for (uint part_no= 0; part_no < pk_info->key_parts; part_no++)
  {
    KEY_PART_INFO *part= pk_info->key_part + part_no;
    const byte *value= old_data + part->offset;
    if (set_ndb_key(op, part->field, part->fieldnr-1, value))
      ERR_RETURN(op->getNdbError());
  }
  DBUG_RETURN(0);
}


1263 1264 1265
/*
  Set the primary key of an operation from the current field values

  The value of each key part is read through the field's own ptr.

  RETURN
    0      ok
    other  value produced by ERR_RETURN() when set_ndb_key() fails
*/
int ha_ndbcluster::set_primary_key(NdbOperation *op)
{
  DBUG_ENTER("set_primary_key");
  const KEY *pk_info= table->key_info + table->s->primary_key;

  for (uint part_no= 0; part_no < pk_info->key_parts; part_no++)
  {
    KEY_PART_INFO *part= pk_info->key_part + part_no;
    Field *field= part->field;
    if (set_ndb_key(op, field, part->fieldnr-1, field->ptr))
      ERR_RETURN(op->getNdbError());
  }
  DBUG_RETURN(0);
}

1280 1281
/*
  Set the key of a unique index operation from a packed key buffer

  Key part i is stored under the attribute id found in
  unique_index_attrid_map[i] of the active index. A key part with a
  null_bit has its value one byte into its slot of the key buffer.

  RETURN
    0      ok
    other  value produced by ERR_RETURN() when set_ndb_key() fails
*/
int 
ha_ndbcluster::set_index_key(NdbOperation *op, 
                             const KEY *key_info, 
                             const byte * key_ptr)
{
  DBUG_ENTER("ha_ndbcluster::set_index_key");
  const byte *slot= key_ptr;

  for (uint part_no= 0; part_no < key_info->key_parts; part_no++)
  {
    KEY_PART_INFO *part= key_info->key_part + part_no;
    Field *field= part->field;
    const byte *value= slot;
    char varchar_buf[256];

    if (part->null_bit)
      value= slot + 1;
    shrink_varchar(field, value, varchar_buf);
    if (set_ndb_key(op, field, m_index[active_index].unique_index_attrid_map[part_no], value))
      ERR_RETURN(m_active_trans->getNdbError());
    slot+= part->store_length;
  }
  DBUG_RETURN(0);
}
1302

1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315
/*
  Define which attributes an operation should read

  A column is read when all fields are being retrieved, when it is part
  of the primary key or when it carries the query id of the current
  thread; for every other column m_value[].ptr is reset to NULL.
  For a table without primary key the hidden key column, located right
  after the last field, is read as well.

  RETURN
    0      ok
    1      (debug builds only) the hidden key column was not found
    other  value produced by ERR_RETURN() when get_ndb_value() fails
*/
inline 
int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
{
  DBUG_ENTER("define_read_attrs");  
  THD *thd= current_thd;

  // Define attributes to read
  for (uint fieldnr= 0; fieldnr < table->s->fields; fieldnr++)
  {
    Field *field= table->field[fieldnr];
    const bool wanted= (thd->query_id == field->query_id) ||
                       (field->flags & PRI_KEY_FLAG) ||
                       m_retrieve_all_fields;
    if (!wanted)
    {
      m_value[fieldnr].ptr= NULL;
      continue;
    }
    if (get_ndb_value(op, field, fieldnr, buf))
      ERR_RETURN(op->getNdbError());
  }

  if (table->s->primary_key != MAX_KEY)
    DBUG_RETURN(0);

  // Scanning table with no primary key
  DBUG_PRINT("info", ("Getting hidden key"));
  int hidden_no= table->s->fields;      
#ifndef DBUG_OFF
  const NDBTAB *tab= (const NDBTAB *) m_table;    
  if (!tab->getColumn(hidden_no))
    DBUG_RETURN(1);
#endif
  if (get_ndb_value(op, NULL, hidden_no, NULL))
    ERR_RETURN(op->getNdbError());
  DBUG_RETURN(0);
} 

1344 1345 1346 1347
/*
  Read one record from NDB using primary key
*/

1348
int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) 
1349
{
1350
  uint no_fields= table->s->fields;
1351 1352
  NdbConnection *trans= m_active_trans;
  NdbOperation *op;
1353

1354 1355 1356 1357
  int res;
  DBUG_ENTER("pk_read");
  DBUG_PRINT("enter", ("key_len: %u", key_len));
  DBUG_DUMP("key", (char*)key, key_len);
1358

1359 1360
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
joreland@mysql.com's avatar
joreland@mysql.com committed
1361
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
1362
      op->readTuple(lm) != 0)
1363
    ERR_RETURN(trans->getNdbError());
1364
  
1365
  if (table->s->primary_key == MAX_KEY) 
1366 1367 1368 1369 1370
  {
    // This table has no primary key, use "hidden" primary key
    DBUG_PRINT("info", ("Using hidden key"));
    DBUG_DUMP("key", (char*)key, 8);    
    if (set_hidden_key(op, no_fields, key))
1371
      ERR_RETURN(trans->getNdbError());
1372
    
1373
    // Read key at the same time, for future reference
1374
    if (get_ndb_value(op, NULL, no_fields, NULL))
1375
      ERR_RETURN(trans->getNdbError());
1376 1377 1378 1379 1380 1381 1382
  } 
  else 
  {
    if ((res= set_primary_key(op, key)))
      return res;
  }
  
1383 1384
  if((res= define_read_attrs(buf, op)))
    DBUG_RETURN(res);
1385
  
1386
  if (execute_no_commit_ie(this,trans) != 0) 
1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The value have now been fetched from NDB  
  unpack_record(buf);
  table->status= 0;     
  DBUG_RETURN(0);
}

1398 1399 1400 1401 1402 1403
/*
  Read one complementing record from NDB using primary key from old_data
*/

int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data)
{
1404
  uint no_fields= table->s->fields, i;
1405
  NdbTransaction *trans= m_active_trans;
1406 1407 1408 1409
  NdbOperation *op;
  THD *thd= current_thd;
  DBUG_ENTER("complemented_pk_read");

1410
  if (m_retrieve_all_fields)
1411 1412 1413
    // We have allready retrieved all fields, nothing to complement
    DBUG_RETURN(0);

1414 1415
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
joreland@mysql.com's avatar
joreland@mysql.com committed
1416
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
1417
      op->readTuple(lm) != 0)
1418
    ERR_RETURN(trans->getNdbError());
1419 1420 1421 1422 1423
  
  int res;
  if ((res= set_primary_key_from_old_data(op, old_data)))
    ERR_RETURN(trans->getNdbError());
  
1424 1425 1426 1427
  // Read all unreferenced non-key field(s)
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
1428
    if (!((field->flags & PRI_KEY_FLAG) ||
1429
          (thd->query_id == field->query_id)))
1430
    {
1431
      if (get_ndb_value(op, field, i, new_data))
1432
        ERR_RETURN(trans->getNdbError());
1433 1434 1435
    }
  }
  
1436
  if (execute_no_commit(this,trans) != 0) 
1437 1438 1439 1440 1441 1442 1443 1444
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The value have now been fetched from NDB  
  unpack_record(new_data);
  table->status= 0;     
1445 1446 1447 1448 1449 1450 1451 1452

  /**
   * restore m_value
   */
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
    if (!((field->flags & PRI_KEY_FLAG) ||
1453
          (thd->query_id == field->query_id)))
1454 1455 1456 1457 1458
    {
      m_value[i].ptr= NULL;
    }
  }
  
1459 1460 1461
  DBUG_RETURN(0);
}

1462 1463 1464 1465 1466 1467
/*
  Peek to check if a particular row already exists
*/

int ha_ndbcluster::peek_row()
{
1468
  NdbTransaction *trans= m_active_trans;
1469 1470
  NdbOperation *op;
  DBUG_ENTER("peek_row");
1471

1472 1473 1474 1475 1476
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) ||
      op->readTuple(lm) != 0)
    ERR_RETURN(trans->getNdbError());
1477

1478 1479 1480
  int res;
  if ((res= set_primary_key(op)))
    ERR_RETURN(trans->getNdbError());
1481

1482
  if (execute_no_commit_ie(this,trans) != 0)
1483 1484 1485 1486
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  } 
1487 1488
  DBUG_RETURN(0);
}
1489

1490 1491 1492 1493 1494
/*
  Read one record from NDB using unique secondary index
*/

int ha_ndbcluster::unique_index_read(const byte *key,
1495
                                     uint key_len, byte *buf)
1496
{
1497
  int res;
1498
  NdbTransaction *trans= m_active_trans;
1499
  NdbIndexOperation *op;
1500
  DBUG_ENTER("ha_ndbcluster::unique_index_read");
1501 1502 1503
  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
  DBUG_DUMP("key", (char*)key, key_len);
  
1504 1505
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
1506
  if (!(op= trans->getNdbIndexOperation((NDBINDEX *) 
1507
                                        m_index[active_index].unique_index, 
joreland@mysql.com's avatar
joreland@mysql.com committed
1508
                                        (const NDBTAB *) m_table)) ||
1509
      op->readTuple(lm) != 0)
1510 1511 1512
    ERR_RETURN(trans->getNdbError());
  
  // Set secondary index key(s)
1513 1514 1515
  if((res= set_index_key(op, table->key_info + active_index, key)))
    DBUG_RETURN(res);
  
1516 1517
  if((res= define_read_attrs(buf, op)))
    DBUG_RETURN(res);
1518

1519
  if (execute_no_commit_ie(this,trans) != 0) 
1520 1521 1522 1523 1524 1525 1526 1527 1528 1529
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }
  // The value have now been fetched from NDB
  unpack_record(buf);
  table->status= 0;
  DBUG_RETURN(0);
}

1530
/*
  Advance a scan cursor to the next row

  Pending operations are sent to NDB before more rows are fetched:
  always when blobs are pending, and otherwise whenever nextResult()
  reports that it has no more (cached) rows.

  RETURN
    0      a row is available
    1      no more rows
    -1     error from nextResult(), execute or restart
    other  result of ndb_err() when sending pending blob operations fails
*/
inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
{
  DBUG_ENTER("fetch_next");
  NdbTransaction *trans= m_active_trans;
  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE;

  for (;;)
  {
    DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
    /*
      We can only handle one tuple with blobs at a time.
    */
    if (m_ops_pending && m_blobs_pending)
    {
      if (execute_no_commit(this,trans) != 0)
        DBUG_RETURN(ndb_err(trans));
      m_ops_pending= 0;
      m_blobs_pending= FALSE;
    }

    const int check= cursor->nextResult(contact_ndb, m_force_send);
    if (check == 0)
      DBUG_RETURN(0);
    // 1: No more records
    // 2: No more cached records
    if (check != 1 && check != 2)
      DBUG_RETURN(-1);

    /*
      Before fetching more rows and releasing lock(s),
      all pending update or delete operations should 
      be sent to NDB
    */
    DBUG_PRINT("info", ("ops_pending: %d", m_ops_pending));    
    if (m_ops_pending)
    {
      if (m_transaction_on)
      {
        if (execute_no_commit(this,trans) != 0)
          DBUG_RETURN(-1);
      }
      else
      {
        if (execute_commit(this,trans) != 0)
          DBUG_RETURN(-1);
        if (trans->restart() != 0)
        {
          DBUG_ASSERT(0);
          DBUG_RETURN(-1);
        }
      }
      m_ops_pending= 0;
    }

    if (check == 1)
      break;
    // Cache exhausted: the next call has to ask NDB for more rows
    contact_ndb= TRUE;
  }

  DBUG_RETURN(1);
}

/*
  Get the next record of a started scan. Try to fetch
  it locally from NdbApi cached records if possible, 
  otherwise ask NDB for more.

  NOTE
  If this is a update/delete make sure to not contact 
  NDB before any pending ops have been sent to NDB.

  RETURN
    0                   row unpacked into buf, table->status set to 0
    HA_ERR_END_OF_FILE  no active cursor, or no more rows (in which case
                        table->status is set to STATUS_NOT_FOUND)
    other               result of ndb_err() for the active transaction
*/

inline int ha_ndbcluster::next_result(byte *buf)
{  
  DBUG_ENTER("next_result");

  if (!m_active_cursor)
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  const int res= fetch_next(m_active_cursor);
  if (res == 0)
  {
    DBUG_PRINT("info", ("One more record found"));    
    unpack_record(buf);
    table->status= 0;
    DBUG_RETURN(0);
  }
  if (res == 1)
  {
    // No more records
    table->status= STATUS_NOT_FOUND;
    DBUG_PRINT("info", ("No more records"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  }
  DBUG_RETURN(ndb_err(m_active_trans));
}

1636
/*
1637
  Set bounds for ordered index scan.
1638 1639
*/

joreland@mysql.com's avatar
joreland@mysql.com committed
1640
int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op,
1641 1642
                              const key_range *keys[2],
                              uint range_no)
1643
{
1644 1645 1646 1647
  const KEY *const key_info= table->key_info + active_index;
  const uint key_parts= key_info->key_parts;
  uint key_tot_len[2];
  uint tot_len;
1648
  uint i, j;
1649 1650

  DBUG_ENTER("set_bounds");
1651
  DBUG_PRINT("info", ("key_parts=%d", key_parts));
1652

1653
  for (j= 0; j <= 1; j++)
1654
  {
1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667
    const key_range *key= keys[j];
    if (key != NULL)
    {
      // for key->flag see ha_rkey_function
      DBUG_PRINT("info", ("key %d length=%d flag=%d",
                          j, key->length, key->flag));
      key_tot_len[j]= key->length;
    }
    else
    {
      DBUG_PRINT("info", ("key %d not present", j));
      key_tot_len[j]= 0;
    }
1668 1669
  }
  tot_len= 0;
1670

1671 1672 1673 1674
  for (i= 0; i < key_parts; i++)
  {
    KEY_PART_INFO *key_part= &key_info->key_part[i];
    Field *field= key_part->field;
1675
#ifndef DBUG_OFF
1676
    uint part_len= key_part->length;
1677
#endif
1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691
    uint part_store_len= key_part->store_length;
    // Info about each key part
    struct part_st {
      bool part_last;
      const key_range *key;
      const byte *part_ptr;
      bool part_null;
      int bound_type;
      const char* bound_ptr;
    };
    struct part_st part[2];

    for (j= 0; j <= 1; j++)
    {
1692
      struct part_st &p= part[j];
1693 1694 1695 1696 1697 1698 1699
      p.key= NULL;
      p.bound_type= -1;
      if (tot_len < key_tot_len[j])
      {
        p.part_last= (tot_len + part_store_len >= key_tot_len[j]);
        p.key= keys[j];
        p.part_ptr= &p.key->key[tot_len];
joreland@mysql.com's avatar
joreland@mysql.com committed
1700
        p.part_null= key_part->null_bit && *p.part_ptr;
1701
        p.bound_ptr= (const char *)
joreland@mysql.com's avatar
joreland@mysql.com committed
1702
          p.part_null ? 0 : key_part->null_bit ? p.part_ptr + 1 : p.part_ptr;
1703 1704 1705 1706 1707 1708 1709 1710

        if (j == 0)
        {
          switch (p.key->flag)
          {
            case HA_READ_KEY_EXACT:
              p.bound_type= NdbIndexScanOperation::BoundEQ;
              break;
1711
            // ascending
1712 1713 1714 1715 1716 1717 1718 1719 1720
            case HA_READ_KEY_OR_NEXT:
              p.bound_type= NdbIndexScanOperation::BoundLE;
              break;
            case HA_READ_AFTER_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundLE;
              else
                p.bound_type= NdbIndexScanOperation::BoundLT;
              break;
1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733
            // descending
            case HA_READ_PREFIX_LAST:           // weird
              p.bound_type= NdbIndexScanOperation::BoundEQ;
              break;
            case HA_READ_PREFIX_LAST_OR_PREV:   // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
1734 1735 1736 1737 1738 1739 1740
            default:
              break;
          }
        }
        if (j == 1) {
          switch (p.key->flag)
          {
1741
            // ascending
1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
            case HA_READ_AFTER_KEY:     // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            default:
              break;
1753
            // descending strangely sets no end key
1754 1755
          }
        }
1756

1757 1758 1759
        if (p.bound_type == -1)
        {
          DBUG_PRINT("error", ("key %d unknown flag %d", j, p.key->flag));
1760
          DBUG_ASSERT(FALSE);
1761
          // Stop setting bounds but continue with what we have
1762
          op->end_of_bound(range_no);
1763 1764 1765 1766
          DBUG_RETURN(0);
        }
      }
    }
1767

1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784
    // Seen with e.g. b = 1 and c > 1
    if (part[0].bound_type == NdbIndexScanOperation::BoundLE &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("replace LE/GE pair by EQ"));
      part[0].bound_type= NdbIndexScanOperation::BoundEQ;
      part[1].bound_type= -1;
    }
    // Not seen but was in previous version
    if (part[0].bound_type == NdbIndexScanOperation::BoundEQ &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("remove GE from EQ/GE pair"));
      part[1].bound_type= -1;
    }
1785

1786 1787
    for (j= 0; j <= 1; j++)
    {
1788
      struct part_st &p= part[j];
1789 1790 1791 1792 1793 1794 1795 1796 1797
      // Set bound if not done with this key
      if (p.key != NULL)
      {
        DBUG_PRINT("info", ("key %d:%d offset=%d length=%d last=%d bound=%d",
                            j, i, tot_len, part_len, p.part_last, p.bound_type));
        DBUG_DUMP("info", (const char*)p.part_ptr, part_store_len);

        // Set bound if not cancelled via type -1
        if (p.bound_type != -1)
1798
        {
pekka@mysql.com's avatar
pekka@mysql.com committed
1799 1800 1801
          const char* ptr= p.bound_ptr;
          char buf[256];
          shrink_varchar(field, ptr, buf);
tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
1802
          if (op->setBound(i, p.bound_type, ptr))
1803
            ERR_RETURN(op->getNdbError());
1804
        }
1805 1806 1807 1808
      }
    }

    tot_len+= part_store_len;
1809
  }
1810
  op->end_of_bound(range_no);
1811 1812 1813
  DBUG_RETURN(0);
}

1814
/*
1815
  Start ordered index scan in NDB
1816 1817
*/

1818
int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
1819 1820
                                      const key_range *end_key,
                                      bool sorted, bool descending, byte* buf)
1821
{  
1822
  int res;
joreland@mysql.com's avatar
joreland@mysql.com committed
1823
  bool restart;
1824
  NdbTransaction *trans= m_active_trans;
joreland@mysql.com's avatar
joreland@mysql.com committed
1825
  NdbIndexScanOperation *op;
1826

1827 1828 1829
  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d",
             active_index, sorted, descending));  
1830
  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
pekka@mysql.com's avatar
pekka@mysql.com committed
1831

1832 1833
  // Check that sorted seems to be initialised
  DBUG_ASSERT(sorted == 0 || sorted == 1);
1834
  
1835
  if (m_active_cursor == 0)
joreland@mysql.com's avatar
joreland@mysql.com committed
1836
  {
1837
    restart= FALSE;
joreland@mysql.com's avatar
joreland@mysql.com committed
1838 1839 1840
    NdbOperation::LockMode lm=
      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
    if (!(op= trans->getNdbIndexScanOperation((NDBINDEX *)
1841 1842 1843
                                              m_index[active_index].index, 
                                              (const NDBTAB *) m_table)) ||
        op->readTuples(lm, 0, parallelism, sorted, descending))
joreland@mysql.com's avatar
joreland@mysql.com committed
1844
      ERR_RETURN(trans->getNdbError());
1845
    m_active_cursor= op;
joreland@mysql.com's avatar
joreland@mysql.com committed
1846
  } else {
1847
    restart= TRUE;
1848
    op= (NdbIndexScanOperation*)m_active_cursor;
joreland@mysql.com's avatar
joreland@mysql.com committed
1849 1850 1851
    
    DBUG_ASSERT(op->getSorted() == sorted);
    DBUG_ASSERT(op->getLockMode() == 
1852
                (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
1853
    if(op->reset_bounds(m_force_send))
joreland@mysql.com's avatar
joreland@mysql.com committed
1854 1855
      DBUG_RETURN(ndb_err(m_active_trans));
  }
1856
  
1857
  {
1858
    const key_range *keys[2]= { start_key, end_key };
1859 1860 1861
    res= set_bounds(op, keys);
    if (res)
      DBUG_RETURN(res);
1862
  }
1863 1864 1865

  if (!restart && generate_scan_filter(m_cond_stack, op))
    DBUG_RETURN(ndb_err(trans));
1866
  
1867
  if (!restart && (res= define_read_attrs(buf, op)))
1868
  {
1869
    DBUG_RETURN(res);
joreland@mysql.com's avatar
joreland@mysql.com committed
1870
  }
1871 1872 1873 1874 1875 1876

  if (execute_no_commit(this,trans) != 0)
    DBUG_RETURN(ndb_err(trans));
  
  DBUG_RETURN(next_result(buf));
}
1877 1878

/*
  Start full table scan in NDB

  SYNOPSIS
    full_table_scan()
    buf                 Buffer handed to define_read_attrs() and next_result()

  RETURN
    Result of next_result() when the scan could be started, otherwise
    the error from the step that failed
*/

int ha_ndbcluster::full_table_scan(byte *buf)
{
  int error;
  NdbScanOperation *scan_op;
  NdbTransaction *trans= m_active_trans;

  DBUG_ENTER("full_table_scan");
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));

  NdbOperation::LockMode lock_mode=
    (NdbOperation::LockMode) get_ndb_lock_type(m_lock.type);

  // Define the scan operation on the table
  scan_op= trans->getNdbScanOperation((const NDBTAB *) m_table);
  if (!scan_op || scan_op->readTuples(lock_mode, 0, parallelism))
    ERR_RETURN(trans->getNdbError());
  m_active_cursor= scan_op;

  if (generate_scan_filter(m_cond_stack, scan_op))
    DBUG_RETURN(ndb_err(trans));

  if ((error= define_read_attrs(buf, scan_op)))
    DBUG_RETURN(error);

  if (execute_no_commit(this, trans) != 0)
    DBUG_RETURN(ndb_err(trans));

  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
}

1908 1909 1910 1911 1912
/*
  Insert one record into NDB

  SYNOPSIS
    write_row()
    record              Row to insert

  NOTE
    The operation is defined as writeTuple() when m_use_write is set,
    otherwise as insertTuple().
    Rows are only sent to NDB when a single row is inserted, when
    m_bulk_insert_rows rows have been collected, when called as part of a
    primary key update or when a blob value was set.

  RETURN
    0                       Row was defined (and possibly sent)
    HA_ERR_FOUND_DUPP_KEY   m_ignore_dup_key is set and peek_row() found
                            the row
    other                   Error from the step that failed
*/
int ha_ndbcluster::write_row(byte *record)
{
  bool has_auto_increment;
  uint i;
  NdbTransaction *trans= m_active_trans;
  NdbOperation *op;
  int res;
  THD *thd= current_thd;

  DBUG_ENTER("write_row");

  if (m_ignore_dup_key && table->s->primary_key != MAX_KEY)
  {
    int peek_res= peek_row();

    if (!peek_res)
    {
      m_dupkey= table->s->primary_key;
      DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
    }
    if (peek_res != HA_ERR_KEY_NOT_FOUND)
      DBUG_RETURN(peek_res);
  }

  statistic_increment(thd->status_var.ha_write_count, &LOCK_status);
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
    table->timestamp_field->set_time();
  has_auto_increment= (table->next_number_field && record == table->record[0]);

  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)))
    ERR_RETURN(trans->getNdbError());

  res= (m_use_write) ? op->writeTuple() : op->insertTuple();
  if (res != 0)
    ERR_RETURN(trans->getNdbError());

  if (table->s->primary_key == MAX_KEY)
  {
    // Table has hidden primary key
    Ndb *ndb= get_ndb();
    Uint64 auto_value= NDB_FAILED_AUTO_INCREMENT;
    uint retries= NDB_AUTO_INCREMENT_RETRIES;
    // Retry as long as NDB reports a temporary error
    do {
      auto_value= ndb->getAutoIncrementValue((const NDBTAB *) m_table);
    } while (auto_value == NDB_FAILED_AUTO_INCREMENT &&
             --retries &&
             ndb->getNdbError().status == NdbError::TemporaryError);
    if (auto_value == NDB_FAILED_AUTO_INCREMENT)
      ERR_RETURN(ndb->getNdbError());
    if (set_hidden_key(op, table->s->fields, (const byte*)&auto_value))
      ERR_RETURN(op->getNdbError());
  }
  else
  {
    if (has_auto_increment)
    {
      m_skip_auto_increment= FALSE;
      update_auto_increment();
      /* Ensure that handler is always called for auto_increment values */
      table->in_use->next_insert_id= 0;
      m_skip_auto_increment= !auto_increment_column_changed;
    }

    /*
      Leave through DBUG_RETURN so that the DBUG_ENTER above is always
      matched, also on this error path
    */
    if ((res= (m_primary_key_update ?
               set_primary_key_from_old_data(op, record)
               : set_primary_key(op))))
      DBUG_RETURN(res);
  }

  // Set non-key attribute(s)
  bool set_blob_value= FALSE;
  for (i= 0; i < table->s->fields; i++)
  {
    Field *field= table->field[i];
    if (!(field->flags & PRI_KEY_FLAG) &&
        set_ndb_value(op, field, i, &set_blob_value))
    {
      m_skip_auto_increment= TRUE;
      ERR_RETURN(op->getNdbError());
    }
  }

  m_rows_changed++;

  /*
    Execute write operation
    NOTE When doing inserts with many values in
    each INSERT statement it should not be necessary
    to NoCommit the transaction between each row.
    Find out how this is detected!
  */
  m_rows_inserted++;
  no_uncommitted_rows_update(1);
  m_bulk_insert_not_flushed= TRUE;
  if ((m_rows_to_insert == (ha_rows) 1) ||
      ((m_rows_inserted % m_bulk_insert_rows) == 0) ||
      m_primary_key_update ||
      set_blob_value)
  {
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "
                        "rows_inserted:%d, bulk_insert_rows: %d",
                        (int)m_rows_inserted, (int)m_bulk_insert_rows));

    m_bulk_insert_not_flushed= FALSE;
    if (m_transaction_on)
    {
      if (execute_no_commit(this,trans) != 0)
      {
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
      }
    }
    else
    {
      if (execute_commit(this,trans) != 0)
      {
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
      }
      if (trans->restart() != 0)
      {
        DBUG_ASSERT(0);
        DBUG_RETURN(-1);
      }
    }
  }
  if ((has_auto_increment) && (m_skip_auto_increment))
  {
    Ndb *ndb= get_ndb();
    Uint64 next_val= (Uint64) table->next_number_field->val_int() + 1;
    DBUG_PRINT("info",
               ("Trying to set next auto increment value to %lu",
                (ulong) next_val));
    if (ndb->setAutoIncrementValue((const NDBTAB *) m_table, next_val, TRUE))
    {
      // The value is 64 bit, convert it to match the format specifier
      DBUG_PRINT("info",
                 ("Setting next auto increment value to %lu",
                  (ulong) next_val));
    }
  }
  m_skip_auto_increment= TRUE;

  DBUG_RETURN(0);
}


/*
  Compare if a key in a row has changed

  SYNOPSIS
    key_cmp()
    keynr               Index into table->key_info of the key to compare
    old_row             Row holding the old key value
    new_row             Row holding the new key value

  RETURN
    0                   All key parts are equal in both rows
    1                   At least one key part differs
*/

int ha_ndbcluster::key_cmp(uint keynr, const byte * old_row,
                           const byte * new_row)
{
  KEY *key_def= table->key_info + keynr;

  for (uint n= 0; n < key_def->key_parts; n++)
  {
    KEY_PART_INFO *part= key_def->key_part + n;

    // A differing null flag means that the key has changed
    if (part->null_bit &&
        (old_row[part->null_offset] & part->null_bit) !=
        (new_row[part->null_offset] & part->null_bit))
      return 1;

    const byte *old_val= old_row + part->offset;
    const byte *new_val= new_row + part->offset;
    int differs;

    if (part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
    {
      // Blob and variable length parts are compared by the field itself
      differs= part->field->cmp_binary((char*) old_val, (char*) new_val,
                                       (ulong) part->length);
    }
    else
      differs= memcmp(old_val, new_val, part->length);

    if (differs)
      return 1;
  }
  return 0;
}

/*
  Update one record in NDB using primary key
*/

int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
{
  THD *thd= current_thd;
2102
  NdbTransaction *trans= m_active_trans;
2103
  NdbScanOperation* cursor= m_active_cursor;
2104 2105 2106 2107
  NdbOperation *op;
  uint i;
  DBUG_ENTER("update_row");
  
2108
  statistic_increment(thd->status_var.ha_update_count, &LOCK_status);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2109
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
2110
  {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2111
    table->timestamp_field->set_time();
2112 2113 2114
    // Set query_id so that field is really updated
    table->timestamp_field->query_id= thd->query_id;
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2115

2116
  /* Check for update of primary key for special handling */  
2117 2118
  if ((table->s->primary_key != MAX_KEY) &&
      (key_cmp(table->s->primary_key, old_data, new_data)))
2119
  {
2120
    int read_res, insert_res, delete_res;
2121

2122
    DBUG_PRINT("info", ("primary key update, doing pk read+delete+insert"));
2123
    // Get all old fields, since we optimize away fields not in query
2124
    read_res= complemented_pk_read(old_data, new_data);
2125 2126 2127 2128 2129
    if (read_res)
    {
      DBUG_PRINT("info", ("pk read failed"));
      DBUG_RETURN(read_res);
    }
2130
    // Delete old row
2131
    m_primary_key_update= TRUE;
2132
    delete_res= delete_row(old_data);
2133
    m_primary_key_update= FALSE;
2134 2135 2136
    if (delete_res)
    {
      DBUG_PRINT("info", ("delete failed"));
2137
      DBUG_RETURN(delete_res);
2138
    }     
2139 2140
    // Insert new row
    DBUG_PRINT("info", ("delete succeded"));
2141
    m_primary_key_update= TRUE;
2142
    insert_res= write_row(new_data);
2143
    m_primary_key_update= FALSE;
2144 2145 2146 2147 2148
    if (insert_res)
    {
      DBUG_PRINT("info", ("insert failed"));
      if (trans->commitStatus() == NdbConnection::Started)
      {
2149
      // Undo write_row(new_data)
2150 2151 2152 2153 2154 2155 2156
        m_primary_key_update= TRUE;
        insert_res= write_row((byte *)old_data);
        m_primary_key_update= FALSE;
      }
      DBUG_RETURN(insert_res);
    }
    DBUG_PRINT("info", ("delete+insert succeeded"));
2157
    DBUG_RETURN(0);
2158
  }
2159

2160
  if (cursor)
2161
  {
2162 2163 2164 2165 2166 2167 2168 2169
    /*
      We are scanning records and want to update the record
      that was just found, call updateTuple on the cursor 
      to take over the lock to a new update operation
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling updateTuple on cursor"));
2170
    if (!(op= cursor->updateCurrentTuple()))
2171
      ERR_RETURN(trans->getNdbError());
2172
    m_ops_pending++;
2173
    if (uses_blob_value(FALSE))
2174
      m_blobs_pending= TRUE;
2175 2176 2177
  }
  else
  {  
joreland@mysql.com's avatar
joreland@mysql.com committed
2178
    if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) ||
2179
        op->updateTuple() != 0)
2180 2181
      ERR_RETURN(trans->getNdbError());  
    
2182
    if (table->s->primary_key == MAX_KEY) 
2183 2184 2185 2186 2187 2188
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
      
      // Require that the PK for this record has previously been 
      // read into m_value
2189
      uint no_fields= table->s->fields;
2190
      const NdbRecAttr* rec= m_value[no_fields].rec;
2191 2192 2193 2194
      DBUG_ASSERT(rec);
      DBUG_DUMP("key", (char*)rec->aRef(), NDB_HIDDEN_PRIMARY_KEY_LENGTH);
      
      if (set_hidden_key(op, no_fields, rec->aRef()))
2195
        ERR_RETURN(op->getNdbError());
2196 2197 2198 2199
    } 
    else 
    {
      int res;
2200
      if ((res= set_primary_key_from_old_data(op, old_data)))
2201
        DBUG_RETURN(res);
2202
    }
2203 2204
  }

2205 2206
  m_rows_changed++;

2207
  // Set non-key attribute(s)
2208
  for (i= 0; i < table->s->fields; i++) 
2209 2210
  {
    Field *field= table->field[i];
2211
    if (((thd->query_id == field->query_id) || m_retrieve_all_fields) &&
2212
        (!(field->flags & PRI_KEY_FLAG)) &&
2213
        set_ndb_value(op, field, i))
2214 2215
      ERR_RETURN(op->getNdbError());
  }
2216

2217
  // Execute update operation
2218
  if (!cursor && execute_no_commit(this,trans) != 0) {
2219
    no_uncommitted_rows_execute_failure();
2220
    DBUG_RETURN(ndb_err(trans));
2221
  }
2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232
  
  DBUG_RETURN(0);
}


/*
  Delete one record from NDB, using primary key 
*/

int ha_ndbcluster::delete_row(const byte *record)
{
2233
  THD *thd= current_thd;
2234
  NdbTransaction *trans= m_active_trans;
2235
  NdbScanOperation* cursor= m_active_cursor;
2236 2237 2238
  NdbOperation *op;
  DBUG_ENTER("delete_row");

2239
  statistic_increment(thd->status_var.ha_delete_count,&LOCK_status);
2240
  m_rows_changed++;
2241

2242
  if (cursor)
2243
  {
2244
    /*
2245
      We are scanning records and want to delete the record
2246
      that was just found, call deleteTuple on the cursor 
2247
      to take over the lock to a new delete operation
2248 2249 2250 2251
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
2252
    if (cursor->deleteCurrentTuple() != 0)
2253
      ERR_RETURN(trans->getNdbError());     
2254
    m_ops_pending++;
2255

2256 2257
    no_uncommitted_rows_update(-1);

2258 2259 2260
    if (!m_primary_key_update)
      // If deleting from cursor, NoCommit will be handled in next_result
      DBUG_RETURN(0);
2261 2262
  }
  else
2263
  {
2264
    
joreland@mysql.com's avatar
joreland@mysql.com committed
2265
    if (!(op=trans->getNdbOperation((const NDBTAB *) m_table)) || 
2266
        op->deleteTuple() != 0)
2267 2268
      ERR_RETURN(trans->getNdbError());
    
2269 2270
    no_uncommitted_rows_update(-1);
    
2271
    if (table->s->primary_key == MAX_KEY) 
2272 2273 2274
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
2275
      uint no_fields= table->s->fields;
2276
      const NdbRecAttr* rec= m_value[no_fields].rec;
2277 2278 2279
      DBUG_ASSERT(rec != NULL);
      
      if (set_hidden_key(op, no_fields, rec->aRef()))
2280
        ERR_RETURN(op->getNdbError());
2281 2282 2283 2284
    } 
    else 
    {
      int res;
2285
      if ((res= (m_primary_key_update ?
2286 2287 2288
                 set_primary_key_from_old_data(op, record)
                 : set_primary_key(op))))
          return res;  
2289
    }
2290
  }
2291

2292
  // Execute delete operation
2293
  if (execute_no_commit(this,trans) != 0) {
2294
    no_uncommitted_rows_execute_failure();
2295
    DBUG_RETURN(ndb_err(trans));
2296
  }
2297 2298
  DBUG_RETURN(0);
}
2299
  
2300 2301 2302 2303 2304
/*
  Unpack a record read from NDB

  SYNOPSIS
    unpack_record()
    buf                 Buffer to store read row

  NOTE
    The data for each row is read directly into the
    destination buffer. This function is primarily
    called in order to check if any fields should be
    set to null. Values of bit fields are in addition
    stored into their field.
*/

void ha_ndbcluster::unpack_record(byte* buf)
{
  uint row_offset= (uint) (buf - table->record[0]);
  const uint field_count= table->s->fields;
  DBUG_ENTER("unpack_record");

  // Clear the null bytes, the flags of NULL columns are set again below
  bzero(buf, table->s->null_bytes);

  for (uint n= 0; n < field_count; n++)
  {
    Field *fld= table->field[n];
    NdbValue *val= m_value + n;

    if (!val->ptr)
      continue;                                 // Nothing stored for column

    if (fld->flags & BLOB_FLAG)
    {
      bool blob_is_null= TRUE;
#ifndef DBUG_OFF
      int ret=
#endif
        val->blob->getNull(blob_is_null);
      DBUG_ASSERT(ret == 0);
      if (blob_is_null)
        fld->set_null(row_offset);
      continue;
    }

    if (val->rec->isNULL())
    {
      fld->set_null(row_offset);
      continue;
    }

    if (fld->type() != MYSQL_TYPE_BIT)
      continue;

    // Bit fields are stored from the 32 or 64 bit value that was read
    if (fld->pack_length() < 5)
    {
      DBUG_PRINT("info", ("bit field H'%.8X",
                          val->rec->u_32_value()));
      ((Field_bit *) fld)->store((longlong) val->rec->u_32_value());
    }
    else
    {
      DBUG_PRINT("info", ("bit field H'%.8X%.8X",
                          *(Uint32 *) val->rec->aRef(),
                          *((Uint32 *) val->rec->aRef()+1)));
      ((Field_bit *) fld)->store((longlong) val->rec->u_64_value());
    }
  }

#ifndef DBUG_OFF
  // Read and print all values that was fetched
  if (table->s->primary_key == MAX_KEY)
  {
    // Table with hidden primary key
    int hidden_no= table->s->fields;
    const NDBTAB *tab= (const NDBTAB *) m_table;
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    const NdbRecAttr* rec= m_value[hidden_no].rec;
    DBUG_ASSERT(rec);
    DBUG_PRINT("hidden", ("%d: %s \"%llu\"", hidden_no,
                          hidden_col->getName(), rec->u_64_value()));
  }
  //print_results();
#endif
  DBUG_VOID_RETURN;
}

/*
  Utility function to print/dump the fetched field
 */

void ha_ndbcluster::print_results()
{
  DBUG_ENTER("print_results");

#ifndef DBUG_OFF
2396
  const NDBTAB *tab= (const NDBTAB*) m_table;
2397

2398 2399
  if (!_db_on_)
    DBUG_VOID_RETURN;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2400

2401
  char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2402
  String type(buf_type, sizeof(buf_type), &my_charset_bin);
2403
  String val(buf_val, sizeof(buf_val), &my_charset_bin);
2404
  for (uint f= 0; f < table->s->fields; f++)
2405
  {
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2406
    /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
2407
    char buf[2000];
2408
    Field *field;
2409
    void* ptr;
pekka@mysql.com's avatar
pekka@mysql.com committed
2410
    NdbValue value;
2411

2412
    buf[0]= 0;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2413
    field= table->field[f];
pekka@mysql.com's avatar
pekka@mysql.com committed
2414
    if (!(value= m_value[f]).ptr)
2415
    {
2416 2417
      my_snprintf(buf, sizeof(buf), "not read");
      goto print_value;
2418
    }
2419

2420
    ptr= field->ptr;
pekka@mysql.com's avatar
pekka@mysql.com committed
2421 2422

    if (! (field->flags & BLOB_FLAG))
2423
    {
pekka@mysql.com's avatar
pekka@mysql.com committed
2424 2425
      if (value.rec->isNULL())
      {
2426 2427
        my_snprintf(buf, sizeof(buf), "NULL");
        goto print_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
2428
      }
2429 2430 2431 2432 2433
      type.length(0);
      val.length(0);
      field->sql_type(type);
      field->val_str(&val);
      my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
pekka@mysql.com's avatar
pekka@mysql.com committed
2434 2435 2436
    }
    else
    {
2437
      NdbBlob *ndb_blob= value.blob;
2438
      bool isNull= TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
2439 2440
      ndb_blob->getNull(isNull);
      if (isNull) {
2441 2442
        my_snprintf(buf, sizeof(buf), "NULL");
        goto print_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
2443
      }
2444
    }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2445

2446
print_value:
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2447
    DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
2448 2449 2450 2451 2452 2453 2454 2455
  }
#endif
  DBUG_VOID_RETURN;
}


/*
  Prepare for use of an index, handled entirely by handler::index_init()
*/

int ha_ndbcluster::index_init(uint index)
{
  DBUG_ENTER("ha_ndbcluster::index_init");
  DBUG_PRINT("enter", ("index: %u", index));

  DBUG_RETURN(handler::index_init(index));
}


int ha_ndbcluster::index_end()
{
2464
  DBUG_ENTER("ha_ndbcluster::index_end");
2465
  DBUG_RETURN(close_scan());
2466 2467
}

2468 2469 2470 2471 2472 2473 2474 2475
/**
 * Check if key contains null
 */
static
int
check_null_in_key(const KEY* key_info, const byte *key, uint key_len)
{
  KEY_PART_INFO *curr_part, *end_part;
2476
  const byte* end_ptr= key + key_len;
2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->key_parts;
  

  for (; curr_part != end_part && key < end_ptr; curr_part++)
  {
    if(curr_part->null_bit && *key)
      return 1;

    key += curr_part->store_length;
  }
  return 0;
}
2490 2491

int ha_ndbcluster::index_read(byte *buf,
2492 2493
                              const byte *key, uint key_len, 
                              enum ha_rkey_function find_flag)
2494
{
2495
  DBUG_ENTER("ha_ndbcluster::index_read");
2496 2497 2498
  DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", 
                       active_index, key_len, find_flag));

joreland@mysql.com's avatar
joreland@mysql.com committed
2499
  int error;
2500 2501
  ndb_index_type type= get_index_type(active_index);
  const KEY* key_info= table->key_info+active_index;
joreland@mysql.com's avatar
joreland@mysql.com committed
2502 2503 2504 2505 2506
  switch (type){
  case PRIMARY_KEY_ORDERED_INDEX:
  case PRIMARY_KEY_INDEX:
    if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len)
    {
2507
      if(m_active_cursor && (error= close_scan()))
2508
        DBUG_RETURN(error);
joreland@mysql.com's avatar
joreland@mysql.com committed
2509 2510 2511 2512 2513 2514 2515 2516 2517
      DBUG_RETURN(pk_read(key, key_len, buf));
    }
    else if (type == PRIMARY_KEY_INDEX)
    {
      DBUG_RETURN(1);
    }
    break;
  case UNIQUE_ORDERED_INDEX:
  case UNIQUE_INDEX:
2518
    if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len &&
2519
        !check_null_in_key(key_info, key, key_len))
joreland@mysql.com's avatar
joreland@mysql.com committed
2520
    {
2521
      if(m_active_cursor && (error= close_scan()))
2522
        DBUG_RETURN(error);
joreland@mysql.com's avatar
joreland@mysql.com committed
2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533
      DBUG_RETURN(unique_index_read(key, key_len, buf));
    }
    else if (type == UNIQUE_INDEX)
    {
      DBUG_RETURN(1);
    }
    break;
  case ORDERED_INDEX:
    break;
  default:
  case UNDEFINED_INDEX:
2534
    DBUG_ASSERT(FALSE);
2535
    DBUG_RETURN(1);
joreland@mysql.com's avatar
joreland@mysql.com committed
2536 2537 2538
    break;
  }
  
2539
  key_range start_key;
2540 2541 2542
  start_key.key= key;
  start_key.length= key_len;
  start_key.flag= find_flag;
2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554
  bool descending= FALSE;
  switch (find_flag) {
  case HA_READ_KEY_OR_PREV:
  case HA_READ_BEFORE_KEY:
  case HA_READ_PREFIX_LAST:
  case HA_READ_PREFIX_LAST_OR_PREV:
    descending= TRUE;
    break;
  default:
    break;
  }
  error= ordered_index_scan(&start_key, 0, TRUE, descending, buf);  
joreland@mysql.com's avatar
joreland@mysql.com committed
2555
  DBUG_RETURN(error == HA_ERR_END_OF_FILE ? HA_ERR_KEY_NOT_FOUND : error);
2556 2557 2558 2559
}


int ha_ndbcluster::index_read_idx(byte *buf, uint index_no, 
2560 2561
                              const byte *key, uint key_len, 
                              enum ha_rkey_function find_flag)
2562
{
2563
  statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status);
2564
  DBUG_ENTER("ha_ndbcluster::index_read_idx");
2565 2566 2567 2568 2569 2570 2571 2572
  DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len));  
  index_init(index_no);  
  DBUG_RETURN(index_read(buf, key, key_len, find_flag));
}


/*
  Count the read and return the result of next_result() for buf
*/

int ha_ndbcluster::index_next(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_next");

  statistic_increment(current_thd->status_var.ha_read_next_count,
                      &LOCK_status);

  DBUG_RETURN(next_result(buf));
}


/*
  Count the read and return the result of next_result() for buf,
  the same call as used by index_next()
*/

int ha_ndbcluster::index_prev(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_prev");

  statistic_increment(current_thd->status_var.ha_read_prev_count,
                      &LOCK_status);

  DBUG_RETURN(next_result(buf));
}


/*
  Count the read, then start a sorted, ascending ordered index scan
  without keys and return its result
*/

int ha_ndbcluster::index_first(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_first");

  statistic_increment(current_thd->status_var.ha_read_first_count,
                      &LOCK_status);

  /*
    Start the ordered index scan and fetch the first row.
    Only HA_READ_ORDER indexes get called by index_first
  */
  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf));
}


/*
  Count the read, then start a sorted, descending ordered index scan
  without keys and return its result
*/

int ha_ndbcluster::index_last(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_last");

  statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status);

  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf));
}

2608 2609 2610 2611 2612
/*
  Return the result of index_read() called with HA_READ_PREFIX_LAST
*/

int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
{
  DBUG_ENTER("ha_ndbcluster::index_read_last");

  DBUG_RETURN(index_read(buf, key, key_len, HA_READ_PREFIX_LAST));
}
2613

2614 2615
inline
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
2616 2617 2618
                                           const key_range *end_key,
                                           bool eq_r, bool sorted,
                                           byte* buf)
2619
{
2620
  KEY* key_info;
2621 2622
  int error= 1; 
  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
2623
  DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted));
2624

2625
  switch (get_index_type(active_index)){
2626
  case PRIMARY_KEY_ORDERED_INDEX:
2627
  case PRIMARY_KEY_INDEX:
2628 2629
    key_info= table->key_info + active_index;
    if (start_key && 
2630 2631
        start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT)
2632
    {
2633
      if(m_active_cursor && (error= close_scan()))
2634
        DBUG_RETURN(error);
2635 2636 2637
      error= pk_read(start_key->key, start_key->length, buf);      
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
2638
    break;
2639
  case UNIQUE_ORDERED_INDEX:
2640
  case UNIQUE_INDEX:
2641
    key_info= table->key_info + active_index;
2642
    if (start_key && start_key->length == key_info->key_length &&
2643 2644
        start_key->flag == HA_READ_KEY_EXACT && 
        !check_null_in_key(key_info, start_key->key, start_key->length))
2645
    {
2646
      if(m_active_cursor && (error= close_scan()))
2647
        DBUG_RETURN(error);
2648 2649 2650
      error= unique_index_read(start_key->key, start_key->length, buf);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
2651 2652 2653 2654
    break;
  default:
    break;
  }
2655 2656

  // Start the ordered index scan and fetch the first row
2657
  error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf);
2658 2659 2660
  DBUG_RETURN(error);
}

2661

joreland@mysql.com's avatar
joreland@mysql.com committed
2662
int ha_ndbcluster::read_range_first(const key_range *start_key,
2663 2664
                                    const key_range *end_key,
                                    bool eq_r, bool sorted)
joreland@mysql.com's avatar
joreland@mysql.com committed
2665 2666 2667 2668 2669
{
  byte* buf= table->record[0];
  DBUG_ENTER("ha_ndbcluster::read_range_first");
  
  DBUG_RETURN(read_range_first_to_buf(start_key,
2670 2671 2672 2673
                                      end_key,
                                      eq_r, 
                                      sorted,
                                      buf));
joreland@mysql.com's avatar
joreland@mysql.com committed
2674 2675
}

2676
int ha_ndbcluster::read_range_next()
2677 2678 2679 2680 2681 2682
{
  DBUG_ENTER("ha_ndbcluster::read_range_next");
  DBUG_RETURN(next_result(table->record[0]));
}


2683 2684
int ha_ndbcluster::rnd_init(bool scan)
{
2685
  NdbScanOperation *cursor= m_active_cursor;
2686 2687
  DBUG_ENTER("rnd_init");
  DBUG_PRINT("enter", ("scan: %d", scan));
2688
  // Check if scan is to be restarted
mskold@mysql.com's avatar
mskold@mysql.com committed
2689 2690 2691 2692
  if (cursor)
  {
    if (!scan)
      DBUG_RETURN(1);
2693 2694 2695 2696 2697
    if(cursor->restart(m_force_send) != 0)
    {
      DBUG_ASSERT(0);
      DBUG_RETURN(-1);
    }
mskold@mysql.com's avatar
mskold@mysql.com committed
2698
  }
2699
  index_init(table->s->primary_key);
2700 2701 2702
  DBUG_RETURN(0);
}

2703 2704
int ha_ndbcluster::close_scan()
{
2705
  NdbTransaction *trans= m_active_trans;
2706 2707
  DBUG_ENTER("close_scan");

2708 2709
  m_multi_cursor= 0;
  if (!m_active_cursor && !m_multi_cursor)
2710 2711
    DBUG_RETURN(1);

2712
  NdbScanOperation *cursor= m_active_cursor ? m_active_cursor : m_multi_cursor;
2713
  
2714
  if (m_ops_pending)
2715 2716 2717 2718 2719
  {
    /*
      Take over any pending transactions to the 
      deleteing/updating transaction before closing the scan    
    */
2720
    DBUG_PRINT("info", ("ops_pending: %d", m_ops_pending));    
2721
    if (execute_no_commit(this,trans) != 0) {
2722
      no_uncommitted_rows_execute_failure();
2723
      DBUG_RETURN(ndb_err(trans));
2724
    }
2725
    m_ops_pending= 0;
2726 2727
  }
  
2728
  cursor->close(m_force_send, TRUE);
2729
  m_active_cursor= m_multi_cursor= NULL;
mskold@mysql.com's avatar
mskold@mysql.com committed
2730
  DBUG_RETURN(0);
2731
}
2732 2733 2734 2735

int ha_ndbcluster::rnd_end()
{
  DBUG_ENTER("rnd_end");
2736
  DBUG_RETURN(close_scan());
2737 2738 2739 2740 2741 2742
}


/*
  Read the next row of a table scan into buf.

  The first call, when no cursor is active yet, starts a full table scan;
  later calls continue on the active cursor.
*/
int ha_ndbcluster::rnd_next(byte *buf)
{
  DBUG_ENTER("rnd_next");
  statistic_increment(current_thd->status_var.ha_read_rnd_next_count,
                      &LOCK_status);

  if (m_active_cursor)
    DBUG_RETURN(next_result(buf));
  DBUG_RETURN(full_table_scan(buf));
}


/*
  Read a row once more from the database, given the primary key that
  position() stored for it.

  pos holds that key (ref_length bytes); the row is fetched into buf with
  a primary key read.
*/

int ha_ndbcluster::rnd_pos(byte *buf, byte *pos)
{
  DBUG_ENTER("rnd_pos");
  statistic_increment(current_thd->status_var.ha_read_rnd_count,
                      &LOCK_status);
  DBUG_RETURN(pk_read(pos, ref_length, buf));
}


/*
  Store the primary key of this record in ref 
  variable, so that the row can be retrieved again later
  using "reference" in rnd_pos
*/

void ha_ndbcluster::position(const byte *record)
{
  KEY *key_info;
  KEY_PART_INFO *key_part;
  KEY_PART_INFO *end;
  byte *buff;
  DBUG_ENTER("position");

2784
  if (table->s->primary_key != MAX_KEY) 
2785
  {
2786
    key_info= table->key_info + table->s->primary_key;
2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809
    key_part= key_info->key_part;
    end= key_part + key_info->key_parts;
    buff= ref;
    
    for (; key_part != end; key_part++) 
    {
      if (key_part->null_bit) {
        /* Store 0 if the key part is a NULL part */      
        if (record[key_part->null_offset]
            & key_part->null_bit) {
          *buff++= 1;
          continue;
        }      
        *buff++= 0;
      }
      memcpy(buff, record + key_part->offset, key_part->length);
      buff += key_part->length;
    }
  } 
  else 
  {
    // No primary key, get hidden key
    DBUG_PRINT("info", ("Getting hidden key"));
2810
    int hidden_no= table->s->fields;
2811
    const NdbRecAttr* rec= m_value[hidden_no].rec;
2812 2813
    memcpy(ref, (const void*)rec->aRef(), ref_length);
#ifndef DBUG_OFF
joreland@mysql.com's avatar
joreland@mysql.com committed
2814
    const NDBTAB *tab= (const NDBTAB *) m_table;  
2815 2816 2817 2818 2819
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    DBUG_ASSERT(hidden_col->getPrimaryKey() && 
                hidden_col->getAutoIncrement() &&
                rec != NULL && 
                ref_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
2820
#endif
2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839
  }
  
  DBUG_DUMP("ref", (char*)ref, ref_length);
  DBUG_VOID_RETURN;
}


/*
  Update the handler statistics selected by the bits in flag.

  HA_STATUS_VARIABLE  refresh records (and, when exact statistics are
                      fetched from NDB, mean_rec_length and
                      data_file_length)
  HA_STATUS_CONST     recompute rec_per_key
  HA_STATUS_ERRKEY    report the key of the last duplicate in errkey

  The remaining flags are only traced.
  If no NDB connection can be set up while handling HA_STATUS_VARIABLE,
  my_errno is set and the function returns immediately.
*/
void ha_ndbcluster::info(uint flag)
{
  DBUG_ENTER("info");
  DBUG_PRINT("enter", ("flag: %d", flag));

  if (flag & HA_STATUS_POS)
    DBUG_PRINT("info", ("HA_STATUS_POS"));
  if (flag & HA_STATUS_NO_LOCK)
    DBUG_PRINT("info", ("HA_STATUS_NO_LOCK"));
  if (flag & HA_STATUS_TIME)
    DBUG_PRINT("info", ("HA_STATUS_TIME"));

  if (flag & HA_STATUS_VARIABLE)
  {
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    if (!m_table_info)
    {
      // No table info attached to the handler: ask NDB directly
      my_errno= check_ndb_connection();
      if (my_errno)
        DBUG_VOID_RETURN;

      Ndb *ndb= get_ndb();
      struct Ndb_statistics stat;
      const bool got_exact_stat=
        current_thd->variables.ndb_use_exact_count &&
        ndb_get_table_statistics(ndb, m_tabname, &stat) == 0;
      if (got_exact_stat)
      {
        mean_rec_length= stat.row_size;
        data_file_length= stat.fragment_memory;
        records= stat.row_count;
      }
      else
      {
        // Fall back to a fixed guess
        mean_rec_length= 0;
        records= 100;
      }
    }
    else if (m_ha_not_exact_count)
      records= 100;
    else
      records_update();
  }

  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    set_rec_per_key();
  }

  if (flag & HA_STATUS_ERRKEY)
  {
    DBUG_PRINT("info", ("HA_STATUS_ERRKEY"));
    errkey= m_dupkey;
  }

  if (flag & HA_STATUS_AUTO)
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
  DBUG_VOID_RETURN;
}


int ha_ndbcluster::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("extra");
  switch (operation) {
  case HA_EXTRA_NORMAL:              /* Optimize for space (def) */
    DBUG_PRINT("info", ("HA_EXTRA_NORMAL"));
    break;
  case HA_EXTRA_QUICK:                 /* Optimize for speed */
    DBUG_PRINT("info", ("HA_EXTRA_QUICK"));
    break;
  case HA_EXTRA_RESET:                 /* Reset database to after open */
    DBUG_PRINT("info", ("HA_EXTRA_RESET"));
2897 2898
    DBUG_PRINT("info", ("Clearing condition stack"));
    cond_clear();
2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967
    break;
  case HA_EXTRA_CACHE:                 /* Cash record in HA_rrnd() */
    DBUG_PRINT("info", ("HA_EXTRA_CACHE"));
    break;
  case HA_EXTRA_NO_CACHE:              /* End cacheing of records (def) */
    DBUG_PRINT("info", ("HA_EXTRA_NO_CACHE"));
    break;
  case HA_EXTRA_NO_READCHECK:          /* No readcheck on update */
    DBUG_PRINT("info", ("HA_EXTRA_NO_READCHECK"));
    break;
  case HA_EXTRA_READCHECK:             /* Use readcheck (def) */
    DBUG_PRINT("info", ("HA_EXTRA_READCHECK"));
    break;
  case HA_EXTRA_KEYREAD:               /* Read only key to database */
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
    break;
  case HA_EXTRA_NO_KEYREAD:            /* Normal read of records (def) */
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
    break;
  case HA_EXTRA_NO_USER_CHANGE:        /* No user is allowed to write */
    DBUG_PRINT("info", ("HA_EXTRA_NO_USER_CHANGE"));
    break;
  case HA_EXTRA_KEY_CACHE:
    DBUG_PRINT("info", ("HA_EXTRA_KEY_CACHE"));
    break;
  case HA_EXTRA_NO_KEY_CACHE:
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEY_CACHE"));
    break;
  case HA_EXTRA_WAIT_LOCK:            /* Wait until file is avalably (def) */
    DBUG_PRINT("info", ("HA_EXTRA_WAIT_LOCK"));
    break;
  case HA_EXTRA_NO_WAIT_LOCK:         /* If file is locked, return quickly */
    DBUG_PRINT("info", ("HA_EXTRA_NO_WAIT_LOCK"));
    break;
  case HA_EXTRA_WRITE_CACHE:           /* Use write cache in ha_write() */
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CACHE"));
    break;
  case HA_EXTRA_FLUSH_CACHE:           /* flush write_record_cache */
    DBUG_PRINT("info", ("HA_EXTRA_FLUSH_CACHE"));
    break;
  case HA_EXTRA_NO_KEYS:               /* Remove all update of keys */
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEYS"));
    break;
  case HA_EXTRA_KEYREAD_CHANGE_POS:         /* Keyread, but change pos */
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_CHANGE_POS")); /* xxxxchk -r must be used */
    break;                                  
  case HA_EXTRA_REMEMBER_POS:          /* Remember pos for next/prev */
    DBUG_PRINT("info", ("HA_EXTRA_REMEMBER_POS"));
    break;
  case HA_EXTRA_RESTORE_POS:
    DBUG_PRINT("info", ("HA_EXTRA_RESTORE_POS"));
    break;
  case HA_EXTRA_REINIT_CACHE:          /* init cache from current record */
    DBUG_PRINT("info", ("HA_EXTRA_REINIT_CACHE"));
    break;
  case HA_EXTRA_FORCE_REOPEN:          /* Datafile have changed on disk */
    DBUG_PRINT("info", ("HA_EXTRA_FORCE_REOPEN"));
    break;
  case HA_EXTRA_FLUSH:                 /* Flush tables to disk */
    DBUG_PRINT("info", ("HA_EXTRA_FLUSH"));
    break;
  case HA_EXTRA_NO_ROWS:               /* Don't write rows */
    DBUG_PRINT("info", ("HA_EXTRA_NO_ROWS"));
    break;
  case HA_EXTRA_RESET_STATE:           /* Reset positions */
    DBUG_PRINT("info", ("HA_EXTRA_RESET_STATE"));
    break;
  case HA_EXTRA_IGNORE_DUP_KEY:       /* Dup keys don't rollback everything*/
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
2968 2969 2970 2971 2972 2973
    if (current_thd->lex->sql_command == SQLCOM_REPLACE)
    {
      DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
      m_use_write= TRUE;
    } else 
    {
2974 2975
      DBUG_PRINT("info", ("Ignoring duplicate key"));
      m_ignore_dup_key= TRUE;
2976
    }
2977 2978 2979 2980
    break;
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
    DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
2981
    m_use_write= FALSE;
2982
    m_ignore_dup_key= FALSE;
2983 2984
    break;
  case HA_EXTRA_RETRIEVE_ALL_COLS:    /* Retrieve all columns, not just those
2985 2986
                                         where field->query_id is the same as
                                         the current query id */
2987
    DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_ALL_COLS"));
2988
    m_retrieve_all_fields= TRUE;
2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000
    break;
  case HA_EXTRA_PREPARE_FOR_DELETE:
    DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_DELETE"));
    break;
  case HA_EXTRA_PREPARE_FOR_UPDATE:     /* Remove read cache if problems */
    DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_UPDATE"));
    break;
  case HA_EXTRA_PRELOAD_BUFFER_SIZE: 
    DBUG_PRINT("info", ("HA_EXTRA_PRELOAD_BUFFER_SIZE"));
    break;
  case HA_EXTRA_RETRIEVE_PRIMARY_KEY: 
    DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_PRIMARY_KEY"));
3001
    m_retrieve_primary_key= TRUE;
3002 3003 3004 3005 3006 3007
    break;
  case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: 
    DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_UNIQUE"));
    break;
  case HA_EXTRA_CHANGE_KEY_TO_DUP: 
    DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_DUP"));
3008 3009
  case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_PRESERVE_FIELDS"));
3010 3011 3012 3013 3014 3015 3016
    break;

  }
  
  DBUG_RETURN(0);
}

3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029
/*
  Start of an insert

  SYNOPSIS
    start_bulk_insert()
    rows     number of rows to insert, 0 if unknown

  DESCRIPTION
    Remembers the number of rows to be inserted; it will be used in
    write_row and get_autoincrement to send an optimal number of rows in
    each roundtrip to the server. When the number is unknown, the
    autoincrement prefetch size is used as a guess.

    Also sets m_bulk_insert_rows, the number of rows to send per roundtrip.
*/

void ha_ndbcluster::start_bulk_insert(ha_rows rows)
{
  DBUG_ENTER("start_bulk_insert");
  DBUG_PRINT("enter", ("rows: %d", (int)rows));

  const NDBTAB *ndbtab= (const NDBTAB *) m_table;

  m_rows_inserted= (ha_rows) 0;
  if (rows != (ha_rows) 0)
    m_rows_to_insert= rows;
  else
  {
    /* We don't know how many will be inserted, guess */
    m_rows_to_insert= m_autoincrement_prefetch;
  }

  /*
    Batching more rows per roundtrip saves roundtrips to NDB, but
    performance degrades if too many bytes are sent at once. The batch is
    therefore sized to fit in bytesperbatch, with a minimum of one row.
  */
  const int bytesperbatch= 8192;
  int bytes= 12 + ndbtab->getRowSizeInBytes() + 4 * ndbtab->getNoOfColumns();
  int batch= bytesperbatch/bytes;
  if (batch == 0)
    batch= 1;
  DBUG_PRINT("info", ("batch: %d, bytes: %d", batch, bytes));
  m_bulk_insert_rows= batch;

  DBUG_VOID_RETURN;
}

/*
  End of an insert
 */
int ha_ndbcluster::end_bulk_insert()
{
3066 3067
  int error= 0;

3068
  DBUG_ENTER("end_bulk_insert");
3069
  // Check if last inserts need to be flushed
3070
  if (m_bulk_insert_not_flushed)
3071
  {
3072
    NdbTransaction *trans= m_active_trans;
3073 3074 3075
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "\
                        "rows_inserted:%d, bulk_insert_rows: %d", 
3076
                        (int) m_rows_inserted, (int) m_bulk_insert_rows)); 
3077
    m_bulk_insert_not_flushed= FALSE;
3078
    if (execute_no_commit(this,trans) != 0) {
3079
      no_uncommitted_rows_execute_failure();
3080
      my_errno= error= ndb_err(trans);
3081
    }
3082 3083
  }

3084 3085
  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
3086
  DBUG_RETURN(error);
3087 3088
}

3089 3090 3091 3092

int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  DBUG_ENTER("extra_opt");
pekka@mysql.com's avatar
pekka@mysql.com committed
3093
  DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
3094 3095 3096
  DBUG_RETURN(extra(operation));
}

3097 3098 3099 3100
static const char *ha_ndbcluster_exts[] = {
 ha_ndb_ext,
 NullS
};
3101

3102
const char** ha_ndbcluster::bas_ext() const
3103 3104 3105
{
  return ha_ndbcluster_exts;
}
3106 3107 3108 3109 3110 3111 3112 3113 3114

/*
  How many seeks it will take to read through the table
  This is to be comparable to the number returned by records_in_range so
  that we can decide if we should scan the table or use keys.
*/

double ha_ndbcluster::scan_time()
{
3115 3116 3117
  DBUG_ENTER("ha_ndbcluster::scan_time()");
  double res= rows2double(records*1000);
  DBUG_PRINT("exit", ("table: %s value: %f", 
3118
                      m_tabname, res));
3119
  DBUG_RETURN(res);
3120 3121 3122 3123 3124 3125 3126 3127 3128 3129
}


/*
  Register this handler's lock in the array at 'to' and decide which lock
  type to use.

  The requested type is only considered when it is not TL_IGNORE and the
  handler's lock is currently TL_UNLOCK. Outside LOCK TABLES it is relaxed
  as described below before being stored in m_lock.

  RETURN
    Pointer to the position after the lock that was added
*/
THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
                                          THR_LOCK_DATA **to,
                                          enum thr_lock_type lock_type)
{
  DBUG_ENTER("store_lock");

  const bool may_set_type= (lock_type != TL_IGNORE &&
                            m_lock.type == TL_UNLOCK);
  if (may_set_type)
  {
    if (!thd->in_lock_tables)
    {
      if (lock_type >= TL_WRITE_ALLOW_WRITE && lock_type <= TL_WRITE)
      {
        /*
          We are not doing a LOCK TABLE, so allow multiple writers.
          Since NDB does not currently have table locks this is treated
          as an ordinary lock.
        */
        lock_type= TL_WRITE_ALLOW_WRITE;
      }
      else if (lock_type == TL_READ_NO_INSERT)
      {
        /*
          In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
          MySQL would use the lock TL_READ_NO_INSERT on t2, and that
          would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
          to t2. Convert the lock to a normal read lock to allow
          concurrent inserts to t2.
        */
        lock_type= TL_READ;
      }
    }
    m_lock.type=lock_type;
  }

  *to= &m_lock;
  to++;

  DBUG_PRINT("exit", ("lock_type: %d", lock_type));

  DBUG_RETURN(to);
}

#ifndef DBUG_OFF
/*
  Trace which of the transaction related option flags are set in
  (t)->options. The body is wrapped in do { } while (0) so that the macro
  behaves as a single statement, and the argument is parenthesized so
  that any pointer expression can be passed.
*/
#define PRINT_OPTION_FLAGS(t) do { \
      if ((t)->options & OPTION_NOT_AUTOCOMMIT) \
        DBUG_PRINT("thd->options", ("OPTION_NOT_AUTOCOMMIT")); \
      if ((t)->options & OPTION_BEGIN) \
        DBUG_PRINT("thd->options", ("OPTION_BEGIN")); \
      if ((t)->options & OPTION_TABLE_LOCK) \
        DBUG_PRINT("thd->options", ("OPTION_TABLE_LOCK")); \
} while (0)
#else
#define PRINT_OPTION_FLAGS(t)
#endif


/*
  As MySQL will execute an external lock for every new table it uses
  we can use this to start the transactions.
  If we are in auto_commit mode we just need to start a transaction
  for the statement, this will be stored in thd_ndb.stmt.
  If not, we have to start a master transaction if there doesn't exist
  one from before, this will be stored in thd_ndb.all

  When a table lock is held one transaction will be started which holds
  the table lock and for each statement a hupp transaction will be started
  If we are locking the table then:
  - save the NdbDictionary::Table for easy access
  - save reference to table statistics
  - refresh list of the indexes for the table if needed (if altered)

  If locking fails after the lock has been counted in
  thd_ndb->lock_count, the count is decremented again before returning,
  so that a failed call does not leave the counter incremented.

  RETURN
    0      ok
    1      no connection to NDB could be set up
    other  error code mapped from the NDB error
 */

int ha_ndbcluster::external_lock(THD *thd, int lock_type)
{
  int error=0;
  NdbTransaction* trans= NULL;

  DBUG_ENTER("external_lock");
  /*
    Check that this handler instance has a connection
    set up to the Ndb object of thd
   */
  if (check_ndb_connection(thd))
    DBUG_RETURN(1);

  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;

  /* Pointers are printed with %p; %x expects an unsigned int */
  DBUG_PRINT("enter", ("thd: %p, thd_ndb: %p, thd_ndb->lock_count: %d",
                       thd, thd_ndb, thd_ndb->lock_count));

  if (lock_type != F_UNLCK)
  {
    DBUG_PRINT("info", ("lock_type != F_UNLCK"));
    if (!thd_ndb->lock_count++)
    {
      PRINT_OPTION_FLAGS(thd);
      if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN | OPTION_TABLE_LOCK)))
      {
        // Autocommit transaction
        DBUG_ASSERT(!thd_ndb->stmt);
        DBUG_PRINT("trans",("Starting transaction stmt"));

        trans= ndb->startTransaction();
        if (trans == NULL)
        {
          // Undo the lock count, this lock was never established
          thd_ndb->lock_count--;
          ERR_RETURN(ndb->getNdbError());
        }
        no_uncommitted_rows_reset(thd);
        thd_ndb->stmt= trans;
        trans_register_ha(thd, FALSE, &ndbcluster_hton);
      }
      else
      {
        if (!thd_ndb->all)
        {
          // Not autocommit transaction
          // A "master" transaction has not been started yet
          DBUG_PRINT("trans",("starting transaction, all"));

          trans= ndb->startTransaction();
          if (trans == NULL)
          {
            // Undo the lock count, this lock was never established
            thd_ndb->lock_count--;
            ERR_RETURN(ndb->getNdbError());
          }
          no_uncommitted_rows_reset(thd);
          thd_ndb->all= trans;
          trans_register_ha(thd, TRUE, &ndbcluster_hton);

          /*
            If this is the start of a LOCK TABLE, a table lock
            should be taken on the table in NDB

            Check if it should be read or write lock
           */
          if (thd->options & (OPTION_TABLE_LOCK))
          {
            //lockThisTable();
            DBUG_PRINT("info", ("Locking the table..." ));
          }

        }
      }
    }
    /*
      This is the place to make sure this handler instance
      has a started transaction.

      The transaction is started by the first handler on which
      MySQL Server calls external lock

      Other handlers in the same stmt or transaction should use
      the same NDB transaction. This is done by setting up the m_active_trans
      pointer to point to the NDB transaction.
     */

    // store thread specific data first to set the right context
    m_force_send=          thd->variables.ndb_force_send;
    m_ha_not_exact_count= !thd->variables.ndb_use_exact_count;
    m_autoincrement_prefetch=
      (ha_rows) thd->variables.ndb_autoincrement_prefetch_sz;
    if (!thd->transaction.on)
      m_transaction_on= FALSE;
    else
      m_transaction_on= thd->variables.ndb_use_transactions;

    m_active_trans= thd_ndb->all ? thd_ndb->all : thd_ndb->stmt;
    DBUG_ASSERT(m_active_trans);
    // Start of transaction
    m_rows_changed= 0;
    m_retrieve_all_fields= FALSE;
    m_retrieve_primary_key= FALSE;
    m_ops_pending= 0;
    {
      NDBDICT *dict= ndb->getDictionary();
      const NDBTAB *tab;
      void *tab_info;
      if (!(tab= dict->getTable(m_tabname, &tab_info)))
      {
        // Locking failed, do not leave the lock counted
        thd_ndb->lock_count--;
        ERR_RETURN(dict->getNdbError());
      }
      DBUG_PRINT("info", ("Table schema version: %d",
                          tab->getObjectVersion()));
      // Check if thread has stale local cache
      if (tab->getObjectStatus() == NdbDictionary::Object::Invalid)
      {
        invalidate_dictionary_cache(FALSE);
        if (!(tab= dict->getTable(m_tabname, &tab_info)))
        {
          // Locking failed, do not leave the lock counted
          thd_ndb->lock_count--;
          ERR_RETURN(dict->getNdbError());
        }
        DBUG_PRINT("info", ("Table schema version: %d",
                            tab->getObjectVersion()));
      }
      if (m_table != (void *)tab || m_table_version < tab->getObjectVersion())
      {
        /*
          The table has been altered, refresh the index list
        */
        build_index_list(ndb, table, ILBP_OPEN);
        m_table= (void *)tab;
        m_table_version = tab->getObjectVersion();
      }
      m_table_info= tab_info;
    }
    no_uncommitted_rows_init(thd);
  }
  else
  {
    DBUG_PRINT("info", ("lock_type == F_UNLCK"));

    if (ndb_cache_check_time && m_rows_changed)
    {
      DBUG_PRINT("info", ("Rows has changed and util thread is running"));
      if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      {
        DBUG_PRINT("info", ("Add share to list of tables to be invalidated"));
        /* NOTE push_back allocates memory using transactions mem_root! */
        thd_ndb->changed_tables.push_back(m_share, &thd->transaction.mem_root);
      }

      pthread_mutex_lock(&m_share->mutex);
      DBUG_PRINT("info", ("Invalidating commit_count"));
      m_share->commit_count= 0;
      m_share->commit_count_lock++;
      pthread_mutex_unlock(&m_share->mutex);
    }

    if (!--thd_ndb->lock_count)
    {
      DBUG_PRINT("trans", ("Last external_lock"));
      PRINT_OPTION_FLAGS(thd);

      if (thd_ndb->stmt)
      {
        /*
          Unlock is done without a transaction commit / rollback.
          This happens if the thread didn't update any rows
          We must in this case close the transaction to release resources
        */
        DBUG_PRINT("trans",("ending non-updating transaction"));
        ndb->closeTransaction(m_active_trans);
        thd_ndb->stmt= NULL;
      }
    }
    m_table_info= NULL;

    /*
      This is the place to make sure this handler instance
      is no longer connected to the active transaction.

      And since the handler is no longer part of the transaction
      it can't have open cursors, ops or blobs pending.
    */
    m_active_trans= NULL;

    if (m_active_cursor)
      DBUG_PRINT("warning", ("m_active_cursor != NULL"));
    m_active_cursor= NULL;

    if (m_multi_cursor)
      DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
    m_multi_cursor= NULL;

    if (m_blobs_pending)
      DBUG_PRINT("warning", ("blobs_pending != 0"));
    m_blobs_pending= 0;

    if (m_ops_pending)
      DBUG_PRINT("warning", ("ops_pending != 0L"));
    m_ops_pending= 0;
  }
  DBUG_RETURN(error);
}

/*
  When using LOCK TABLE's external_lock is only called when the actual
  TABLE LOCK is done.
  Under LOCK TABLES, each used tables will force a call to start_stmt.
  Ndb doesn't currently support table locks, and will do ordinary
  startTransaction for each transaction/statement; no "hupp" transaction
  is derived from the transaction in thd_ndb->all.

  RETURN
    0      ok
    other  error code mapped from the NDB error when no transaction
           could be started
*/

int ha_ndbcluster::start_stmt(THD *thd)
{
  DBUG_ENTER("start_stmt");
  PRINT_OPTION_FLAGS(thd);

  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  NdbTransaction *stmt_trans= thd_ndb->stmt;
  if (stmt_trans == NULL)
  {
    // No statement transaction yet, start one and register it
    Ndb *ndb= thd_ndb->ndb;
    DBUG_PRINT("trans",("Starting transaction stmt"));

    stmt_trans= ndb->startTransaction();
    if (stmt_trans == NULL)
      ERR_RETURN(ndb->getNdbError());
    no_uncommitted_rows_reset(thd);
    thd_ndb->stmt= stmt_trans;
    trans_register_ha(thd, FALSE, &ndbcluster_hton);
  }
  m_active_trans= stmt_trans;

  // Start of statement
  m_retrieve_all_fields= FALSE;
  m_retrieve_primary_key= FALSE;
  m_ops_pending= 0;

  DBUG_RETURN(0);
}


/*
3429
  Commit a transaction started in NDB
3430 3431
 */

3432
int ndbcluster_commit(THD *thd, bool all)
3433 3434
{
  int res= 0;
3435 3436 3437
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;
3438 3439 3440

  DBUG_ENTER("ndbcluster_commit");
  DBUG_PRINT("transaction",("%s",
3441
                            trans == thd_ndb->stmt ?
3442 3443 3444
                            "stmt" : "all"));
  DBUG_ASSERT(ndb && trans);

3445
  if (execute_commit(thd,trans) != 0)
3446 3447
  {
    const NdbError err= trans->getNdbError();
3448
    const NdbOperation *error_op= trans->getNdbErrorOperation();
3449
    ERR_PRINT(err);
3450
    res= ndb_to_mysql_error(&err);
3451
    if (res != -1)
3452
      ndbcluster_print_error(res, error_op);
3453
  }
3454
  ndb->closeTransaction(trans);
3455

3456 3457 3458 3459
  if(all)
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;
3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473

  /* Clear commit_count for tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    pthread_mutex_lock(&share->mutex);
    DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ", share->table_name, share->commit_count));
    share->commit_count= 0;
    share->commit_count_lock++;
    pthread_mutex_unlock(&share->mutex);
  }
  thd_ndb->changed_tables.empty();

3474 3475 3476 3477 3478 3479 3480 3481
  DBUG_RETURN(res);
}


/*
  Rollback a transaction started in NDB
 */

3482
int ndbcluster_rollback(THD *thd, bool all)
3483 3484
{
  int res= 0;
3485 3486 3487
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;
3488 3489 3490

  DBUG_ENTER("ndbcluster_rollback");
  DBUG_PRINT("transaction",("%s",
3491
                            trans == thd_ndb->stmt ? 
3492 3493 3494
                            "stmt" : "all"));
  DBUG_ASSERT(ndb && trans);

3495
  if (trans->execute(NdbTransaction::Rollback) != 0)
3496 3497
  {
    const NdbError err= trans->getNdbError();
3498
    const NdbOperation *error_op= trans->getNdbErrorOperation();
3499 3500
    ERR_PRINT(err);     
    res= ndb_to_mysql_error(&err);
3501 3502
    if (res != -1) 
      ndbcluster_print_error(res, error_op);
3503 3504
  }
  ndb->closeTransaction(trans);
3505 3506 3507 3508 3509 3510

  if(all)
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;

3511 3512 3513
  /* Clear list of tables changed by transaction */
  thd_ndb->changed_tables.empty();

3514
  DBUG_RETURN(res);
3515 3516 3517 3518
}


/*
  Define NDB column based on Field.
  Returns 0 or mysql error code.
  Not member of ha_ndbcluster because NDBCOL cannot be declared.

  MySQL text types with character set "binary" are mapped to true
  NDB binary types without a character set.  This may change.
 */

pekka@mysql.com's avatar
pekka@mysql.com committed
3527 3528 3529
static int create_ndb_column(NDBCOL &col,
                             Field *field,
                             HA_CREATE_INFO *info)
3530
{
pekka@mysql.com's avatar
pekka@mysql.com committed
3531
  // Set name
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
3532
  col.setName(field->field_name);
pekka@mysql.com's avatar
pekka@mysql.com committed
3533 3534
  // Get char set
  CHARSET_INFO *cs= field->charset();
pekka@mysql.com's avatar
pekka@mysql.com committed
3535 3536 3537 3538
  // Set type and sizes
  const enum enum_field_types mysql_type= field->real_type();
  switch (mysql_type) {
  // Numeric types
3539
  case MYSQL_TYPE_TINY:        
pekka@mysql.com's avatar
pekka@mysql.com committed
3540 3541 3542 3543 3544 3545
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Tinyunsigned);
    else
      col.setType(NDBCOL::Tinyint);
    col.setLength(1);
    break;
3546
  case MYSQL_TYPE_SHORT:
pekka@mysql.com's avatar
pekka@mysql.com committed
3547 3548 3549 3550 3551 3552
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Smallunsigned);
    else
      col.setType(NDBCOL::Smallint);
    col.setLength(1);
    break;
3553
  case MYSQL_TYPE_LONG:
pekka@mysql.com's avatar
pekka@mysql.com committed
3554 3555 3556 3557 3558 3559
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Unsigned);
    else
      col.setType(NDBCOL::Int);
    col.setLength(1);
    break;
3560
  case MYSQL_TYPE_INT24:       
pekka@mysql.com's avatar
pekka@mysql.com committed
3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Mediumunsigned);
    else
      col.setType(NDBCOL::Mediumint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONGLONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Bigunsigned);
    else
      col.setType(NDBCOL::Bigint);
    col.setLength(1);
3573 3574
    break;
  case MYSQL_TYPE_FLOAT:
pekka@mysql.com's avatar
pekka@mysql.com committed
3575 3576 3577
    col.setType(NDBCOL::Float);
    col.setLength(1);
    break;
3578
  case MYSQL_TYPE_DOUBLE:
pekka@mysql.com's avatar
pekka@mysql.com committed
3579 3580 3581
    col.setType(NDBCOL::Double);
    col.setLength(1);
    break;
3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601
  case MYSQL_TYPE_DECIMAL:    
    {
      Field_decimal *f= (Field_decimal*)field;
      uint precision= f->pack_length();
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Olddecimalunsigned);
        precision-= (scale > 0);
      }
      else
      {
        col.setType(NDBCOL::Olddecimal);
        precision-= 1 + (scale > 0);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
3602 3603 3604
  case MYSQL_TYPE_NEWDECIMAL:    
    {
      Field_new_decimal *f= (Field_new_decimal*)field;
3605
      uint precision= f->precision;
3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Decimalunsigned);
      }
      else
      {
        col.setType(NDBCOL::Decimal);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
3620 3621 3622 3623 3624
  // Date types
  case MYSQL_TYPE_DATETIME:    
    col.setType(NDBCOL::Datetime);
    col.setLength(1);
    break;
3625 3626 3627 3628
  case MYSQL_TYPE_DATE: // ?
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
3629
  case MYSQL_TYPE_NEWDATE:
3630 3631 3632
    col.setType(NDBCOL::Date);
    col.setLength(1);
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
3633
  case MYSQL_TYPE_TIME:        
3634 3635 3636
    col.setType(NDBCOL::Time);
    col.setLength(1);
    break;
3637 3638 3639 3640 3641 3642 3643
  case MYSQL_TYPE_YEAR:
    col.setType(NDBCOL::Year);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    col.setType(NDBCOL::Timestamp);
    col.setLength(1);
pekka@mysql.com's avatar
pekka@mysql.com committed
3644 3645 3646
    break;
  // Char types
  case MYSQL_TYPE_STRING:      
3647
    if (field->pack_length() == 0)
3648 3649 3650 3651
    {
      col.setType(NDBCOL::Bit);
      col.setLength(1);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
3652
    else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
3653
    {
pekka@mysql.com's avatar
pekka@mysql.com committed
3654
      col.setType(NDBCOL::Binary);
3655
      col.setLength(field->pack_length());
pekka@mysql.com's avatar
pekka@mysql.com committed
3656
    }
3657
    else
3658 3659 3660
    {
      col.setType(NDBCOL::Char);
      col.setCharset(cs);
3661
      col.setLength(field->pack_length());
3662
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
3663
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
3664 3665 3666 3667 3668 3669
  case MYSQL_TYPE_VAR_STRING: // ?
  case MYSQL_TYPE_VARCHAR:
    {
      Field_varstring* f= (Field_varstring*)field;
      if (f->length_bytes == 1)
      {
pekka@mysql.com's avatar
pekka@mysql.com committed
3670
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
3671 3672 3673 3674 3675 3676 3677 3678
          col.setType(NDBCOL::Varbinary);
        else {
          col.setType(NDBCOL::Varchar);
          col.setCharset(cs);
        }
      }
      else if (f->length_bytes == 2)
      {
pekka@mysql.com's avatar
pekka@mysql.com committed
3679
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690
          col.setType(NDBCOL::Longvarbinary);
        else {
          col.setType(NDBCOL::Longvarchar);
          col.setCharset(cs);
        }
      }
      else
      {
        return HA_ERR_UNSUPPORTED;
      }
      col.setLength(field->field_length);
pekka@mysql.com's avatar
pekka@mysql.com committed
3691
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
3692 3693 3694 3695
    break;
  // Blob types (all come in as MYSQL_TYPE_BLOB)
  mysql_type_tiny_blob:
  case MYSQL_TYPE_TINY_BLOB:
pekka@mysql.com's avatar
pekka@mysql.com committed
3696
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
3697
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
3698
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
3699
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
3700 3701
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
3702 3703 3704 3705 3706
    col.setInlineSize(256);
    // No parts
    col.setPartSize(0);
    col.setStripeSize(0);
    break;
3707
  //mysql_type_blob:
pekka@mysql.com's avatar
pekka@mysql.com committed
3708
  case MYSQL_TYPE_BLOB:    
pekka@mysql.com's avatar
pekka@mysql.com committed
3709
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
3710
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
3711
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
3712
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
3713 3714
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730
    // Use "<=" even if "<" is the exact condition
    if (field->max_length() <= (1 << 8))
      goto mysql_type_tiny_blob;
    else if (field->max_length() <= (1 << 16))
    {
      col.setInlineSize(256);
      col.setPartSize(2000);
      col.setStripeSize(16);
    }
    else if (field->max_length() <= (1 << 24))
      goto mysql_type_medium_blob;
    else
      goto mysql_type_long_blob;
    break;
  mysql_type_medium_blob:
  case MYSQL_TYPE_MEDIUM_BLOB:   
pekka@mysql.com's avatar
pekka@mysql.com committed
3731
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
3732
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
3733
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
3734
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
3735 3736
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
3737 3738 3739 3740 3741 3742
    col.setInlineSize(256);
    col.setPartSize(4000);
    col.setStripeSize(8);
    break;
  mysql_type_long_blob:
  case MYSQL_TYPE_LONG_BLOB:  
pekka@mysql.com's avatar
pekka@mysql.com committed
3743
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
3744
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
3745
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
3746
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
3747 3748
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761
    col.setInlineSize(256);
    col.setPartSize(8000);
    col.setStripeSize(4);
    break;
  // Other types
  case MYSQL_TYPE_ENUM:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_SET:         
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
3762 3763 3764 3765 3766 3767 3768 3769 3770
  case MYSQL_TYPE_BIT: {
    int no_of_bits= field->field_length*8 + ((Field_bit *) field)->bit_len;
    col.setType(NDBCOL::Bit);
    if (!no_of_bits)
      col.setLength(1);
      else
        col.setLength(no_of_bits);
    break;
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
3771 3772 3773 3774 3775 3776
  case MYSQL_TYPE_NULL:        
  case MYSQL_TYPE_GEOMETRY:
    goto mysql_type_unsupported;
  mysql_type_unsupported:
  default:
    return HA_ERR_UNSUPPORTED;
3777
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
3778 3779 3780 3781 3782 3783 3784 3785
  // Set nullable and pk
  col.setNullable(field->maybe_null());
  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
  // Set autoincrement
  if (field->flags & AUTO_INCREMENT_FLAG) 
  {
    col.setAutoIncrement(TRUE);
    ulonglong value= info->auto_increment_value ?
3786
      info->auto_increment_value : (ulonglong) 1;
pekka@mysql.com's avatar
pekka@mysql.com committed
3787 3788
    DBUG_PRINT("info", ("Autoincrement key, initial: %llu", value));
    col.setAutoIncrementInitialValue(value);
3789
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
3790
  else
3791
    col.setAutoIncrement(FALSE);
pekka@mysql.com's avatar
pekka@mysql.com committed
3792
  return 0;
3793 3794 3795 3796 3797 3798
}

/*
  Create a table in NDB Cluster
 */

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3799 3800
/*
  Choose the NDB fragment type for 'tab' from the MAX_ROWS setting of
  the table.  Nothing is set when MAX_ROWS is 0 (the default).
  'pk_length' is only used when compiled for versions before 5.0.
*/
static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length)
{
  const ulonglong max_rows= form->s->max_rows;
  if (max_rows == 0) /* default setting, don't set fragmentation */
    return;

  /**
   * get the number of fragments right
   */
#if MYSQL_VERSION_ID >= 50000
  const uint acc_row_size= 25 + /*safety margin*/ 2;
#else
  /*
    acc overhead: main page sets the limit for short keys,
    overflow page sets it for longer ones
  */
  const uint acc_row_size= pk_length*4 +
    ((pk_length <= 8) ? (25 + /*safety margin*/ 2)
                      : (4 + /*safety margin*/ 4));
#endif
  const ulonglong acc_fragment_size= 512*1024*1024;
  const ulonglong estimate= (max_rows*acc_row_size)/acc_fragment_size + 1;
#if MYSQL_VERSION_ID >= 50100
  const uint no_fragments= (uint) estimate;
#else
  const uint no_fragments= (uint) ((estimate + 1/*correct rounding*/)/2);
#endif

  /* Map the fragment count onto a fragment type relative to the node count */
  const uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
  NDBTAB::FragmentType ftype= NDBTAB::FragAllSmall;
  if (no_fragments > 2*no_nodes)
  {
    ftype= NDBTAB::FragAllLarge;
    if (no_fragments > 4*no_nodes)
      push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
                   "Ndb might have problems storing the max amount of rows specified");
  }
  else if (no_fragments > no_nodes)
    ftype= NDBTAB::FragAllMedium;

  tab.setFragmentType(ftype);
}

3845
int ha_ndbcluster::create(const char *name, 
3846 3847
                          TABLE *form, 
                          HA_CREATE_INFO *info)
3848 3849 3850
{
  NDBTAB tab;
  NDBCOL col;
joreland@mysql.com's avatar
joreland@mysql.com committed
3851
  uint pack_length, length, i, pk_length= 0;
3852 3853
  const void *data, *pack_data;
  char name2[FN_HEADLEN];
3854
  bool create_from_engine= (info->table_options & HA_CREATE_FROM_ENGINE);
3855
   
pekka@mysql.com's avatar
pekka@mysql.com committed
3856
  DBUG_ENTER("ha_ndbcluster::create");
3857 3858 3859
  DBUG_PRINT("enter", ("name: %s", name));
  fn_format(name2, name, "", "",2);       // Remove the .frm extension
  set_dbname(name2);
3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871
  set_tabname(name2);    

  if (create_from_engine)
  {
    /*
      Table alreay exists in NDB and frm file has been created by 
      caller.
      Do Ndb specific stuff, such as create a .ndb file
    */
    my_errno= write_ndb_file();
    DBUG_RETURN(my_errno);
  }
3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887

  DBUG_PRINT("table", ("name: %s", m_tabname));  
  tab.setName(m_tabname);
  tab.setLogging(!(info->options & HA_LEX_CREATE_TMP_TABLE));    
   
  // Save frm data for this table
  if (readfrm(name, &data, &length))
    DBUG_RETURN(1);
  if (packfrm(data, length, &pack_data, &pack_length))
    DBUG_RETURN(2);
  
  DBUG_PRINT("info", ("setFrm data=%x, len=%d", pack_data, pack_length));
  tab.setFrm(pack_data, pack_length);      
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));
  
3888
  for (i= 0; i < form->s->fields; i++) 
3889 3890 3891 3892
  {
    Field *field= form->field[i];
    DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d", 
                        field->field_name, field->real_type(),
3893
                        field->pack_length()));
3894
    if ((my_errno= create_ndb_column(col, field, info)))
pekka@mysql.com's avatar
pekka@mysql.com committed
3895
      DBUG_RETURN(my_errno);
3896
    tab.addColumn(col);
joreland@mysql.com's avatar
joreland@mysql.com committed
3897 3898
    if(col.getPrimaryKey())
      pk_length += (field->pack_length() + 3) / 4;
3899 3900 3901
  }
  
  // No primary key, create shadow key as 64 bit, auto increment  
3902
  if (form->s->primary_key == MAX_KEY) 
3903 3904 3905 3906 3907
  {
    DBUG_PRINT("info", ("Generating shadow key"));
    col.setName("$PK");
    col.setType(NdbDictionary::Column::Bigunsigned);
    col.setLength(1);
3908
    col.setNullable(FALSE);
3909 3910 3911
    col.setPrimaryKey(TRUE);
    col.setAutoIncrement(TRUE);
    tab.addColumn(col);
joreland@mysql.com's avatar
joreland@mysql.com committed
3912 3913 3914 3915
    pk_length += 2;
  }
  
  // Make sure that blob tables don't have to big part size
3916
  for (i= 0; i < form->s->fields; i++) 
joreland@mysql.com's avatar
joreland@mysql.com committed
3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927
  {
    /**
     * The extra +7 concists
     * 2 - words from pk in blob table
     * 5 - from extra words added by tup/dict??
     */
    switch (form->field[i]->real_type()) {
    case MYSQL_TYPE_BLOB:    
    case MYSQL_TYPE_MEDIUM_BLOB:   
    case MYSQL_TYPE_LONG_BLOB: 
    {
3928 3929
      NdbDictionary::Column * col= tab.getColumn(i);
      int size= pk_length + (col->getPartSize()+3)/4 + 7;
joreland@mysql.com's avatar
joreland@mysql.com committed
3930
      if(size > NDB_MAX_TUPLE_SIZE_IN_WORDS && 
3931
         (pk_length+7) < NDB_MAX_TUPLE_SIZE_IN_WORDS)
joreland@mysql.com's avatar
joreland@mysql.com committed
3932
      {
3933 3934
        size= NDB_MAX_TUPLE_SIZE_IN_WORDS - pk_length - 7;
        col->setPartSize(4*size);
joreland@mysql.com's avatar
joreland@mysql.com committed
3935 3936 3937 3938 3939 3940 3941 3942 3943 3944
      }
      /**
       * If size > NDB_MAX and pk_length+7 >= NDB_MAX
       *   then the table can't be created anyway, so skip
       *   changing part size, and have error later
       */ 
    }
    default:
      break;
    }
3945
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
3946 3947 3948

  ndb_set_fragmentation(tab, form, pk_length);

3949
  if ((my_errno= check_ndb_connection()))
3950 3951 3952
    DBUG_RETURN(my_errno);
  
  // Create the table in NDB     
3953 3954
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
3955
  if (dict->createTable(tab) != 0) 
3956 3957 3958 3959 3960 3961 3962 3963
  {
    const NdbError err= dict->getNdbError();
    ERR_PRINT(err);
    my_errno= ndb_to_mysql_error(&err);
    DBUG_RETURN(my_errno);
  }
  DBUG_PRINT("info", ("Table %s/%s created successfully", 
                      m_dbname, m_tabname));
3964

3965
  // Create secondary indexes
3966
  my_errno= build_index_list(ndb, form, ILBP_CREATE);
3967

3968 3969 3970
  if (!my_errno)
    my_errno= write_ndb_file();

3971 3972 3973 3974
  DBUG_RETURN(my_errno);
}


3975
int ha_ndbcluster::create_ordered_index(const char *name, 
3976
                                        KEY *key_info)
3977
{
3978
  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
3979
  DBUG_RETURN(create_index(name, key_info, FALSE));
3980 3981 3982
}

int ha_ndbcluster::create_unique_index(const char *name, 
3983
                                       KEY *key_info)
3984 3985
{

3986
  DBUG_ENTER("ha_ndbcluster::create_unique_index");
3987
  DBUG_RETURN(create_index(name, key_info, TRUE));
3988 3989 3990
}


3991 3992 3993 3994 3995
/*
  Create an index in NDB Cluster
 */

int ha_ndbcluster::create_index(const char *name, 
3996 3997
                                KEY *key_info,
                                bool unique)
3998
{
3999 4000
  Ndb *ndb= get_ndb();
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
4001 4002 4003
  KEY_PART_INFO *key_part= key_info->key_part;
  KEY_PART_INFO *end= key_part + key_info->key_parts;
  
4004
  DBUG_ENTER("ha_ndbcluster::create_index");
4005
  DBUG_PRINT("enter", ("name: %s ", name));
4006

4007
  NdbDictionary::Index ndb_index(name);
4008
  if (unique)
4009 4010 4011 4012 4013
    ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
  else 
  {
    ndb_index.setType(NdbDictionary::Index::OrderedIndex);
    // TODO Only temporary ordered indexes supported
4014
    ndb_index.setLogging(FALSE); 
4015 4016 4017 4018 4019 4020 4021
  }
  ndb_index.setTable(m_tabname);

  for (; key_part != end; key_part++) 
  {
    Field *field= key_part->field;
    DBUG_PRINT("info", ("attr: %s", field->field_name));
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
4022
    ndb_index.addColumnName(field->field_name);
4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039
  }
  
  if (dict->createIndex(ndb_index))
    ERR_RETURN(dict->getNdbError());

  // Success
  DBUG_PRINT("info", ("Created index %s", name));
  DBUG_RETURN(0);  
}


/*
  Rename a table in NDB Cluster
*/

int ha_ndbcluster::rename_table(const char *from, const char *to)
{
4040
  NDBDICT *dict;
4041
  char new_tabname[FN_HEADLEN];
4042 4043
  const NDBTAB *orig_tab;
  int result;
4044 4045

  DBUG_ENTER("ha_ndbcluster::rename_table");
4046
  DBUG_PRINT("info", ("Renaming %s to %s", from, to));
4047 4048 4049 4050
  set_dbname(from);
  set_tabname(from);
  set_tabname(to, new_tabname);

4051 4052 4053
  if (check_ndb_connection())
    DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION);

mskold@mysql.com's avatar
mskold@mysql.com committed
4054 4055
  Ndb *ndb= get_ndb();
  dict= ndb->getDictionary();
4056 4057
  if (!(orig_tab= dict->getTable(m_tabname)))
    ERR_RETURN(dict->getNdbError());
4058 4059 4060 4061 4062 4063 4064
  // Check if thread has stale local cache
  if (orig_tab->getObjectStatus() == NdbDictionary::Object::Invalid)
  {
    dict->removeCachedTable(m_tabname);
    if (!(orig_tab= dict->getTable(m_tabname)))
      ERR_RETURN(dict->getNdbError());
  }
4065 4066 4067
  m_table= (void *)orig_tab;
  // Change current database to that of target table
  set_dbname(to);
mskold@mysql.com's avatar
mskold@mysql.com committed
4068
  ndb->setDatabaseName(m_dbname);
4069
  if (!(result= alter_table_name(new_tabname)))
4070
  {
4071 4072
    // Rename .ndb file
    result= handler::rename_table(from, to);
4073
  }
4074

4075 4076 4077 4078 4079 4080 4081 4082
  DBUG_RETURN(result);
}


/*
  Rename a table in NDB Cluster using alter table
 */

4083
int ha_ndbcluster::alter_table_name(const char *to)
4084
{
4085 4086
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
4087
  const NDBTAB *orig_tab= (const NDBTAB *) m_table;
4088 4089
  DBUG_ENTER("alter_table_name_table");

4090
  NdbDictionary::Table new_tab= *orig_tab;
4091 4092
  new_tab.setName(to);
  if (dict->alterTable(new_tab) != 0)
4093 4094 4095
    ERR_RETURN(dict->getNdbError());

  m_table= NULL;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4096
  m_table_info= NULL;
4097 4098 4099 4100 4101 4102
                                                                             
  DBUG_RETURN(0);
}


/*
  Delete table from NDB Cluster

 */

int ha_ndbcluster::delete_table(const char *name)
{
4109
  DBUG_ENTER("ha_ndbcluster::delete_table");
4110 4111 4112
  DBUG_PRINT("enter", ("name: %s", name));
  set_dbname(name);
  set_tabname(name);
4113

4114 4115
  if (check_ndb_connection())
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
4116 4117

  /* Call ancestor function to delete .ndb file */
4118
  handler::delete_table(name);
4119 4120
  
  /* Drop the table from NDB */
4121 4122 4123 4124 4125
  DBUG_RETURN(drop_table());
}


/*
4126
  Drop table in NDB Cluster
4127 4128 4129 4130
 */

int ha_ndbcluster::drop_table()
{
4131 4132
  Ndb *ndb= get_ndb();
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
4133

4134 4135
  DBUG_ENTER("drop_table");
  DBUG_PRINT("enter", ("Deleting %s", m_tabname));
4136

4137
  release_metadata();
4138 4139
  if (dict->dropTable(m_tabname))
    ERR_RETURN(dict->getNdbError());
4140 4141 4142 4143
  DBUG_RETURN(0);
}


4144
/*
  Fetch the next auto increment value for the table from NDB.

  When m_skip_auto_increment is set the current value is only read,
  otherwise a value is reserved together with a prefetch cache sized
  from the number of rows still expected to be inserted.  Temporary
  NDB errors are retried up to NDB_AUTO_INCREMENT_RETRIES times.

  Returns the auto increment value, or ~(ulonglong) 0 when NDB could
  not deliver one.  (Before, the failure path returned the mapped
  MySQL error code through the ulonglong return type, so a caller
  could take the error code for a valid value.)
*/
ulonglong ha_ndbcluster::get_auto_increment()
{  
  int cache_size;
  Uint64 auto_value;
  DBUG_ENTER("get_auto_increment");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
  Ndb *ndb= get_ndb();

  if (m_rows_inserted > m_rows_to_insert)
  {
    /* We guessed too low */
    m_rows_to_insert+= m_autoincrement_prefetch;
  }
  /*
    Cache the remaining rows if fewer than the prefetch size remain,
    otherwise the larger of the expected row count and the prefetch
    size.  The cast covers the whole expression.
  */
  cache_size=
    (int)
    ((m_rows_to_insert - m_rows_inserted < m_autoincrement_prefetch) ?
     m_rows_to_insert - m_rows_inserted
     : (m_rows_to_insert > m_autoincrement_prefetch) ?
     m_rows_to_insert
     : m_autoincrement_prefetch);
  auto_value= NDB_FAILED_AUTO_INCREMENT;
  uint retries= NDB_AUTO_INCREMENT_RETRIES;
  do {
    auto_value=
      (m_skip_auto_increment) ? 
      ndb->readAutoIncrementValue((const NDBTAB *) m_table)
      : ndb->getAutoIncrementValue((const NDBTAB *) m_table, cache_size);
  } while (auto_value == NDB_FAILED_AUTO_INCREMENT && 
           --retries &&
           ndb->getNdbError().status == NdbError::TemporaryError);
  if (auto_value == NDB_FAILED_AUTO_INCREMENT)
  {
    /* Log the NDB error and report failure with an unmistakable value */
    const NdbError err= ndb->getNdbError();
    ERR_PRINT(err);
    DBUG_RETURN(~(ulonglong) 0);
  }
  DBUG_RETURN((ulonglong) auto_value);
}


/*
  Constructor for the NDB Cluster table handler 
 */

ha_ndbcluster::ha_ndbcluster(TABLE *table_arg):
  handler(table_arg),
  /* no transaction, cursor or table metadata attached yet */
  m_active_trans(NULL),
  m_active_cursor(NULL),
  m_table(NULL),
  m_table_version(-1),
  m_table_info(NULL),
  m_table_flags(HA_REC_NOT_IN_SEQ |
                HA_NULL_IN_KEY |
                HA_AUTO_PART_KEY |
                HA_NO_PREFIX_CHAR_KEYS |
                HA_NEED_READ_RANGE_BUFFER |
                HA_CAN_BIT_FIELD),
  m_share(0),
  /* write/update behaviour flags */
  m_use_write(FALSE),
  m_ignore_dup_key(FALSE),
  m_primary_key_update(FALSE),
  m_retrieve_all_fields(FALSE),
  m_retrieve_primary_key(FALSE),
  /* row counters and bulk insert bookkeeping */
  m_rows_to_insert((ha_rows) 1),
  m_rows_inserted((ha_rows) 0),
  m_bulk_insert_rows((ha_rows) 1024),
  m_rows_changed((ha_rows) 0),
  m_bulk_insert_not_flushed(FALSE),
  m_ops_pending(0),
  m_skip_auto_increment(TRUE),
  /* blob buffer is allocated on demand */
  m_blobs_pending(0),
  m_blobs_buffer(0),
  m_blobs_buffer_size(0),
  m_dupkey((uint) -1),
  m_ha_not_exact_count(FALSE),
  m_force_send(TRUE),
  m_autoincrement_prefetch((ha_rows) 32),
  m_transaction_on(TRUE),
  m_cond_stack(NULL),
  m_multi_cursor(NULL)
{
  DBUG_ENTER("ha_ndbcluster");

  /* Names are filled in later; start out as empty strings */
  m_tabname[0]= '\0';
  m_dbname[0]= '\0';

  records= ~(ha_rows)0; // uninitialized
  block_size= 1024;

  /* Mark every index slot as undefined with nothing attached */
  for (int key_no= 0; key_no < MAX_KEY; key_no++)
  {
    m_index[key_no].type= UNDEFINED_INDEX;
    m_index[key_no].unique_index= NULL;
    m_index[key_no].index= NULL;
    m_index[key_no].unique_index_attrid_map= NULL;
  }

  DBUG_VOID_RETURN;
}


/*
  Destructor for NDB Cluster table handler
 */

ha_ndbcluster::~ha_ndbcluster()
{
  DBUG_ENTER("~ha_ndbcluster");

  /* Give back the share (if one is still held), metadata and blob buffer */
  if (m_share)
    free_share(m_share);
  release_metadata();
  my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
  m_blobs_buffer= 0;

  // No cursor or transaction may still be open at this point
  DBUG_ASSERT(m_active_cursor == NULL);
  DBUG_ASSERT(m_active_trans == NULL);

  // Discard the condition stack
  DBUG_PRINT("info", ("Clearing condition stack"));
  cond_clear();

  DBUG_VOID_RETURN;
}


mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4273

4274 4275 4276 4277 4278 4279 4280 4281
/*
  Open a table for further use
  - fetch metadata for this table from NDB
  - check that table exists
*/

int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
{
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4282
  int res;
4283 4284 4285 4286 4287 4288 4289 4290
  KEY *key;
  DBUG_ENTER("open");
  DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d",
                       name, mode, test_if_locked));
  
  // Setup ref_length to make room for the whole 
  // primary key to be written in the ref variable
  
4291
  if (table->s->primary_key != MAX_KEY) 
4292
  {
4293
    key= table->key_info+table->s->primary_key;
4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304
    ref_length= key->key_length;
    DBUG_PRINT("info", (" ref_length: %d", ref_length));
  }
  // Init table lock structure 
  if (!(m_share=get_share(name)))
    DBUG_RETURN(1);
  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);
  
  set_dbname(name);
  set_tabname(name);
  
4305 4306
  if (check_ndb_connection()) {
    free_share(m_share); m_share= 0;
4307
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
4308
  }
4309
  
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4310 4311 4312
  res= get_metadata(name);
  if (!res)
    info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
4313

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4314
  DBUG_RETURN(res);
4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325
}


/*
  Close the table
  - release resources setup by open()
 */

int ha_ndbcluster::close(void)
{
  DBUG_ENTER("close");  
4326
  free_share(m_share); m_share= 0;
4327 4328 4329 4330 4331
  release_metadata();
  DBUG_RETURN(0);
}


4332
/*
  Allocate and initialise a Thd_ndb object (the per-connection Ndb
  wrapper).

  Returns the new object, or NULL when allocation or Ndb::init()
  fails.  On allocation failure my_errno is set to HA_ERR_OUT_OF_MEM.
*/
Thd_ndb* ha_ndbcluster::seize_thd_ndb()
{
  Thd_ndb *thd_ndb;
  DBUG_ENTER("seize_thd_ndb");

  thd_ndb= new Thd_ndb();
  /*
    Neither the wrapper nor its Ndb object may be used unchecked;
    both are dereferenced right below.
  */
  if (thd_ndb == NULL || thd_ndb->ndb == NULL)
  {
    my_errno= HA_ERR_OUT_OF_MEM;
    delete thd_ndb;
    DBUG_RETURN(NULL);
  }
  thd_ndb->ndb->getDictionary()->set_local_table_data_size(
    sizeof(Ndb_local_table_statistics)
    );
  if (thd_ndb->ndb->init(max_transactions) != 0)
  {
    ERR_PRINT(thd_ndb->ndb->getNdbError());
    /*
      TODO 
      Alt.1 If init fails because too many allocated Ndb 
      wait on condition for a Ndb object to be released.
      Alt.2 Seize/release from pool, wait until next release 
    */
    delete thd_ndb;
    thd_ndb= NULL;
  }
  DBUG_RETURN(thd_ndb);
}


4357
/* Destroy a Thd_ndb object obtained from seize_thd_ndb() */
void ha_ndbcluster::release_thd_ndb(Thd_ndb* thd_ndb)
{
  DBUG_ENTER("release_thd_ndb");

  delete thd_ndb;

  DBUG_VOID_RETURN;
}


/*
  If this thread already has a Thd_ndb object allocated
  in current THD, reuse it. Otherwise
  seize a Thd_ndb object, assign it to current THD and use it.
 
*/

4372
Ndb* check_ndb_in_thd(THD* thd)
{
  Thd_ndb *conn= get_thd_ndb(thd);

  /* Reuse the object already attached to this THD */
  if (conn)
    return conn->ndb;

  /* None yet: seize one and attach it, or give up */
  conn= ha_ndbcluster::seize_thd_ndb();
  if (conn == NULL)
    return NULL;
  set_thd_ndb(thd, conn);
  return conn->ndb;
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
4384

4385

4386
/*
  Make sure 'thd' has an Ndb object and point it at this handler's
  database.  Returns 0, or HA_ERR_NO_CONNECTION when none is available.
*/
int ha_ndbcluster::check_ndb_connection(THD* thd)
{
  DBUG_ENTER("check_ndb_connection");

  Ndb *const thd_ndb_obj= check_ndb_in_thd(thd);
  if (thd_ndb_obj == NULL)
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  thd_ndb_obj->setDatabaseName(m_dbname);
  DBUG_RETURN(0);
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
4397

4398
/* Release the Thd_ndb object attached to 'thd', if any. Always returns 0. */
int ndbcluster_close_connection(THD *thd)
{
  Thd_ndb *conn= get_thd_ndb(thd);
  DBUG_ENTER("ndbcluster_close_connection");

  if (conn == NULL)
    DBUG_RETURN(0);

  ha_ndbcluster::release_thd_ndb(conn);
  set_thd_ndb(thd, NULL); // not strictly required but does not hurt either
  DBUG_RETURN(0);
}


/*
  Try to discover one table from NDB
 */

4415
int ndbcluster_discover(THD* thd, const char *db, const char *name,
                        const void** frmblob, uint* frmlen)
{
  /*
    Return codes as produced below:
      0  frm blob unpacked and returned through frmblob/frmlen
      1  NDB reported error 709 for the table (treated as "not found")
      2  table found but it carries no frm data
      3  unpackfrm() failed
    or HA_ERR_NO_CONNECTION / a mapped NDB error.
  */
  const NDBTAB *ndbtab;
  Ndb *thread_ndb;
  DBUG_ENTER("ndbcluster_discover");
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); 

  thread_ndb= check_ndb_in_thd(thd);
  if (thread_ndb == NULL)
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  thread_ndb->setDatabaseName(db);

  // Drop any cached definition so the table is fetched anew
  NDBDICT *dictionary= thread_ndb->getDictionary();
  dictionary->set_local_table_data_size(sizeof(Ndb_local_table_statistics));
  dictionary->invalidateTable(name);

  ndbtab= dictionary->getTable(name);
  if (ndbtab == NULL)
  {
    const NdbError err= dictionary->getNdbError();
    if (err.code == 709)
      DBUG_RETURN(1);
    ERR_RETURN(err);
  }
  DBUG_PRINT("info", ("Found table %s", ndbtab->getName()));

  uint frm_size= ndbtab->getFrmLength();
  if (frm_size == 0 || ndbtab->getFrmData() == NULL)
  {
    DBUG_PRINT("No frm data found",
               ("Table is probably created via NdbApi")); 
    DBUG_RETURN(2);
  }

  const void *frm_data;
  if (unpackfrm(&frm_data, &frm_size, ndbtab->getFrmData()))
    DBUG_RETURN(3);

  // Hand the unpacked blob to the caller
  *frmlen= frm_size;
  *frmblob= frm_data;

  DBUG_RETURN(0);
}

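/*
  Check if a table exists in NDB

  Returns 1 if the table was found and 0 if NDB reported error 709
  for it; otherwise HA_ERR_NO_CONNECTION or the mapped NDB error.
 */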
4462

4463 4464 4465 4466 4467
int ndbcluster_table_exists(THD* thd, const char *db, const char *name)
{
  const NDBTAB *ndbtab;
  Ndb *thread_ndb;
  DBUG_ENTER("ndbcluster_table_exists");
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name));

  thread_ndb= check_ndb_in_thd(thd);
  if (thread_ndb == NULL)
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  thread_ndb->setDatabaseName(db);

  // Invalidate the cached definition so the dictionary is really asked
  NDBDICT *dictionary= thread_ndb->getDictionary();
  dictionary->set_local_table_data_size(sizeof(Ndb_local_table_statistics));
  dictionary->invalidateTable(name);

  ndbtab= dictionary->getTable(name);
  if (ndbtab == NULL)
  {
    const NdbError err= dictionary->getNdbError();
    if (err.code == 709)
      DBUG_RETURN(0);            // error 709: reported as "does not exist"
    ERR_RETURN(err);
  }

  DBUG_PRINT("info", ("Found table %s", ndbtab->getName()));
  DBUG_RETURN(1);
}

4489 4490


4491
/*
  HASH key callback for hashes whose entries are plain C strings:
  the key is the string itself and its length is strlen(entry).
*/
extern "C" byte* tables_get_key(const char *entry, uint *length,
                                my_bool not_used __attribute__((unused)))
{
  byte *key= (byte*) entry;
  *length= strlen(entry);
  return key;
}


4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512
/*
  Drop a database in NDB Cluster
 */

int ndbcluster_drop_database(const char *path)
{
  DBUG_ENTER("ndbcluster_drop_database");
  THD *thd= current_thd;
  char dbname[FN_HEADLEN];
  Ndb* ndb;
  NdbDictionary::Dictionary::List list;
  uint i;
  char *tabname;
  List<char> drop_list;
4513
  int ret= 0;
4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539
  ha_ndbcluster::set_dbname(path, (char *)&dbname);
  DBUG_PRINT("enter", ("db: %s", dbname));
  
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  
  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());
  for (i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& t= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name));     
    
    // Add only tables that belongs to db
    if (my_strcasecmp(system_charset_info, t.database, dbname))
      continue;
    DBUG_PRINT("info", ("%s must be dropped", t.name));     
    drop_list.push_back(thd->strdup(t.name));
  }
  // Drop any tables belonging to database
  ndb->setDatabaseName(dbname);
  List_iterator_fast<char> it(drop_list);
  while ((tabname=it++))
4540
  {
4541
    if (dict->dropTable(tabname))
4542 4543 4544
    {
      const NdbError err= dict->getNdbError();
      if (err.code != 709)
4545 4546
      {
        ERR_PRINT(err);
4547
        ret= ndb_to_mysql_error(&err);
4548
      }
4549 4550 4551
    }
  }
  DBUG_RETURN(ret);      
4552 4553 4554
}


4555
/*
  Reconcile the list of table files found on disk for a database with
  the tables NDB knows about.

  - Tables of db present in NDB (and matching wild, if given) are
    collected in the ndb_tables hash.
  - Entries of *files that are also in NDB go into the ok_tables hash.
  - Entries of *files that have a .ndb file on disk but which NDB says
    do not exist are removed from *files and their tables are deleted.
  - Tables that are in NDB but not in ok_tables are created from the
    engine and appended to *files.

  Returns 0 on success (and immediately when dir is set), -1 if a hash
  could not be initialised, HA_ERR_NO_CONNECTION or a mapped NDB error.
*/
int ndbcluster_find_files(THD *thd,const char *db,const char *path,
                          const char *wild, bool dir, List<char> *files)
{
  DBUG_ENTER("ndbcluster_find_files");
  DBUG_PRINT("enter", ("db: %s", db));
  { // extra bracket to avoid gcc 2.95.3 warning
  uint i;
  Ndb* ndb;
  char name[FN_REFLEN];
  HASH ndb_tables, ok_tables;
  NdbDictionary::Dictionary::List list;

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  if (dir)
    DBUG_RETURN(0); // Discover of databases not yet supported

  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());

  if (hash_init(&ndb_tables, system_charset_info,list.count,0,0,
                (hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
    DBUG_RETURN(-1);
  }

  if (hash_init(&ok_tables, system_charset_info,32,0,0,
                (hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
    hash_free(&ndb_tables);
    DBUG_RETURN(-1);
  }  

  for (i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& t= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name));     

    // Add only tables that belongs to db
    if (my_strcasecmp(system_charset_info, t.database, db))
      continue;

    // Apply wildcard to list of tables in NDB
    if (wild)
    {
      if (lower_case_table_names)
      {
        if (wild_case_compare(files_charset_info, t.name, wild))
          continue;
      }
      else if (wild_compare(t.name,wild,0))
        continue;
    }
    DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", t.name));     
    my_hash_insert(&ndb_tables, (byte*)thd->strdup(t.name));
  }

  char *file_name;
  List_iterator<char> it(*files);
  List<char> delete_list;
  while ((file_name=it++))
  {
    DBUG_PRINT("info", ("%s", file_name));     
    if (hash_search(&ndb_tables, file_name, strlen(file_name)))
    {
      DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name));
      // File existed in NDB and as frm file, put in ok_tables list
      my_hash_insert(&ok_tables, (byte*)file_name);
      continue;
    }
    
    // File is not in NDB, check for .ndb file with this name
    (void)strxnmov(name, FN_REFLEN, 
                   mysql_data_home,"/",db,"/",file_name,ha_ndb_ext,NullS);
    DBUG_PRINT("info", ("Check access for %s", name));
    if (access(name, F_OK))
    {
      DBUG_PRINT("info", ("%s did not exist on disk", name));     
      // .ndb file did not exist on disk, another table type
      continue;
    }

    DBUG_PRINT("info", ("%s existed on disk", name));     
    // The .ndb file exists on disk, but it's not in list of tables in ndb
    // Verify that handler agrees table is gone.
    if (ndbcluster_table_exists(thd, db, file_name) == 0)    
    {
      DBUG_PRINT("info", ("NDB says %s does not exists", file_name));     
      it.remove();
      // Put in list of tables to remove from disk
      delete_list.push_back(thd->strdup(file_name));
    }
  }

  // Check for new files to discover
  DBUG_PRINT("info", ("Checking for new files to discover"));       
  List<char> create_list;
  for (i= 0 ; i < ndb_tables.records ; i++)
  {
    file_name= hash_element(&ndb_tables, i);
    if (!hash_search(&ok_tables, file_name, strlen(file_name)))
    {
      DBUG_PRINT("info", ("%s must be discovered", file_name));       
      // File is in list of ndb tables and not in ok_tables
      // This table need to be created
      create_list.push_back(thd->strdup(file_name));
    }
  }

  // Lock mutex before deleting and creating frm files
  pthread_mutex_lock(&LOCK_open);

  // Tables are only deleted when no global read lock is in effect
  if (!global_read_lock)
  {
    // Delete old files
    List_iterator_fast<char> it3(delete_list);
    while ((file_name=it3++))
    {
      DBUG_PRINT("info", ("Remove table %s/%s", db, file_name));
      // Delete the table and all related files
      TABLE_LIST table_list;
      bzero((char*) &table_list,sizeof(table_list));
      table_list.db= (char*) db;
      table_list.alias= table_list.table_name= (char*)file_name;
      (void)mysql_rm_table_part2(thd, &table_list,
                                 /* if_exists */ FALSE,
                                 /* drop_temporary */ FALSE,
                                 /* drop_view */ FALSE,
                                 /* dont_log_query*/ TRUE);
      /* Clear error message that is returned when table is deleted */
      thd->clear_error();
    }
  }

  // Create new files
  List_iterator_fast<char> it2(create_list);
  while ((file_name=it2++))
  {  
    // Print the table being discovered; 'name' is only a scratch path
    // buffer here and may never have been written
    DBUG_PRINT("info", ("Table %s need discovery", file_name));
    if (ha_create_table_from_engine(thd, db, file_name, TRUE) == 0)
      files->push_back(thd->strdup(file_name)); 
  }

  pthread_mutex_unlock(&LOCK_open);      
  
  hash_free(&ok_tables);
  hash_free(&ndb_tables);
  } // extra bracket to avoid gcc 2.95.3 warning
  DBUG_RETURN(0);    
}


/*
  Initialise all global variables before creating
  a NDB Cluster table handler
 */

4718 4719 4720 4721 4722 4723 4724
/* Call back after cluster connect */
static int connect_callback()
{
  // Refresh the status variables from the global cluster connection
  update_status_variables(g_ndb_cluster_connection);

  return 0;
}

4725 4726
/*
  Set up the global NDB state used by the handler:
  the cluster connection, the global Ndb object, the open-tables hash,
  the mutexes/condition and the utility thread.

  Returns a pointer to ndbcluster_hton on success. On any failure the
  global Ndb object and cluster connection are deleted and NULL is
  returned.
*/
handlerton *
ndbcluster_init()
{
  int res;
  DBUG_ENTER("ndbcluster_init");
  // Set connectstring if specified
  if (opt_ndbcluster_connectstring != 0)
    DBUG_PRINT("connectstring", ("%s", opt_ndbcluster_connectstring));     
  if ((g_ndb_cluster_connection=
       new Ndb_cluster_connection(opt_ndbcluster_connectstring)) == 0)
  {
    // The connectstring may be unset: never hand NULL to %s
    DBUG_PRINT("error",("Ndb_cluster_connection(%s)",
                        opt_ndbcluster_connectstring ?
                        opt_ndbcluster_connectstring : "(null)"));
    goto ndbcluster_init_error;
  }

  g_ndb_cluster_connection->set_optimized_node_selection
    (opt_ndb_optimized_node_selection);

  // Create a Ndb object to open the connection  to NDB
  if ( (g_ndb= new Ndb(g_ndb_cluster_connection, "sys")) == 0 )
  {
    DBUG_PRINT("error", ("failed to create global ndb object"));
    goto ndbcluster_init_error;
  }
  g_ndb->getDictionary()->set_local_table_data_size(sizeof(Ndb_local_table_statistics));
  if (g_ndb->init() != 0)
  {
    ERR_PRINT (g_ndb->getNdbError());
    goto ndbcluster_init_error;
  }

  /*
    connect() result as handled below:
      0   connected now
      1   not connected; keep trying in a background connect thread
     -1   permanent error (asserted)
  */
  if ((res= g_ndb_cluster_connection->connect(0,0,0)) == 0)
  {
    connect_callback();
    DBUG_PRINT("info",("NDBCLUSTER storage engine at %s on port %d",
                       g_ndb_cluster_connection->get_connected_host(),
                       g_ndb_cluster_connection->get_connected_port()));
    g_ndb_cluster_connection->wait_until_ready(10,3);
  } 
  else if(res == 1)
  {
    if (g_ndb_cluster_connection->start_connect_thread(connect_callback)) 
    {
      DBUG_PRINT("error", ("g_ndb_cluster_connection->start_connect_thread()"));
      goto ndbcluster_init_error;
    }
#ifndef DBUG_OFF
    {
      char buf[1024];
      DBUG_PRINT("info",
                 ("NDBCLUSTER storage engine not started, "
                  "will connect using %s",
                  g_ndb_cluster_connection->
                  get_connectstring(buf,sizeof(buf))));
    }
#endif
  }
  else
  {
    DBUG_ASSERT(res == -1);
    DBUG_PRINT("error", ("permanent error"));
    goto ndbcluster_init_error;
  }
  
  (void) hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0,
                   (hash_get_key) ndbcluster_get_key,0,0);
  pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
  pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST);
  pthread_cond_init(&COND_ndb_util_thread, NULL);

  // Create utility thread
  pthread_t tmp;
  if (pthread_create(&tmp, &connection_attrib, ndb_util_thread_func, 0))
  {
    DBUG_PRINT("error", ("Could not create ndb utility thread"));
    // Undo the hash/mutex/cond set-up done just above
    hash_free(&ndbcluster_open_tables);
    pthread_mutex_destroy(&ndbcluster_mutex);
    pthread_mutex_destroy(&LOCK_ndb_util_thread);
    pthread_cond_destroy(&COND_ndb_util_thread);
    goto ndbcluster_init_error;
  }
  
  ndbcluster_inited= 1;
  DBUG_RETURN(&ndbcluster_hton);

 ndbcluster_init_error:
  // delete on a null pointer is a no-op, no guard needed
  delete g_ndb;
  g_ndb= NULL;
  delete g_ndb_cluster_connection;
  g_ndb_cluster_connection= NULL;
  DBUG_RETURN(NULL);
}


/*
  End use of the NDB Cluster table handler
  - free all global variables allocated by
    ndbcluster_init()
*/

bool ndbcluster_end()
{
  DBUG_ENTER("ndbcluster_end");
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4832

4833 4834 4835
  if (!ndbcluster_inited)
    DBUG_RETURN(0);

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4836 4837 4838 4839 4840 4841
  // Kill ndb utility thread
  (void) pthread_mutex_lock(&LOCK_ndb_util_thread);  
  DBUG_PRINT("exit",("killing ndb util thread: %lx", ndb_util_thread));
  (void) pthread_cond_signal(&COND_ndb_util_thread);
  (void) pthread_mutex_unlock(&LOCK_ndb_util_thread);

4842 4843
  if(g_ndb)
    delete g_ndb;
4844
  g_ndb= NULL;
4845 4846 4847
  if (g_ndb_cluster_connection)
    delete g_ndb_cluster_connection;
  g_ndb_cluster_connection= NULL;
4848

4849 4850
  hash_free(&ndbcluster_open_tables);
  pthread_mutex_destroy(&ndbcluster_mutex);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4851 4852
  pthread_mutex_destroy(&LOCK_ndb_util_thread);
  pthread_cond_destroy(&COND_ndb_util_thread);
4853 4854 4855 4856
  ndbcluster_inited= 0;
  DBUG_RETURN(0);
}

4857 4858 4859 4860 4861
/*
  Static error print function called from
  static handler method ndbcluster_commit
  and ndbcluster_rollback
*/
4862 4863

void ndbcluster_print_error(int error, const NdbOperation *error_op)
4864
{
4865 4866
  DBUG_ENTER("ndbcluster_print_error");
  TABLE tab;
4867
  const char *tab_name= (error_op) ? error_op->getTableName() : "";
4868
  tab.alias= (char *) tab_name;
4869
  ha_ndbcluster error_handler(&tab);
4870
  tab.file= &error_handler;
4871
  error_handler.print_error(error, MYF(0));
ndbdev@ndbmaster.mysql.com's avatar
ndbdev@ndbmaster.mysql.com committed
4872
  DBUG_VOID_RETURN;
4873
}
4874

4875 4876 4877
/**
 * Set a given location from full pathname to database name
 *
 */
4879
void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
4880 4881 4882 4883
{
  char *end, *ptr;
  
  /* Scan name from the end */
4884 4885 4886 4887 4888 4889
  ptr= strend(path_name)-1;
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  ptr--;
  end= ptr;
4890 4891 4892 4893
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  uint name_len= end - ptr;
4894 4895
  memcpy(dbname, ptr + 1, name_len);
  dbname[name_len]= '\0';
4896 4897
#ifdef __WIN__
  /* Put to lower case */
4898 4899
  
  ptr= dbname;
4900 4901
  
  while (*ptr != '\0') {
4902
    *ptr= tolower(*ptr);
4903 4904 4905 4906 4907
    ptr++;
  }
#endif
}

4908 4909 4910 4911 4912 4913 4914 4915 4916
/*
  Set m_dbname from full pathname to table file
 */

void ha_ndbcluster::set_dbname(const char *path_name)
{
  set_dbname(path_name, m_dbname);
}

4917 4918 4919 4920 4921 4922 4923 4924 4925 4926
/**
 * Set a given location from full pathname to table file
 *
 */
void
ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
{
  char *end, *ptr;
  
  /* Scan name from the end */
4927 4928
  end= strend(path_name)-1;
  ptr= end;
4929 4930 4931
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
4932
  uint name_len= end - ptr;
4933
  memcpy(tabname, ptr + 1, end - ptr);
4934
  tabname[name_len]= '\0';
4935 4936
#ifdef __WIN__
  /* Put to lower case */
4937
  ptr= tabname;
4938 4939 4940 4941 4942 4943 4944 4945 4946
  
  while (*ptr != '\0') {
    *ptr= tolower(*ptr);
    ptr++;
  }
#endif
}

/*
  Set m_tabname from full pathname to table file
 */

4950
void ha_ndbcluster::set_tabname(const char *path_name)
4951
{
4952
  set_tabname(path_name, m_tabname);
4953 4954 4955 4956
}


/*
  Estimate the number of rows in a key range.

  Returns HA_POS_ERROR for a partial key on a unique/primary hash index,
  1 for a full key on anything but a plain ordered index, and 10 as a
  default guess otherwise.
*/
ha_rows 
ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
                                key_range *max_key)
{
  KEY *key_info= table->key_info + inx;
  uint full_length= key_info->key_length;
  NDB_INDEX_TYPE idx_type= get_index_type(inx);  

  DBUG_ENTER("records_in_range");

  const bool hash_only= (idx_type == UNIQUE_INDEX ||
                         idx_type == PRIMARY_KEY_INDEX);
  const bool some_key_partial=
    (min_key && min_key->length < full_length) ||
    (max_key && max_key->length < full_length);
  const bool some_key_full=
    (min_key && min_key->length == full_length) ||
    (max_key && max_key->length == full_length);

  // Prevent partial read of hash indexes by returning HA_POS_ERROR
  if (hash_only && some_key_partial)
    DBUG_RETURN(HA_POS_ERROR);

  // Read from hash index with full key
  // This is a "const" table which returns only one record!
  if (idx_type != ORDERED_INDEX && some_key_full)
    DBUG_RETURN(1);

  DBUG_RETURN(10); /* Good guess when you don't know anything */
}

4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017
ulong ha_ndbcluster::table_flags(void) const
{
  if (m_ha_not_exact_count)
    return m_table_flags | HA_NOT_EXACT_COUNT;
  else
    return m_table_flags;
}
const char * ha_ndbcluster::table_type() const 
{
  return("ndbcluster");
}
uint ha_ndbcluster::max_supported_record_length() const
{ 
  return NDB_MAX_TUPLE_SIZE;
}
uint ha_ndbcluster::max_supported_keys() const
{
  return MAX_KEY;
}
uint ha_ndbcluster::max_supported_key_parts() const 
{
  return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
}
uint ha_ndbcluster::max_supported_key_length() const
{
  return NDB_MAX_KEY_SIZE;
}
bool ha_ndbcluster::low_byte_first() const
{ 
#ifdef WORDS_BIGENDIAN
  return FALSE;
#else
  return TRUE;
#endif
}
bool ha_ndbcluster::has_transactions()
{
5018
  return m_transaction_on;
5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032
}
/*
  Name of the index algorithm for a key: "BTREE" for the three ordered
  index kinds, "HASH" for everything else.
*/
const char* ha_ndbcluster::index_type(uint key_number)
{
  const NDB_INDEX_TYPE kind= get_index_type(key_number);
  const bool is_ordered= (kind == ORDERED_INDEX ||
                          kind == UNIQUE_ORDERED_INDEX ||
                          kind == PRIMARY_KEY_ORDERED_INDEX);
  return is_ordered ? "BTREE" : "HASH";
}
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5033

5034 5035
/* Query cache type of NDB tables: always HA_CACHE_TBL_ASKTRANSACT */
uint8 ha_ndbcluster::table_cache_type()
{
  DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
  DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
}


/*
  Fetch the commit count of a table.

  The table must have a share in ndbcluster_open_tables (looked up as
  "./<dbname>/<tabname>"). If ndb_cache_check_time > 0 and the share
  holds a non-zero commit_count, that value is returned. Otherwise the
  count is read from NDB and stored in the share, unless
  commit_count_lock changed in the meantime, in which case
  *commit_count is set to 0.

  Returns 0 on success, 1 if the share was not found, no Ndb object
  could be obtained, or the statistics could not be read.
*/
uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname,
                         Uint64 *commit_count)
{
  DBUG_ENTER("ndb_get_commitcount");

  char name[FN_REFLEN];
  NDB_SHARE *share;
  (void)strxnmov(name, FN_REFLEN, "./",dbname,"/",tabname,NullS);
  DBUG_PRINT("enter", ("name: %s", name));
  pthread_mutex_lock(&ndbcluster_mutex);
  if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                       (byte*) name,
                                       strlen(name))))
  {
    pthread_mutex_unlock(&ndbcluster_mutex);
    DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
                        name));
    DBUG_RETURN(1);
  }
  // Take a reference; every exit path below must pair it with free_share()
  share->use_count++;
  pthread_mutex_unlock(&ndbcluster_mutex);

  pthread_mutex_lock(&share->mutex);
  if (ndb_cache_check_time > 0)
  {
    if (share->commit_count != 0)
    {
      *commit_count= share->commit_count;
      DBUG_PRINT("info", ("Getting commit_count: %llu from share",
                          share->commit_count));
      pthread_mutex_unlock(&share->mutex);
      free_share(share);
      DBUG_RETURN(0);
    }
  }
  DBUG_PRINT("info", ("Get commit_count from NDB"));
  Ndb *ndb;
  if (!(ndb= check_ndb_in_thd(thd)))
  {
    // Do not leave with the share locked and still referenced
    pthread_mutex_unlock(&share->mutex);
    free_share(share);
    DBUG_RETURN(1);
  }
  ndb->setDatabaseName(dbname);
  // Remember the lock counter so a concurrent change can be detected
  uint lock= share->commit_count_lock;
  pthread_mutex_unlock(&share->mutex);

  struct Ndb_statistics stat;
  if (ndb_get_table_statistics(ndb, tabname, &stat))
  {
    free_share(share);
    DBUG_RETURN(1);
  }

  pthread_mutex_lock(&share->mutex);
  if(share->commit_count_lock == lock)
  {
    DBUG_PRINT("info", ("Setting commit_count to %llu", stat.commit_count));
    share->commit_count= stat.commit_count;
    *commit_count= stat.commit_count;
  }
  else
  {
    DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed"));
    *commit_count= 0;
  }
  pthread_mutex_unlock(&share->mutex);
  free_share(share);
  DBUG_RETURN(0);
}


/*
  Check if a cached query can be used.
  This is done by comparing the supplied engine_data to commit_count of
  the table.
  The commit_count is normally retrieved from the share for the table, where
  it has been cached by the util thread. If the util thread is not started,
  NDB has to be contacted to retrieve the commit_count; this will introduce
  a small delay while waiting for NDB to answer.


  SYNOPSIS
  ndbcluster_cache_retrieval_allowed
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1

    engine_data    parameter retrieved when query was first inserted into
                   the cache. If the value of engine_data is changed,
                   all queries for this table should be invalidated.

  RETURN VALUE
    TRUE  Yes, use the query from cache
    FALSE No, don't use the cached query, and if engine_data
          has changed, all queries for this table should be invalidated

*/

static my_bool
ndbcluster_cache_retrieval_allowed(THD *thd,
                                   char *full_name, uint full_name_len,
                                   ulonglong *engine_data)
{
  DBUG_ENTER("ndbcluster_cache_retrieval_allowed");

  // full_name is "<db>\0<table>": the table name starts right after
  // the terminator of the database name
  char *db_part= full_name;
  char *table_part= db_part + strlen(db_part) + 1;
  bool autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
  Uint64 current_count;

  DBUG_PRINT("enter", ("dbname: %s, tabname: %s, is_autocommit: %d",
                       db_part, table_part, autocommit));

  if (!autocommit)
  {
    DBUG_PRINT("exit", ("No, don't use cache in transaction"));
    DBUG_RETURN(FALSE);
  }

  if (ndb_get_commitcount(thd, db_part, table_part, &current_count))
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
    DBUG_RETURN(FALSE);
  }
  DBUG_PRINT("info", ("*engine_data: %llu, commit_count: %llu",
                      *engine_data, current_count));

  if (current_count == 0)
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, local commit has been performed"));
    DBUG_RETURN(FALSE);
  }

  if (*engine_data != current_count)
  {
    *engine_data= current_count; /* invalidate */
    DBUG_PRINT("exit", ("No, commit_count has changed"));
    DBUG_RETURN(FALSE);
  }

  DBUG_PRINT("exit", ("OK to use cache, engine_data: %llu", *engine_data));
  DBUG_RETURN(TRUE);
}


/**
   Register a table for use in the query cache. Fetch the commit_count
   for the table and return it in engine_data, this will later be used
   to check if the table has changed, before the cached query is reused.

   SYNOPSIS
   ha_ndbcluster::register_query_cache_table
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1
    qc_engine_callback  function to be called before using cache on this table
    engine_data    out, commit_count for this table

  RETURN VALUE
    TRUE  Yes, it's ok to cache this query
    FALSE No, don't cache the query

*/

my_bool
ha_ndbcluster::register_query_cache_table(THD *thd,
                                          char *full_name, uint full_name_len,
                                          qc_engine_callback *engine_callback,
                                          ulonglong *engine_data)
{
  DBUG_ENTER("ha_ndbcluster::register_query_cache_table");

  bool autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));

  DBUG_PRINT("enter",("dbname: %s, tabname: %s, is_autocommit: %d",
                      m_dbname, m_tabname, autocommit));

  // Never register a table while inside a transaction
  if (!autocommit)
  {
    DBUG_PRINT("exit", ("Can't register table during transaction"));
    DBUG_RETURN(FALSE);
  }

  Uint64 current_count;
  const uint failed= ndb_get_commitcount(thd, m_dbname, m_tabname,
                                         &current_count);
  if (failed)
  {
    *engine_data= 0;
    DBUG_PRINT("exit", ("Error, could not get commitcount"));
    DBUG_RETURN(FALSE);
  }

  // Hand the commit count and the validation callback to the query cache
  *engine_data= current_count;
  *engine_callback= ndbcluster_cache_retrieval_allowed;
  DBUG_PRINT("exit", ("commit_count: %llu", current_count));
  DBUG_RETURN(current_count > 0);
}
5239

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5240

5241
/*
  Handling the shared NDB_SHARE structure that is needed to
  provide table locking.
  It's also used for sharing data with other NDB handlers
  in the same MySQL Server. There is currently not much
  data we want to or can share.
 */


/* HASH key callback for NDB_SHARE entries: the key is the table name */
static byte* ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                my_bool not_used __attribute__((unused)))
{
  byte *key= (byte*) share->table_name;
  *length= share->table_name_length;
  return key;
}

/*
  Find the NDB_SHARE for table_name in ndbcluster_open_tables, creating
  and inserting a new one if there is none, and increment its use_count.

  Returns the share, or 0 if it could not be allocated or inserted.
  Each successful call should be paired with free_share().
*/
static NDB_SHARE* get_share(const char *table_name)
{
  NDB_SHARE *share;
  pthread_mutex_lock(&ndbcluster_mutex);
  uint length=(uint) strlen(table_name);
  if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                       (byte*) table_name,
                                       length)))
  {
    // The table name is stored in the same allocation, right after the struct
    if ((share=(NDB_SHARE *) my_malloc(sizeof(*share)+length+1,
                                       MYF(MY_WME | MY_ZEROFILL))))
    {
      share->table_name_length=length;
      share->table_name=(char*) (share+1);
      strmov(share->table_name,table_name);
      if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
      {
        pthread_mutex_unlock(&ndbcluster_mutex);
        my_free((gptr) share, MYF(0));
        return 0;
      }
      thr_lock_init(&share->lock);
      pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
      share->commit_count= 0;
      share->commit_count_lock= 0;
    }
    else
    {
      DBUG_PRINT("error", ("Failed to alloc share"));
      pthread_mutex_unlock(&ndbcluster_mutex);
      return 0;
    }
  }
  share->use_count++;

  // commit_count is 64 bits wide: print it with %llu, not %d
  DBUG_PRINT("share",
             ("table_name: %s, length: %d, use_count: %d, commit_count: %llu",
              share->table_name, share->table_name_length, share->use_count,
              (ulonglong) share->commit_count));
  pthread_mutex_unlock(&ndbcluster_mutex);
  return share;
}


/*
  Drop one reference to share. When the last reference goes away the share
  is removed from ndbcluster_open_tables, its lock and mutex are destroyed
  and its memory is freed. Everything happens under ndbcluster_mutex.
*/
static void free_share(NDB_SHARE *share)
{
  pthread_mutex_lock(&ndbcluster_mutex);
  share->use_count--;
  if (share->use_count == 0)
  {
    hash_delete(&ndbcluster_open_tables, (byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }
  pthread_mutex_unlock(&ndbcluster_mutex);
}



/*
  Internal representation of the frm blob
   
*/

/*
  Layout of a packed frm blob: a fixed header followed by the payload.
  packfrm() writes the header fields with int4store() and unpackfrm() reads
  them back with uint4korr(), so do not access them as plain uints.
*/
struct frm_blob_struct 
{
  struct frm_blob_header 
  {
    uint ver;      // Version of header
    uint orglen;   // Original length of compressed data
    uint complen;  // Compressed length of data, 0=uncompressed
  } head;
  // First byte of the payload; packfrm() allocates sizeof(header) plus the
  // payload length and copies the payload starting here.
  char data[1];  
};



static int packfrm(const void *data, uint len, 
5334
                   const void **pack_data, uint *pack_len)
5335 5336 5337 5338 5339 5340 5341 5342 5343
{
  int error;
  ulong org_len, comp_len;
  uint blob_len;
  frm_blob_struct* blob;
  DBUG_ENTER("packfrm");
  DBUG_PRINT("enter", ("data: %x, len: %d", data, len));
  
  error= 1;
5344
  org_len= len;
5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363
  if (my_compress((byte*)data, &org_len, &comp_len))
    goto err;
  
  DBUG_PRINT("info", ("org_len: %d, comp_len: %d", org_len, comp_len));
  DBUG_DUMP("compressed", (char*)data, org_len);
  
  error= 2;
  blob_len= sizeof(frm_blob_struct::frm_blob_header)+org_len;
  if (!(blob= (frm_blob_struct*) my_malloc(blob_len,MYF(MY_WME))))
    goto err;
  
  // Store compressed blob in machine independent format
  int4store((char*)(&blob->head.ver), 1);
  int4store((char*)(&blob->head.orglen), comp_len);
  int4store((char*)(&blob->head.complen), org_len);
  
  // Copy frm data into blob, already in machine independent format
  memcpy(blob->data, data, org_len);  
  
5364 5365 5366
  *pack_data= blob;
  *pack_len= blob_len;
  error= 0;
5367 5368 5369 5370 5371 5372 5373 5374 5375
  
  DBUG_PRINT("exit", ("pack_data: %x, pack_len: %d", *pack_data, *pack_len));
err:
  DBUG_RETURN(error);
  
}


static int unpackfrm(const void **unpack_data, uint *unpack_len,
5376
                    const void *pack_data)
5377
{
5378
   const frm_blob_struct *blob= (frm_blob_struct*)pack_data;
5379 5380 5381 5382 5383
   byte *data;
   ulong complen, orglen, ver;
   DBUG_ENTER("unpackfrm");
   DBUG_PRINT("enter", ("pack_data: %x", pack_data));

5384 5385 5386
   complen=     uint4korr((char*)&blob->head.complen);
   orglen=      uint4korr((char*)&blob->head.orglen);
   ver=         uint4korr((char*)&blob->head.ver);
5387 5388
 
   DBUG_PRINT("blob",("ver: %d complen: %d orglen: %d",
5389
                     ver,complen,orglen));
5390 5391 5392 5393
   DBUG_DUMP("blob->data", (char*) blob->data, complen);
 
   if (ver != 1)
     DBUG_RETURN(1);
5394
   if (!(data= my_malloc(max(orglen, complen), MYF(MY_WME))))
5395 5396 5397 5398 5399 5400 5401 5402 5403
     DBUG_RETURN(2);
   memcpy(data, blob->data, complen);
 
   if (my_uncompress(data, &complen, &orglen))
   {
     my_free((char*)data, MYF(0));
     DBUG_RETURN(3);
   }

5404 5405
   *unpack_data= data;
   *unpack_len= complen;
5406 5407 5408 5409 5410

   DBUG_PRINT("exit", ("frmdata: %x, len: %d", *unpack_data, *unpack_len));

   DBUG_RETURN(0);
}
5411 5412 5413

static 
int
5414
ndb_get_table_statistics(Ndb* ndb, const char * table,
5415
                         struct Ndb_statistics * ndbstat)
5416 5417 5418
{
  DBUG_ENTER("ndb_get_table_statistics");
  DBUG_PRINT("enter", ("table: %s", table));
5419
  NdbTransaction* pTrans= ndb->startTransaction();
5420 5421 5422 5423
  do 
  {
    if (pTrans == NULL)
      break;
5424
      
5425 5426 5427 5428
    NdbScanOperation* pOp= pTrans->getNdbScanOperation(table);
    if (pOp == NULL)
      break;
    
5429
    if (pOp->readTuples(NdbOperation::LM_CommittedRead))
5430 5431 5432 5433 5434 5435
      break;
    
    int check= pOp->interpret_exit_last_row();
    if (check == -1)
      break;
    
5436 5437
    Uint64 rows, commits, mem;
    Uint32 size;
5438 5439
    pOp->getValue(NdbDictionary::Column::ROW_COUNT, (char*)&rows);
    pOp->getValue(NdbDictionary::Column::COMMIT_COUNT, (char*)&commits);
5440 5441
    pOp->getValue(NdbDictionary::Column::ROW_SIZE, (char*)&size);
    pOp->getValue(NdbDictionary::Column::FRAGMENT_MEMORY, (char*)&mem);
5442
    
5443
    check= pTrans->execute(NdbTransaction::NoCommit,
5444 5445
                           NdbTransaction::AbortOnError,
                           TRUE);
5446 5447 5448
    if (check == -1)
      break;
    
5449
    Uint32 count= 0;
5450 5451
    Uint64 sum_rows= 0;
    Uint64 sum_commits= 0;
5452 5453
    Uint64 sum_row_size= 0;
    Uint64 sum_mem= 0;
5454
    while((check= pOp->nextResult(TRUE, TRUE)) == 0)
5455 5456 5457
    {
      sum_rows+= rows;
      sum_commits+= commits;
5458
      if (sum_row_size < size)
5459
        sum_row_size= size;
5460
      sum_mem+= mem;
5461
      count++;
5462 5463 5464 5465 5466
    }
    
    if (check == -1)
      break;

5467
    pOp->close(TRUE);
5468

5469
    ndb->closeTransaction(pTrans);
5470 5471 5472 5473 5474 5475

    ndbstat->row_count= sum_rows;
    ndbstat->commit_count= sum_commits;
    ndbstat->row_size= sum_row_size;
    ndbstat->fragment_memory= sum_mem;

5476 5477 5478 5479 5480
    DBUG_PRINT("exit", ("records: %llu commits: %llu "
                        "row_size: %llu mem: %llu count: %u",
			sum_rows, sum_commits, sum_row_size,
                        sum_mem, count));

5481 5482 5483
    DBUG_RETURN(0);
  } while(0);

msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
5484 5485
  if (pTrans)
    ndb->closeTransaction(pTrans);
5486 5487 5488 5489
  DBUG_PRINT("exit", ("failed"));
  DBUG_RETURN(-1);
}

5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504
/*
  Create an empty .ndb file that serves as a placeholder indicating
  that the table with this name is an NDB table.
*/

int ha_ndbcluster::write_ndb_file()
{
  File file;
  bool error=1;
  char path[FN_REFLEN];
  
  DBUG_ENTER("write_ndb_file");
  DBUG_PRINT("enter", ("db: %s, name: %s", m_dbname, m_tabname));

  (void)strxnmov(path, FN_REFLEN, 
5505
                 mysql_data_home,"/",m_dbname,"/",m_tabname,ha_ndb_ext,NullS);
5506 5507 5508 5509 5510 5511 5512 5513 5514 5515

  if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
  {
    // It's an empty file
    error=0;
    my_close(file,MYF(0));
  }
  DBUG_RETURN(error);
}

5516
/*
  Start a batched read of several key ranges.

  Defines one NDB operation per range (primary key read, unique index read,
  or a bound on one shared index scan) for as many ranges as the supplied
  buffer has room for, executes them all with execute_no_commit_ie() and
  then hands over to read_multi_range_next() to return the first row.

  If uses_blob_value(m_retrieve_all_fields) is true, batching is disabled
  and the call is forwarded to handler::read_multi_range_first().

  Buffer use: each key lookup takes reclength bytes from the front of the
  buffer (curr advances); the row buffer of the scan is placed in the last
  reclength bytes.

  RETURN
    result of read_multi_range_next() on success, otherwise an error code
*/
int
5517
ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
5518 5519 5520 5521
                                      KEY_MULTI_RANGE *ranges, 
                                      uint range_count,
                                      bool sorted, 
                                      HANDLER_BUFFER *buffer)
5522 5523
{
  DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
5524
  
5525 5526
  int res;
  KEY* key_info= table->key_info + active_index;
5527
  NDB_INDEX_TYPE index_type= get_index_type(active_index);
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
5528
  ulong reclength= table->s->reclength;
5529 5530
  NdbOperation* op;

5531
  if (uses_blob_value(m_retrieve_all_fields))
5532 5533 5534 5535
  {
    /**
     * blobs can't be batched currently
     */
5536
    m_disable_multi_read= TRUE;
5537
    DBUG_RETURN(handler::read_multi_range_first(found_range_p, 
5538 5539 5540 5541
                                                ranges, 
                                                range_count,
                                                sorted, 
                                                buffer));
5542 5543
  }

5544
  m_disable_multi_read= FALSE;
5545 5546 5547 5548

  /**
   * Copy arguments into member variables
   */
5549 5550 5551
  m_multi_ranges= ranges;
  multi_range_curr= ranges;
  multi_range_end= ranges+range_count;
5552 5553 5554
  multi_range_sorted= sorted;
  multi_range_buffer= buffer;

5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565
  /**
   * read multi range will read ranges as follows (if not ordered)
   *
   * input    read order
   * ======   ==========
   * pk-op 1  pk-op 1
   * pk-op 2  pk-op 2
   * range 3  range (3,5) NOTE result rows will be intermixed
   * pk-op 4  pk-op 4
   * range 5
   * pk-op 6  pk-ok 6
5566 5567
   */   

mskold@mysql.com's avatar
mskold@mysql.com committed
5568
  /**
5569 5570
   * Variables for loop
   */
5571 5572
  byte *curr= (byte*)buffer->buffer;
  byte *end_of_buffer= (byte*)buffer->buffer_end;
5573 5574 5575 5576
  NdbOperation::LockMode lm= 
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  const NDBTAB *tab= (const NDBTAB *) m_table;
  const NDBINDEX *unique_idx= (NDBINDEX *) m_index[active_index].unique_index;
5577
  const NDBINDEX *idx= (NDBINDEX *) m_index[active_index].index; 
5578 5579
  /* Remember the last operation defined before this call, so that the
     first operation defined below can be located after the loop */
  const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation();
  NdbIndexScanOperation* scanOp= 0;
5580 5581
  /* Define operations until all ranges are handled or the buffer has no
     room for another record */
  for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; 
       multi_range_curr++)
5582 5583 5584 5585
  {
    switch(index_type){
    case PRIMARY_KEY_INDEX:
  /* Also reached by goto from PRIMARY_KEY_ORDERED_INDEX for an exact
     full-key lookup */
  pk:
5586
    {
5587
      multi_range_curr->range_flag |= UNIQUE_RANGE;
5588
      if ((op= m_active_trans->getNdbOperation(tab)) && 
5589 5590 5591
          !op->readTuple(lm) && 
          !set_primary_key(op, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op) &&
5592
          (op->setAbortOption(AO_IgnoreError), TRUE))
5593
        curr += reclength;
5594
      else
5595
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
5596
      break;
5597 5598
    }
    /* NOTE(review): this second break cannot be reached */
    break;
5599 5600
    case UNIQUE_INDEX:
  /* Also reached by goto from UNIQUE_ORDERED_INDEX for an exact full-key
     lookup without NULL in the key */
  sk:
5601
    {
5602
      multi_range_curr->range_flag |= UNIQUE_RANGE;
5603
      if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) && 
5604 5605 5606
          !op->readTuple(lm) && 
          !set_index_key(op, key_info, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op) &&
5607
          (op->setAbortOption(AO_IgnoreError), TRUE))
5608
        curr += reclength;
5609
      else
5610
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
5611 5612 5613
      break;
    }
    case PRIMARY_KEY_ORDERED_INDEX:
5614
      if (multi_range_curr->start_key.length == key_info->key_length &&
5615 5616
          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT)
        goto pk;
5617 5618
      goto range;
    case UNIQUE_ORDERED_INDEX:
5619
      if (multi_range_curr->start_key.length == key_info->key_length &&
5620 5621 5622 5623
          multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
          !check_null_in_key(key_info, multi_range_curr->start_key.key,
                             multi_range_curr->start_key.length))
        goto sk;
5624
      goto range;
mskold@mysql.com's avatar
mskold@mysql.com committed
5625
    case ORDERED_INDEX: {
5626
  range:
5627
      multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
5628 5629
      /* All ranges of this call share one scan operation; it is set up
         when the first range is seen */
      if (scanOp == 0)
      {
5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641
        if (m_multi_cursor)
        {
          /* Reuse the scan cursor kept from an earlier call */
          scanOp= m_multi_cursor;
          DBUG_ASSERT(scanOp->getSorted() == sorted);
          DBUG_ASSERT(scanOp->getLockMode() == 
                      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
          if(scanOp->reset_bounds(m_force_send))
            DBUG_RETURN(ndb_err(m_active_trans));
          
          end_of_buffer -= reclength;
        }
        else if ((scanOp= m_active_trans->getNdbIndexScanOperation(idx, tab)) 
5642
                 &&!scanOp->readTuples(lm, 0, parallelism, sorted, FALSE, TRUE)
5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653
                 &&!generate_scan_filter(m_cond_stack, scanOp)
                 &&!define_read_attrs(end_of_buffer-reclength, scanOp))
        {
          m_multi_cursor= scanOp;
          m_multi_range_cursor_result_ptr= end_of_buffer-reclength;
        }
        else
        {
          ERR_RETURN(scanOp ? scanOp->getNdbError() : 
                     m_active_trans->getNdbError());
        }
5654
      }
5655

5656
      const key_range *keys[2]= { &multi_range_curr->start_key, 
5657
                                  &multi_range_curr->end_key };
5658
      /* The last argument is the index of this range within ranges */
      if ((res= set_bounds(scanOp, keys, multi_range_curr-ranges)))
5659
        DBUG_RETURN(res);
5660
      break;
5661
    }
mskold@mysql.com's avatar
mskold@mysql.com committed
5662 5663 5664 5665 5666
    case(UNDEFINED_INDEX):
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
      break;
    }
5667 5668
  }
  
5669
  if (multi_range_curr != multi_range_end)
5670
  {
5671 5672 5673 5674 5675 5676
    /**
     * Mark that we're using entire buffer (even if might not) as
     *   we haven't read all ranges for some reason
     * This as we don't want mysqld to reuse the buffer when we read
     *   the remaining ranges
     */
5677
    buffer->end_of_used_area= (byte*)buffer->buffer_end;
5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688
  }
  else
  {
    buffer->end_of_used_area= curr;
  }
  
  /**
   * Set first operation in multi range
   */
  m_current_multi_operation= 
    lastOp ? lastOp->next() : m_active_trans->getFirstDefinedOperation();
5689
  if (!(res= execute_no_commit_ie(this, m_active_trans)))
5690
  {
5691 5692
    /* Remember how far ranges were defined, then rewind so that
       read_multi_range_next() walks them from the first one */
    m_multi_range_defined= multi_range_curr;
    multi_range_curr= ranges;
5693 5694
    m_multi_range_result_ptr= (byte*)buffer->buffer;
    DBUG_RETURN(read_multi_range_next(found_range_p));
5695 5696 5697 5698
  }
  ERR_RETURN(m_active_trans->getNdbError());
}

5699 5700 5701 5702 5703 5704
/*
  Trace macro used in read_multi_range_next() to report which case was
  taken. Compiled out by default; change "#if 0" to "#if 1" to print the
  case number with printf().
*/
#if 0
#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x);
#else
#define DBUG_MULTI_RANGE(x)
#endif

5705
/*
  Return the next row of a multi range read started by
  read_multi_range_first().

  Walks the ranges from multi_range_curr up to m_multi_range_defined:
   - a range flagged UNIQUE_RANGE has its own key operation; a row is
     returned if that operation completed without error
   - other ranges are served by the shared scan cursor m_multi_cursor
  When all defined ranges are consumed but more ranges remain,
  read_multi_range_first() is called for the remaining ones.

  If batching was disabled (m_disable_multi_read) the call is forwarded to
  handler::read_multi_range_next().

  RETURN
    0                   row stored in table->record[0] and
                        *multi_range_found_p set to its range
    HA_ERR_END_OF_FILE  no more rows
    other               error
*/
int
5706
ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
5707 5708
{
  DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
5709
  if (m_disable_multi_read)
5710
  {
5711
    DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
5712
  }
5713
  
5714
  int res;
5715
  int range_no;
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
5716
  ulong reclength= table->s->reclength;
5717
  const NdbOperation* op= m_current_multi_operation;
5718
  for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
5719
  {
5720
    if (multi_range_curr->range_flag & UNIQUE_RANGE)
5721
    {
5722
      if (op->getNdbError().code == 0)
5723
        goto found_next;
5724 5725 5726
      
      /* This lookup produced no row: step to the next completed operation
         and skip its slot in the result buffer */
      op= m_active_trans->getNextCompletedOperation(op);
      m_multi_range_result_ptr += reclength;
5727
      continue;
5728
    } 
5729
    else if (m_multi_cursor && !multi_range_sorted)
5730
    {
5731 5732
      DBUG_MULTI_RANGE(1);
      if ((res= fetch_next(m_multi_cursor)) == 0)
5733
      {
5734 5735 5736
        DBUG_MULTI_RANGE(2);
        range_no= m_multi_cursor->get_range_no();
        goto found;
5737 5738 5739
      } 
      else
      {
5740
        goto close_scan;
5741 5742
      }
    }
5743
    else if (m_multi_cursor && multi_range_sorted)
5744
    {
5745 5746
      /* Fetch a new row only if the current one has been handed out;
         m_active_cursor is cleared below when a row is held back */
      if (m_active_cursor && (res= fetch_next(m_multi_cursor)))
      {
5747 5748
        DBUG_MULTI_RANGE(3);
        goto close_scan;
5749
      }
5750
      
5751
      range_no= m_multi_cursor->get_range_no();
5752
      uint current_range_no= multi_range_curr - m_multi_ranges;
mskold@mysql.com's avatar
mskold@mysql.com committed
5753
      if ((uint) range_no == current_range_no)
5754
      {
5755
        DBUG_MULTI_RANGE(4);
5756
        // return current row
5757
        goto found;
5758
      }
5759
      else if (range_no > (int)current_range_no)
5760
      {
5761 5762 5763 5764
        DBUG_MULTI_RANGE(5);
        // wait with current row
        m_active_cursor= 0;
        continue;
5765 5766 5767
      }
      else 
      {
5768 5769 5770 5771 5772 5773 5774 5775 5776
        DBUG_MULTI_RANGE(6);
        // First fetch from cursor
        DBUG_ASSERT(range_no == -1);
        if((res= m_multi_cursor->nextResult(true)))
        {
          goto close_scan;
        }
        multi_range_curr--; // Will be increased in for-loop
        continue;
5777
      }
5778
    }
5779
    else /** m_multi_cursor == 0 */
5780
    {
5781
      DBUG_MULTI_RANGE(7);
5782 5783 5784 5785
      /**
       * Corresponds to range 5 in example in read_multi_range_first
       */
      (void)1;
5786
      continue;
5787
    }
5788
    
5789
    DBUG_ASSERT(FALSE); // Should only get here via goto's
5790 5791 5792
close_scan:
    /* res == 1 is handled as "scan has no more rows": the cursor is closed
       and the loop goes on; any other value is reported as an error.
       NOTE(review): presumably 1 is the end-of-scan result of
       fetch_next()/nextResult() -- confirm */
    if (res == 1)
    {
5793
      m_multi_cursor->close(FALSE, TRUE);
5794
      m_active_cursor= m_multi_cursor= 0;
5795
      DBUG_MULTI_RANGE(8);
5796 5797 5798 5799 5800 5801 5802
      continue;
    } 
    else 
    {
      DBUG_RETURN(ndb_err(m_active_trans));
    }
  }
5803
  
5804
  if (multi_range_curr == multi_range_end)
5805
    DBUG_RETURN(HA_ERR_END_OF_FILE);
5806
  
5807 5808 5809 5810
  /**
   * Read remaining ranges
   */
  DBUG_RETURN(read_multi_range_first(multi_range_found_p, 
5811 5812 5813 5814
                                     multi_range_curr,
                                     multi_range_end - multi_range_curr, 
                                     multi_range_sorted,
                                     multi_range_buffer));
5815 5816
  
found:
5817 5818 5819
  /**
   * Found a record belonging to a scan
   */
5820
  m_active_cursor= m_multi_cursor;
5821
  * multi_range_found_p= m_multi_ranges + range_no;
5822 5823
  memcpy(table->record[0], m_multi_range_cursor_result_ptr, reclength);
  setup_recattr(m_active_cursor->getFirstRecAttr());
5824 5825 5826
  unpack_record(table->record[0]);
  table->status= 0;     
  DBUG_RETURN(0);
5827
  
5828
found_next:
5829 5830 5831 5832
  /**
   * Found a record belonging to a pk/index op,
   *   copy result and move to next to prepare for next call
   */
5833
  * multi_range_found_p= multi_range_curr;
5834
  memcpy(table->record[0], m_multi_range_result_ptr, reclength);
5835
  setup_recattr(op->getFirstRecAttr());
5836
  unpack_record(table->record[0]);
5837 5838
  table->status= 0;
  
5839
  multi_range_curr++;
5840
  m_current_multi_operation= m_active_trans->getNextCompletedOperation(op);
5841 5842
  m_multi_range_result_ptr += reclength;
  DBUG_RETURN(0);
5843 5844
}

5845 5846 5847 5848 5849 5850 5851 5852
int
ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
{
  DBUG_ENTER("setup_recattr");

  Field **field, **end;
  NdbValue *value= m_value;
  
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
5853
  end= table->field + table->s->fields;
5854 5855 5856 5857 5858 5859
  
  for (field= table->field; field < end; field++, value++)
  {
    if ((* value).ptr)
    {
      DBUG_ASSERT(curr != 0);
5860 5861
      (* value).rec= curr;
      curr= curr->next();
5862 5863 5864
    }
  }
  
5865
  DBUG_RETURN(0);
5866 5867
}

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5868 5869
char*
ha_ndbcluster::update_table_comment(
5870 5871
                                /* out: table comment + additional */
        const char*     comment)/* in:  table comment defined by user */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901
{
  uint length= strlen(comment);
  if(length > 64000 - 3)
  {
    return((char*)comment); /* string too long */
  }

  Ndb* ndb;
  if (!(ndb= get_ndb()))
  {
    return((char*)comment);
  }

  ndb->setDatabaseName(m_dbname);
  NDBDICT* dict= ndb->getDictionary();
  const NDBTAB* tab;
  if (!(tab= dict->getTable(m_tabname)))
  {
    return((char*)comment);
  }

  char *str;
  const char *fmt="%s%snumber_of_replicas: %d";
  const unsigned fmt_len_plus_extra= length + strlen(fmt);
  if ((str= my_malloc(fmt_len_plus_extra, MYF(0))) == NULL)
  {
    return (char*)comment;
  }

  snprintf(str,fmt_len_plus_extra,fmt,comment,
5902 5903
           length > 0 ? " ":"",
           tab->getReplicaCount());
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5904 5905 5906 5907 5908 5909 5910 5911 5912
  return str;
}


/*
  Utility thread main loop.

  Periodically refreshes the cached commit count of every open NDB table:
   1. Wait on COND_ndb_util_thread until the computed wake-up time.
   2. Stop if abort_loop is set. If ndb_cache_check_time is 0, check again
      in one second.
   3. Under ndbcluster_mutex, collect all shares in ndbcluster_open_tables
      and take a reference on each so they cannot be freed meanwhile.
   4. For each share, fetch the statistics from NDB and store the commit
      count (0 if the fetch failed), unless share->commit_count_lock changed
      while fetching. Then release the reference.
   5. Schedule the next wake-up ndb_cache_check_time milliseconds from now.

  The thread exits without entering the loop if thd->store_globals() fails
  or the Ndb object cannot be initialised.
*/
extern "C" pthread_handler_decl(ndb_util_thread_func,
                                arg __attribute__((unused)))
{
  THD *thd; /* needs to be first for thread_stack */
  Ndb* ndb;
  int error= 0;
  struct timespec abstime;

  my_thread_init();
  DBUG_ENTER("ndb_util_thread");
  DBUG_PRINT("enter", ("ndb_cache_check_time: %d", ndb_cache_check_time));

  thd= new THD; /* note that contructor of THD uses DBUG_ */
  THD_CHECK_SENTRY(thd);
  ndb= new Ndb(g_ndb_cluster_connection, "");

  pthread_detach_this_thread();
  ndb_util_thread= pthread_self();

  thd->thread_stack= (char*)&thd; /* remember where our stack is */
  /*
    Give up if either set-up step fails. The two checks must be joined
    with || so that ndb->init() is always run when store_globals()
    succeeds; a non-zero result from either call is a failure.
  */
  if (thd->store_globals() || (ndb->init() != 0))
  {
    thd->cleanup();
    delete thd;
    delete ndb;
    DBUG_RETURN(NULL);
  }

  List<NDB_SHARE> util_open_tables;
  set_timespec(abstime, 0);
  for (;;)
  {

    pthread_mutex_lock(&LOCK_ndb_util_thread);
    error= pthread_cond_timedwait(&COND_ndb_util_thread,
                                  &LOCK_ndb_util_thread,
                                  &abstime);
    pthread_mutex_unlock(&LOCK_ndb_util_thread);

    DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %d",
                                   ndb_cache_check_time));

    if (abort_loop)
      break; /* Shutting down server */

    if (ndb_cache_check_time == 0)
    {
      /* Wake up in 1 second to check if value has changed */
      set_timespec(abstime, 1);
      continue;
    }

    /* Lock mutex and fill list with pointers to all open tables */
    NDB_SHARE *share;
    pthread_mutex_lock(&ndbcluster_mutex);
    for (uint i= 0; i < ndbcluster_open_tables.records; i++)
    {
      share= (NDB_SHARE *)hash_element(&ndbcluster_open_tables, i);
      share->use_count++; /* Make sure the table can't be closed */
      DBUG_PRINT("ndb_util_thread",
                 ("Found open table[%d]: %s, use_count: %d",
                  i, share->table_name, share->use_count));

      /* Store pointer to table */
      util_open_tables.push_back(share);
    }
    pthread_mutex_unlock(&ndbcluster_mutex);

    /* Iterate through the  open files list */
    List_iterator_fast<NDB_SHARE> it(util_open_tables);
    while ((share= it++))
    {
      /*
        Split tab- and dbname: the table name is what follows the last
        directory part of share->table_name, the database name is the
        last component of that directory part.
      */
      char buf[FN_REFLEN];
      char *tabname, *db;
      uint length= dirname_length(share->table_name);
      tabname= share->table_name+length;
      memcpy(buf, share->table_name, length-1);
      buf[length-1]= 0;
      db= buf+dirname_length(buf);
      DBUG_PRINT("ndb_util_thread",
                 ("Fetching commit count for: %s",
                  share->table_name));

      /* Contact NDB to get commit count for table */
      ndb->setDatabaseName(db);
      struct Ndb_statistics stat;

      /*
        Sample commit_count_lock before the (slow) fetch; the result is
        stored only if the value is still the same afterwards.
      */
      uint lock;
      pthread_mutex_lock(&share->mutex);
      lock= share->commit_count_lock;
      pthread_mutex_unlock(&share->mutex);

      if(ndb_get_table_statistics(ndb, tabname, &stat) == 0)
      {
        DBUG_PRINT("ndb_util_thread",
                   ("Table: %s, commit_count: %llu, rows: %llu",
                    share->table_name, stat.commit_count, stat.row_count));
      }
      else
      {
        DBUG_PRINT("ndb_util_thread",
                   ("Error: Could not get commit count for table %s",
                    share->table_name));
        stat.commit_count= 0;
      }

      pthread_mutex_lock(&share->mutex);
      if (share->commit_count_lock == lock)
        share->commit_count= stat.commit_count;
      pthread_mutex_unlock(&share->mutex);

      /* Decrease the use count and possibly free share */
      free_share(share);
    }

    /* Clear the list of open tables */
    util_open_tables.empty();

    /* Calculate new time to wake up: now + ndb_cache_check_time ms */
    int secs= 0;
    int msecs= ndb_cache_check_time;

    struct timeval tick_time;
    gettimeofday(&tick_time, 0);
    abstime.tv_sec=  tick_time.tv_sec;
    abstime.tv_nsec= tick_time.tv_usec * 1000;

    if(msecs >= 1000){
      secs=  msecs / 1000;
      msecs= msecs % 1000;
    }

    abstime.tv_sec+=  secs;
    abstime.tv_nsec+= msecs * 1000000;
    /* Carry whole seconds out of the nanosecond field */
    if (abstime.tv_nsec >= 1000000000) {
      abstime.tv_sec+=  1;
      abstime.tv_nsec-= 1000000000;
    }
  }

  thd->cleanup();
  delete thd;
  delete ndb;
  DBUG_PRINT("exit", ("ndb_util_thread"));
  my_thread_end();
  pthread_exit(0);
  DBUG_RETURN(NULL);
}

6059 6060 6061
/*
  Condition pushdown
*/
6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078
/*
  Push a condition to the ndbcluster storage engine for evaluation
  during table and index scans. Conditions are stored on a stack, so
  several conditions can be pushed. The top condition is removed by
  calling cond_pop; handler::extra(HA_EXTRA_RESET) (handler::reset())
  clears the whole stack.
  The current implementation supports arbitrary AND/OR nested conditions
  with comparisons between columns and constants (including constant
  expressions and function calls) and the following comparison operators:
  =, !=, >, >=, <, <=, "is null", and "is not null".
  
  RETURN
    NULL The condition was supported and will be evaluated for each 
         row found during the scan
    cond The condition was not supported and all rows will be returned from
         the scan for evaluation (and thus not saved on stack)
*/
6079 6080 6081 6082 6083
/*
  Push cond onto the handler's condition stack.

  A new stack entry is linked in front of m_cond_stack and the condition is
  serialized into it. If serialize_cond() returns false the entry is popped
  again and cond is handed back to the caller.

  RETURN
    NULL  serialize_cond() succeeded; the entry stays on the stack
    cond  serialize_cond() failed; the stack is as before the call
*/
const 
COND* 
ha_ndbcluster::cond_push(const COND *cond) 
{ 
  DBUG_ENTER("cond_push");
  Ndb_cond_stack *entry = new Ndb_cond_stack();
  DBUG_EXECUTE("where",print_where((COND *)cond, m_tabname););

  /* Link in front of the current top, which is NULL for an empty stack */
  entry->next= m_cond_stack ? m_cond_stack : NULL;
  m_cond_stack= entry;

  if (!serialize_cond(cond, entry))
  {
    cond_pop();
    DBUG_RETURN(cond);
  }
  DBUG_RETURN(NULL);
}

6103 6104 6105
/*
  Pop the top condition from the condition stack of the handler instance.
*/
6106 6107 6108 6109 6110 6111 6112 6113 6114
void 
ha_ndbcluster::cond_pop() 
{ 
  Ndb_cond_stack *ndb_cond_stack= m_cond_stack;  
  if (ndb_cond_stack)
  {
    m_cond_stack= ndb_cond_stack->next;
    delete ndb_cond_stack;
  }
mskold@mysql.com's avatar
mskold@mysql.com committed
6115
}
6116

6117 6118 6119
/*
  Clear the condition stack
*/
6120 6121 6122 6123 6124 6125 6126 6127 6128 6129
void
ha_ndbcluster::cond_clear()
{
  DBUG_ENTER("cond_clear");
  while (m_cond_stack)
    cond_pop();

  DBUG_VOID_RETURN;
}

6130 6131 6132 6133 6134 6135
/*
  Serialize the item tree into a linked list represented by Ndb_cond
  for fast generation of NdbScanFilter. Adds information, such as the
  position of fields, that is not directly available in the Item tree.
  Also checks if the condition is supported.
*/
6136 6137 6138 6139 6140
void ndb_serialize_cond(const Item *item, void *arg)
{
  Ndb_cond_traverse_context *context= (Ndb_cond_traverse_context *) arg;
  DBUG_ENTER("ndb_serialize_cond");  

mskold@mysql.com's avatar
mskold@mysql.com committed
6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155
  // Check if we are skipping arguments to a function to be evaluated
  if (context->skip)
  {
    DBUG_PRINT("info", ("Skiping argument %d", context->skip));
    context->skip--;
    switch(item->type()) {
    case (Item::FUNC_ITEM): {
      Item_func *func_item= (Item_func *) item;
      context->skip+= func_item->argument_count();
      break;
    }
    case(Item::INT_ITEM):
    case(Item::REAL_ITEM):
    case(Item::STRING_ITEM):
    case(Item::VARBIN_ITEM):
6156
    case(Item::DECIMAL_ITEM):
mskold@mysql.com's avatar
mskold@mysql.com committed
6157 6158
      break;
    default:
6159
      context->supported= FALSE;
mskold@mysql.com's avatar
mskold@mysql.com committed
6160 6161
      break;
    }
6162
    
mskold@mysql.com's avatar
mskold@mysql.com committed
6163 6164 6165
    DBUG_VOID_RETURN;
  }
  
6166
  if (context->supported)
6167
  {
6168 6169 6170 6171 6172 6173
    Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
    const Item_func *func_item;
    // Check if we are rewriting some unsupported function call
    if (rewrite_context &&
        (func_item= rewrite_context->func_item) &&
        rewrite_context->count++ == 0)
mskold@mysql.com's avatar
mskold@mysql.com committed
6174
    {
6175 6176
      switch(func_item->functype()) {
      case(Item_func::BETWEEN):
6177
        /*
6178 6179 6180 6181 6182 6183 6184
          Rewrite 
          <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2>
          to <field>|<const> > <const1>|<field1> AND 
          <field>|<const> < <const2>|<field2>
          or actually in prefix format
          BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), 
          LT(<field>|<const>, <const2>|<field2>), END()
6185
        */
6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202
      case(Item_func::IN_FUNC): {
        /*
          Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..)
          to <field>|<const> = <const1>|<field1> OR 
          <field> = <const2>|<field2> ...
          or actually in prefix format
          BEGIN(OR) EQ(<field>|<const>, <const1><field1>), 
          EQ(<field>|<const>, <const2>|<field2>), ... END()
          Each part of the disjunction is added for each call
          to ndb_serialize_cond and end of rewrite statement 
          is wrapped in end of ndb_serialize_cond
        */
        if (context->expecting(item->type()))
        {
          // This is the <field>|<const> item, save it in the rewrite context
          rewrite_context->left_hand_item= item;
          if (item->type() == Item::FUNC_ITEM)
6203
          {
6204 6205 6206
            Item_func *func_item= (Item_func *) item;
            if (func_item->functype() == Item_func::UNKNOWN_FUNC &&
                func_item->const_item())
6207
            {
6208 6209 6210
              // Skip any arguments since we will evaluate function instead
              DBUG_PRINT("info", ("Skip until end of arguments marker"));
              context->skip= func_item->argument_count();
6211 6212 6213
            }
            else
            {
6214 6215 6216 6217
              DBUG_PRINT("info", ("Found unsupported functional expression in BETWEEN|IN"));
              context->supported= FALSE;
              DBUG_VOID_RETURN;
              
6218 6219 6220
            }
          }
        }
6221 6222
        else
        {
6223 6224 6225
          // Non-supported BETWEEN|IN expression
          DBUG_PRINT("info", ("Found unexpected item of type %u in BETWEEN|IN",
                              item->type()));
6226
          context->supported= FALSE;
6227
          DBUG_VOID_RETURN;
6228
        }
6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250
        break;
      }
      default:
        context->supported= FALSE;
        break;
      }
      DBUG_VOID_RETURN;
    }
    else
    {
      Ndb_cond_stack *ndb_stack= context->stack_ptr;
      Ndb_cond *prev_cond= context->cond_ptr;
      Ndb_cond *curr_cond= context->cond_ptr= new Ndb_cond();
      if (!ndb_stack->ndb_cond)
        ndb_stack->ndb_cond= curr_cond;
      curr_cond->prev= prev_cond;
      if (prev_cond) prev_cond->next= curr_cond;
    // Check if we are rewriting some unsupported function call
      if (context->rewrite_stack)
      {
        Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
        const Item_func *func_item= rewrite_context->func_item;
6251
        switch(func_item->functype()) {
6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280
        case(Item_func::BETWEEN): {
        /*
          Rewrite 
          <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2>
          to <field>|<const> > <const1>|<field1> AND 
          <field>|<const> < <const2>|<field2>
          or actually in prefix format
          BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), 
          LT(<field>|<const>, <const2>|<field2>), END()
        */
          if (rewrite_context->count == 2)
          {
            // Lower limit of BETWEEN
            DBUG_PRINT("info", ("GE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(Item_func::GE_FUNC, 2);
          }
          else if (rewrite_context->count == 3)
          {
            // Upper limit of BETWEEN
            DBUG_PRINT("info", ("LE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(Item_func::LE_FUNC, 2);
          }
          else
          {
            // Illegal BETWEEN expression
            DBUG_PRINT("info", ("Illegal BETWEEN expression"));
            context->supported= FALSE;
            DBUG_VOID_RETURN;
          }
6281 6282
          break;
        }
6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296
        case(Item_func::IN_FUNC): {
          /*
            Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..)
            to <field>|<const> = <const1>|<field1> OR 
            <field> = <const2>|<field2> ...
            or actually in prefix format
            BEGIN(OR) EQ(<field>|<const>, <const1><field1>), 
            EQ(<field>|<const>, <const2>|<field2>), ... END()
            Each part of the disjunction is added for each call
            to ndb_serialize_cond and end of rewrite statement 
            is wrapped in end of ndb_serialize_cond
          */
          DBUG_PRINT("info", ("EQ_FUNC"));      
          curr_cond->ndb_item= new Ndb_item(Item_func::EQ_FUNC, 2);
6297 6298
          break;
        }
6299 6300
        default:
          context->supported= FALSE;
6301
        }
6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367
        // Handle left hand <field>|<const>
        context->rewrite_stack= NULL; // Disable rewrite mode
        context->expect_only(Item::FIELD_ITEM);
        context->expect_field_result(STRING_RESULT);
        context->expect_field_result(REAL_RESULT);
        context->expect_field_result(INT_RESULT);
        context->expect_field_result(DECIMAL_RESULT);
        context->expect(Item::INT_ITEM);
        context->expect(Item::STRING_ITEM);
        context->expect(Item::VARBIN_ITEM);
        context->expect(Item::FUNC_ITEM);
        ndb_serialize_cond(rewrite_context->left_hand_item, arg);
        context->skip= 0; // Any FUNC_ITEM expression has already been parsed
        context->rewrite_stack= rewrite_context; // Enable rewrite mode
        if (!context->supported)
          DBUG_VOID_RETURN;

        prev_cond= context->cond_ptr;
        curr_cond= context->cond_ptr= new Ndb_cond();
        prev_cond->next= curr_cond;
      }
      
      // Check for end of AND/OR expression
      if (!item)
      {
        // End marker for condition group
        DBUG_PRINT("info", ("End of condition group"));
        curr_cond->ndb_item= new Ndb_item(NDB_END_COND);
      }
      else
        switch(item->type()) {
        case(Item::FIELD_ITEM): {
          Item_field *field_item= (Item_field *) item;
          Field *field= field_item->field;
          enum_field_types type= field->type();
          /*
            Check that the field is part of the table of the handler
            instance and that we expect a field with of this result type.
          */
          if (context->table == field->table)
          {       
            const NDBTAB *tab= (const NDBTAB *) context->ndb_table;
            DBUG_PRINT("info", ("FIELD_ITEM"));
            DBUG_PRINT("info", ("table %s", tab->getName()));
            DBUG_PRINT("info", ("column %s", field->field_name));
            DBUG_PRINT("info", ("result type %d", field->result_type()));
            
            // Check that we are expecting a field and with the correct
            // result type
            if (context->expecting(Item::FIELD_ITEM) &&
                (context->expecting_field_result(field->result_type()) ||
                 // Date and year can be written as strings
                 ((type == MYSQL_TYPE_TIME ||
                   type == MYSQL_TYPE_DATE || 
                   type == MYSQL_TYPE_YEAR ||
                   type == MYSQL_TYPE_DATETIME)
                  ? context->expecting_field_result(STRING_RESULT) : true)) &&
                // Bit fields no yet supported in scan filter
                type != MYSQL_TYPE_BIT)
            {
              const NDBCOL *col= tab->getColumn(field->field_name);
              DBUG_ASSERT(col);
              curr_cond->ndb_item= new Ndb_item(field, col->getColumnNo());
              context->dont_expect(Item::FIELD_ITEM);
              context->expect_no_field_result();
              if (context->expect_mask)
6368
              {
6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388
                // We have not seen second argument yet
                if (type == MYSQL_TYPE_TIME ||
                    type == MYSQL_TYPE_DATE || 
                    type == MYSQL_TYPE_YEAR ||
                    type == MYSQL_TYPE_DATETIME)
                {
                  context->expect_only(Item::STRING_ITEM);
                  context->expect(Item::INT_ITEM);
                }
                else
                  switch(field->result_type()) {
                  case(STRING_RESULT):
                    // Expect char string or binary string
                    context->expect_only(Item::STRING_ITEM);
                    context->expect(Item::VARBIN_ITEM);
                    context->expect_collation(field_item->collation.collation);
                    break;
                  case(REAL_RESULT):
                    context->expect_only(Item::REAL_ITEM);
                    context->expect(Item::DECIMAL_ITEM);
6389
                    context->expect(Item::INT_ITEM);
6390 6391 6392 6393 6394 6395 6396 6397
                    break;
                  case(INT_RESULT):
                    context->expect_only(Item::INT_ITEM);
                    context->expect(Item::VARBIN_ITEM);
                    break;
                  case(DECIMAL_RESULT):
                    context->expect_only(Item::DECIMAL_ITEM);
                    context->expect(Item::REAL_ITEM);
6398
                    context->expect(Item::INT_ITEM);
6399 6400 6401 6402
                    break;
                  default:
                    break;
                  }    
6403 6404
              }
              else
6405 6406 6407 6408
              {
                // Expect another logical expression
                context->expect_only(Item::FUNC_ITEM);
                context->expect(Item::COND_ITEM);
6409 6410 6411 6412 6413 6414 6415
                // Check that field and string constant collations are the same
                if ((field->result_type() == STRING_RESULT) &&
                    !context->expecting_collation(item->collation.collation)
                    && type != MYSQL_TYPE_TIME
                    && type != MYSQL_TYPE_DATE
                    && type != MYSQL_TYPE_YEAR
                    && type != MYSQL_TYPE_DATETIME)
6416
                {
mskold@mysql.com's avatar
mskold@mysql.com committed
6417
                  DBUG_PRINT("info", ("Found non-matching collation %s",  
6418 6419
                                      item->collation.collation->name)); 
                  context->supported= FALSE;                
6420 6421
                }
              }
6422 6423
              break;
            }
6424 6425 6426 6427 6428
            else
            {
              DBUG_PRINT("info", ("Was not expecting field of type %u",
                                  field->result_type()));
              context->supported= FALSE;
6429
            }
6430
          }
6431
          else
6432 6433 6434 6435
          {
            DBUG_PRINT("info", ("Was not expecting field from table %s(%s)",
                                context->table->s->table_name, 
                                field->table->s->table_name));
6436
            context->supported= FALSE;
6437
          }
6438 6439
          break;
        }
6440 6441 6442 6443 6444 6445 6446
        case(Item::FUNC_ITEM): {
          Item_func *func_item= (Item_func *) item;
          // Check that we expect a function or functional expression here
          if (context->expecting(Item::FUNC_ITEM) || 
              func_item->functype() == Item_func::UNKNOWN_FUNC)
            context->expect_nothing();
          else
6447
          {
6448 6449 6450
            // Did not expect function here
            context->supported= FALSE;
            break;
6451
          }
6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468
          
          switch(func_item->functype()) {
          case(Item_func::EQ_FUNC): {
            DBUG_PRINT("info", ("EQ_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(), 
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
6469
          }
6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484
          case(Item_func::NE_FUNC): {
            DBUG_PRINT("info", ("NE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
6485
          }
6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596
          case(Item_func::LT_FUNC): {
            DBUG_PRINT("info", ("LT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
          case(Item_func::LE_FUNC): {
            DBUG_PRINT("info", ("LE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
          case(Item_func::GE_FUNC): {
            DBUG_PRINT("info", ("GE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
          case(Item_func::GT_FUNC): {
            DBUG_PRINT("info", ("GT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
          case(Item_func::LIKE_FUNC): {
            DBUG_PRINT("info", ("LIKE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect(Item::FUNC_ITEM);
            break;
          }
          case(Item_func::NOTLIKE_FUNC): {
            DBUG_PRINT("info", ("NOTLIKE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect(Item::FUNC_ITEM);
            break;
          }
          case(Item_func::ISNULL_FUNC): {
            DBUG_PRINT("info", ("ISNULL_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
          case(Item_func::ISNOTNULL_FUNC): {
            DBUG_PRINT("info", ("ISNOTNULL_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);     
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
          case(Item_func::NOT_FUNC): {
            DBUG_PRINT("info", ("NOT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);     
            context->expect(Item::FUNC_ITEM);
6597
            context->expect(Item::COND_ITEM);
6598
            break;
6599
          }
6600 6601 6602 6603 6604 6605 6606 6607 6608
          case(Item_func::BETWEEN) : {
            DBUG_PRINT("info", ("BETWEEN, rewriting using AND"));
            Ndb_rewrite_context *rewrite_context= 
              new Ndb_rewrite_context(func_item);
            rewrite_context->next= context->rewrite_stack;
            context->rewrite_stack= rewrite_context;
            DBUG_PRINT("info", ("COND_AND_FUNC"));
            curr_cond->ndb_item= new Ndb_item(Item_func::COND_AND_FUNC, 
                                              func_item->argument_count() - 1);
6609
            context->expect_only(Item::FIELD_ITEM);
6610 6611 6612 6613 6614
            context->expect(Item::INT_ITEM);
            context->expect(Item::STRING_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FUNC_ITEM);
            break;
6615
          }
6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743
          case(Item_func::IN_FUNC) : {
            DBUG_PRINT("info", ("IN_FUNC, rewriting using OR"));
            Ndb_rewrite_context *rewrite_context= 
              new Ndb_rewrite_context(func_item);
            rewrite_context->next= context->rewrite_stack;
            context->rewrite_stack= rewrite_context;
            DBUG_PRINT("info", ("COND_OR_FUNC"));
            curr_cond->ndb_item= new Ndb_item(Item_func::COND_OR_FUNC, 
                                              func_item->argument_count() - 1);
            context->expect_only(Item::FIELD_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::STRING_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FUNC_ITEM);
            break;
          }
          case(Item_func::UNKNOWN_FUNC): {
            DBUG_PRINT("info", ("UNKNOWN_FUNC %s", 
                                func_item->const_item()?"const":""));  
            DBUG_PRINT("info", ("result type %d", func_item->result_type()));
            if (func_item->const_item())
              switch(func_item->result_type()) {
              case(STRING_RESULT): {
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::STRING_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); 
                if (context->expect_field_result_mask)
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(STRING_RESULT);
                  context->expect_collation(func_item->collation.collation);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                  // Check that string result have correct collation
                  if (!context->expecting_collation(item->collation.collation))
                  {
                    DBUG_PRINT("info", ("Found non-matching collation %s",  
                                        item->collation.collation->name));
                    context->supported= FALSE;
                  }
                }
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
              case(REAL_RESULT): {
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::REAL_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
                if (context->expect_field_result_mask) 
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(REAL_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
              case(INT_RESULT): {
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::INT_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
                if (context->expect_field_result_mask) 
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(INT_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
              case(DECIMAL_RESULT): {
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::DECIMAL_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
                if (context->expect_field_result_mask) 
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(DECIMAL_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
              default:
                break;
              }
            else
              // Function does not return constant expression
              context->supported= FALSE;
            break;
          }
          default: {
            DBUG_PRINT("info", ("Found func_item of type %d", 
                                func_item->functype()));
            context->supported= FALSE;
6744
          }
6745 6746
          }
          break;
6747
        }
6748 6749 6750 6751
        case(Item::STRING_ITEM):
          DBUG_PRINT("info", ("STRING_ITEM")); 
          if (context->expecting(Item::STRING_ITEM)) 
          {
6752
#ifndef DBUG_OFF
6753 6754 6755 6756 6757 6758
            char buff[256];
            String str(buff,(uint32) sizeof(buff), system_charset_info);
            str.length(0);
            Item_string *string_item= (Item_string *) item;
            DBUG_PRINT("info", ("value \"%s\"", 
                                string_item->val_str(&str)->ptr()));
6759
#endif
6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::STRING_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);      
            if (context->expect_field_result_mask)
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(STRING_RESULT);
              context->expect_collation(item->collation.collation);
            }
            else 
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
              // Check that we are comparing with a field with same collation
              if (!context->expecting_collation(item->collation.collation))
              {
                DBUG_PRINT("info", ("Found non-matching collation %s",  
                                    item->collation.collation->name));
                context->supported= FALSE;
              }
            }
          }
          else
            context->supported= FALSE;
          break;
        case(Item::INT_ITEM): 
          DBUG_PRINT("info", ("INT_ITEM"));
          if (context->expecting(Item::INT_ITEM)) 
6790
          {
6791 6792 6793 6794 6795 6796 6797 6798 6799 6800
            Item_int *int_item= (Item_int *) item;      
            DBUG_PRINT("info", ("value %d", int_item->value));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::INT_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
            if (context->expect_field_result_mask) 
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(INT_RESULT);
6801 6802
              context->expect_field_result(REAL_RESULT);
              context->expect_field_result(DECIMAL_RESULT);
6803 6804 6805 6806 6807 6808 6809
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
6810 6811
          }
          else
6812 6813 6814 6815 6816
            context->supported= FALSE;
          break;
        case(Item::REAL_ITEM):
          DBUG_PRINT("info", ("REAL_ITEM %s"));
          if (context->expecting(Item::REAL_ITEM)) 
6817
          {
6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834
            Item_float *float_item= (Item_float *) item;      
            DBUG_PRINT("info", ("value %f", float_item->value));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::REAL_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
            if (context->expect_field_result_mask) 
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(REAL_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
6835
          }
6836 6837 6838 6839 6840 6841
          else
            context->supported= FALSE;
          break;
        case(Item::VARBIN_ITEM):
          DBUG_PRINT("info", ("VARBIN_ITEM"));
          if (context->expecting(Item::VARBIN_ITEM)) 
6842
          {
6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::VARBIN_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);      
            if (context->expect_field_result_mask)
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(STRING_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
6858 6859
          }
          else
6860 6861 6862 6863 6864
            context->supported= FALSE;
          break;
        case(Item::DECIMAL_ITEM):
          DBUG_PRINT("info", ("DECIMAL_ITEM %s"));
          if (context->expecting(Item::DECIMAL_ITEM)) 
6865
          {
6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883
            Item_decimal *decimal_item= (Item_decimal *) item;      
            DBUG_PRINT("info", ("value %f", decimal_item->val_real()));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::DECIMAL_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
            if (context->expect_field_result_mask) 
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(REAL_RESULT);
              context->expect_field_result(DECIMAL_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
6884
          }
6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911
          else
            context->supported= FALSE;
          break;
        case(Item::COND_ITEM): {
          Item_cond *cond_item= (Item_cond *) item;
          
          if (context->expecting(Item::COND_ITEM))
            switch(cond_item->functype()) {
            case(Item_func::COND_AND_FUNC):
              DBUG_PRINT("info", ("COND_AND_FUNC"));
              curr_cond->ndb_item= new Ndb_item(cond_item->functype(),
                                                cond_item);      
              break;
            case(Item_func::COND_OR_FUNC):
              DBUG_PRINT("info", ("COND_OR_FUNC"));
              curr_cond->ndb_item= new Ndb_item(cond_item->functype(),
                                                cond_item);      
              break;
            default:
              DBUG_PRINT("info", ("COND_ITEM %d", cond_item->functype()));
              context->supported= FALSE;
              break;
            }
          else
            // Did not expect condition
            context->supported= FALSE;          
          break;
6912
        }
6913 6914
        default: {
          DBUG_PRINT("info", ("Found item of type %d", item->type()));
6915
          context->supported= FALSE;
6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933
        }
        }
      
      if (context->supported && context->rewrite_stack)
      {
        Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
        if (rewrite_context->count == 
            rewrite_context->func_item->argument_count())
        {
          // Rewrite is done, wrap an END() at the end
          DBUG_PRINT("info", ("End of condition group"));
          prev_cond= curr_cond;
          curr_cond= context->cond_ptr= new Ndb_cond();
          prev_cond->next= curr_cond;
          curr_cond->ndb_item= new Ndb_item(NDB_END_COND);
          // Pop rewrite stack
          context->rewrite_stack= context->rewrite_stack->next;
        }
6934
      }
6935
    }
6936
  }
6937
 
6938 6939 6940 6941 6942 6943 6944 6945
  DBUG_VOID_RETURN;
}

bool
ha_ndbcluster::serialize_cond(const COND *cond, Ndb_cond_stack *ndb_cond)
{
  DBUG_ENTER("serialize_cond");
  Item *item= (Item *) cond;
6946
  Ndb_cond_traverse_context context(table, (void *)m_table, ndb_cond);
6947 6948 6949
  // Expect a logical expression
  context.expect(Item::FUNC_ITEM);
  context.expect(Item::COND_ITEM);
6950
  item->traverse_cond(&ndb_serialize_cond, (void *) &context, Item::PREFIX);
6951
  DBUG_PRINT("info", ("The pushed condition is %ssupported", (context.supported)?"":"not "));
6952

6953
  DBUG_RETURN(context.supported);
6954 6955
}

6956 6957
int
ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond, 
6958 6959
                                           NdbScanFilter *filter,
                                           bool negated)
6960 6961 6962 6963 6964 6965 6966
{
  DBUG_ENTER("build_scan_filter_predicate");  
  switch(cond->ndb_item->type) {
  case(NDB_FUNCTION): {
    if (!cond->next)
      break;
    Ndb_item *a= cond->next->ndb_item;
6967 6968 6969 6970 6971 6972 6973
    Ndb_item *b, *field, *value= NULL;
    switch(cond->ndb_item->argument_count()) {
    case(1):
      field= 
        (a->type == NDB_FIELD)? a : NULL;
      break;
    case(2):
6974
      if (!cond->next->next)
6975
        break;
6976 6977
      b= cond->next->next->ndb_item;
      value= 
6978 6979 6980
        (a->type == NDB_VALUE)? a
        : (b->type == NDB_VALUE)? b
        : NULL;
6981
      field= 
6982 6983 6984
        (a->type == NDB_FIELD)? a
        : (b->type == NDB_FIELD)? b
        : NULL;
6985
      break;
6986
    default:
6987 6988 6989 6990 6991 6992 6993
      break;
    }
    switch((negated) ? 
           Ndb_item::negate(cond->ndb_item->qualification.function_type)
           : cond->ndb_item->qualification.function_type)
    {
    case(Item_func::EQ_FUNC): {
6994
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
6995 6996
      // Save value in right format for the field type
      value->save_in_field(field);
6997
      DBUG_PRINT("info", ("Generating EQ filter"));
6998
      if (filter->cmp(NdbScanFilter::COND_EQ, 
6999 7000 7001 7002
                      field->get_field_no(),
                      field->get_val(),
                      field->pack_length()) == -1)
        DBUG_RETURN(1);
7003 7004
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7005 7006 7007
    }
    case(Item_func::NE_FUNC): {
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7008 7009
      // Save value in right format for the field type
      value->save_in_field(field);
7010
      DBUG_PRINT("info", ("Generating NE filter"));
7011
      if (filter->cmp(NdbScanFilter::COND_NE, 
7012 7013 7014 7015
                      field->get_field_no(),
                      field->get_val(),
                      field->pack_length()) == -1)
        DBUG_RETURN(1);
7016 7017
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7018 7019 7020
    }
    case(Item_func::LT_FUNC): {
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7021 7022
      // Save value in right format for the field type
      value->save_in_field(field);
7023
      if (a == field)
7024
      {
7025 7026 7027 7028 7029 7030
        DBUG_PRINT("info", ("Generating LT filter")); 
        if (filter->cmp(NdbScanFilter::COND_LT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7031
      }
7032
      else
7033
      {
7034 7035 7036 7037 7038 7039
        DBUG_PRINT("info", ("Generating GT filter")); 
        if (filter->cmp(NdbScanFilter::COND_GT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7040
      }
7041 7042
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7043 7044 7045
    }
    case(Item_func::LE_FUNC): {
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7046 7047
      // Save value in right format for the field type
      value->save_in_field(field);
7048
      if (a == field)
7049
      {
7050 7051 7052 7053 7054 7055
        DBUG_PRINT("info", ("Generating LE filter")); 
        if (filter->cmp(NdbScanFilter::COND_LE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);       
7056
      }
7057
      else
7058
      {
7059 7060 7061 7062 7063 7064
        DBUG_PRINT("info", ("Generating GE filter")); 
        if (filter->cmp(NdbScanFilter::COND_GE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7065
      }
7066 7067
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7068 7069 7070
    }
    case(Item_func::GE_FUNC): {
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7071 7072
      // Save value in right format for the field type
      value->save_in_field(field);
7073
      if (a == field)
7074
      {
7075 7076 7077 7078 7079 7080
        DBUG_PRINT("info", ("Generating GE filter")); 
        if (filter->cmp(NdbScanFilter::COND_GE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7081
      }
7082
      else
7083
      {
7084 7085 7086 7087 7088 7089
        DBUG_PRINT("info", ("Generating LE filter")); 
        if (filter->cmp(NdbScanFilter::COND_LE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7090
      }
7091 7092
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7093 7094 7095
    }
    case(Item_func::GT_FUNC): {
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7096 7097
      // Save value in right format for the field type
      value->save_in_field(field);
7098
      if (a == field)
7099
      {
7100 7101 7102 7103 7104 7105
        DBUG_PRINT("info", ("Generating GT filter"));
        if (filter->cmp(NdbScanFilter::COND_GT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7106
      }
7107
      else
7108
      {
7109 7110 7111 7112 7113 7114
        DBUG_PRINT("info", ("Generating LT filter"));
        if (filter->cmp(NdbScanFilter::COND_LT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7115
      }
7116 7117
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7118 7119 7120
    }
    case(Item_func::LIKE_FUNC): {
      if (!value || !field) break;
7121 7122 7123
      if ((value->qualification.value_type != Item::STRING_ITEM) &&
          (value->qualification.value_type != Item::VARBIN_ITEM))
          break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7124 7125 7126
      // Save value in right format for the field type
      value->save_in_field(field);
      DBUG_PRINT("info", ("Generating LIKE filter: like(%d,%s,%d)", 
7127 7128 7129 7130
                          field->get_field_no(), value->get_val(), 
                          value->pack_length()));
      if (filter->cmp(NdbScanFilter::COND_LIKE, 
                      field->get_field_no(),
7131 7132
                      value->get_val(),
                      value->pack_length()) == -1)
7133
        DBUG_RETURN(1);
7134 7135
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7136 7137 7138
    }
    case(Item_func::NOTLIKE_FUNC): {
      if (!value || !field) break;
7139 7140 7141
      if ((value->qualification.value_type != Item::STRING_ITEM) &&
          (value->qualification.value_type != Item::VARBIN_ITEM))
        break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7142 7143 7144
      // Save value in right format for the field type
      value->save_in_field(field);
      DBUG_PRINT("info", ("Generating NOTLIKE filter: notlike(%d,%s,%d)", 
7145 7146 7147 7148
                          field->get_field_no(), value->get_val(), 
                          value->pack_length()));
      if (filter->cmp(NdbScanFilter::COND_NOT_LIKE, 
                      field->get_field_no(),
7149 7150
                      value->get_val(),
                      value->pack_length()) == -1)
7151
        DBUG_RETURN(1);
7152 7153
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7154 7155
    }
    case(Item_func::ISNULL_FUNC):
7156 7157 7158 7159 7160
      if (!field)
        break;
      DBUG_PRINT("info", ("Generating ISNULL filter"));
      if (filter->isnull(field->get_field_no()) == -1)
        DBUG_RETURN(1);
7161 7162
      cond= cond->next->next;
      DBUG_RETURN(0);
7163
    case(Item_func::ISNOTNULL_FUNC): {
7164 7165 7166 7167 7168
      if (!field)
        break;
      DBUG_PRINT("info", ("Generating ISNOTNULL filter"));
      if (filter->isnotnull(field->get_field_no()) == -1)
        DBUG_RETURN(1);         
7169 7170
      cond= cond->next->next;
      DBUG_RETURN(0);
7171 7172 7173 7174 7175 7176 7177 7178 7179 7180
    }
    default:
      break;
    }
    break;
  }
  default:
    break;
  }
  DBUG_PRINT("info", ("Found illegal condition"));
7181
  DBUG_RETURN(1);
7182 7183
}

7184
int
7185
ha_ndbcluster::build_scan_filter_group(Ndb_cond* &cond, NdbScanFilter *filter)
7186
{
7187
  uint level=0;
7188
  bool negated= FALSE;
7189

7190
  DBUG_ENTER("build_scan_filter_group");
7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202
  do
  {
    if (!cond) DBUG_RETURN(1);
    switch(cond->ndb_item->type) {
    case(NDB_FUNCTION):
      switch(cond->ndb_item->qualification.function_type) {
      case(Item_func::COND_AND_FUNC): {
        level++;
        DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NAND":"AND",
                            level));
        if ((negated) ? filter->begin(NdbScanFilter::NAND)
            : filter->begin(NdbScanFilter::AND) == -1)
7203
          DBUG_RETURN(1);
7204
        negated= FALSE;
7205 7206 7207 7208 7209 7210 7211 7212 7213 7214
        cond= cond->next;
        break;
      }
      case(Item_func::COND_OR_FUNC): {
        level++;
        DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NOR":"OR",
                            level));
        if ((negated) ? filter->begin(NdbScanFilter::NOR)
            : filter->begin(NdbScanFilter::OR) == -1)
          DBUG_RETURN(1);
7215
        negated= FALSE;
7216 7217 7218 7219
        cond= cond->next;
        break;
      }
      case(Item_func::NOT_FUNC): {
7220
        DBUG_PRINT("info", ("Generating negated query"));
7221
        cond= cond->next;
7222
        negated= TRUE;
7223
        
7224 7225 7226 7227
        break;
      }
      default:
        if (build_scan_filter_predicate(cond, filter, negated))
7228
          DBUG_RETURN(1);
7229
        negated= FALSE;
7230 7231 7232 7233 7234 7235
        break;
      }
      break;
    case(NDB_END_COND):
      DBUG_PRINT("info", ("End of group %u", level));
      level--;
7236 7237
      if (cond) cond= cond->next;
      if (filter->end() == -1)
7238
        DBUG_RETURN(1);
7239 7240 7241
      if (!negated)
        break;
      // else fall through (NOT END is an illegal condition)
7242 7243
    default: {
      DBUG_PRINT("info", ("Illegal scan filter"));
7244
    }
7245
    }
7246
  }  while (level > 0 || negated);
7247
  
7248
  DBUG_RETURN(0);
7249 7250
}

7251 7252
/*
  Build the scan filter for one serialized condition.

  SYNOPSIS
    cond      First node of the serialized condition; passed on to
              build_scan_filter_group(), which advances it.
    filter    Scan filter that receives the generated operations.

  NOTES
    A condition that starts with AND/OR is handed directly to
    build_scan_filter_group().  Any other condition is bracketed by
    filter->begin() / filter->end() around that call.

  RETURN
    0   Success.
    1   The filter reported an error (-1) or the group could not be built.
*/
int
ha_ndbcluster::build_scan_filter(Ndb_cond * &cond, NdbScanFilter *filter)
{
  DBUG_ENTER("build_scan_filter");

  // The function type is only inspected for NDB_FUNCTION nodes
  const bool starts_with_group=
    cond->ndb_item->type == NDB_FUNCTION &&
    (cond->ndb_item->qualification.function_type ==
     Item_func::COND_AND_FUNC ||
     cond->ndb_item->qualification.function_type ==
     Item_func::COND_OR_FUNC);

  if (starts_with_group)
  {
    if (build_scan_filter_group(cond, filter))
      DBUG_RETURN(1);
    DBUG_RETURN(0);
  }

  // Simple condition: bracket it with begin()/end(); evaluation stops at
  // the first failing step
  if (filter->begin() == -1 ||
      build_scan_filter_group(cond, filter) ||
      filter->end() == -1)
    DBUG_RETURN(1);

  DBUG_RETURN(0);
}

7281
int
7282
ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack,
7283
                                    NdbScanOperation *op)
7284 7285 7286 7287
{
  DBUG_ENTER("generate_scan_filter");
  if (ndb_cond_stack)
  {
7288
    DBUG_PRINT("info", ("Generating scan filter"));
7289 7290 7291 7292 7293
    NdbScanFilter filter(op);
    bool multiple_cond= FALSE;
    // Wrap an AND group around multiple conditions
    if (ndb_cond_stack->next) {
      multiple_cond= TRUE;
7294
      if (filter.begin() == -1)
7295
        DBUG_RETURN(1); 
7296 7297
    }
    for (Ndb_cond_stack *stack= ndb_cond_stack; 
7298 7299
         (stack); 
         stack= stack->next)
7300
      {
7301
        Ndb_cond *cond= stack->ndb_cond;
7302

7303 7304 7305 7306 7307
        if (build_scan_filter(cond, &filter))
        {
          DBUG_PRINT("info", ("build_scan_filter failed"));
          DBUG_RETURN(1);
        }
7308
      }
7309 7310
    if (multiple_cond && filter.end() == -1)
      DBUG_RETURN(1);
7311 7312 7313 7314 7315 7316
  }
  else
  {  
    DBUG_PRINT("info", ("Empty stack"));
  }

7317
  DBUG_RETURN(0);
7318 7319
}

7320
#endif /* HAVE_NDBCLUSTER_DB */