/* Copyright (C) 2000-2003 MySQL AB

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

/*
  This file defines the NDB Cluster handler: the interface between MySQL and
  NDB Cluster
*/

23
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"

#ifdef HAVE_NDBCLUSTER_DB
#include <my_dir.h>
#include "ha_ndbcluster.h"
#include <ndbapi/NdbApi.hpp>
#include <ndbapi/NdbScanFilter.hpp>

35 36 37 38
// Options defined in mysqld.cc
extern my_bool opt_ndb_optimized_node_selection;
extern const char *opt_ndbcluster_connectstring;

// Default value for parallelism
static const int parallelism= 0;

// Default value for max number of transactions
// creatable against NDB from this handler
static const int max_transactions= 2;

// File name extension associated with this handler
static const char *ha_ndb_ext=".ndb";

// Handlerton callbacks, referenced by ndbcluster_hton before their definitions
static int ndbcluster_close_connection(THD *thd);
static int ndbcluster_commit(THD *thd, bool all);
static int ndbcluster_rollback(THD *thd, bool all);

52
/*
  Storage engine descriptor for NDB Cluster.

  The initializer is positional, so the order of the entries must match
  the declaration of handlerton. Entries set to NULL are optional
  callbacks that this engine does not provide.
*/
handlerton ndbcluster_hton = {
  "ndbcluster",
  SHOW_OPTION_YES,
  "Clustered, fault-tolerant, memory-based tables", 
  DB_TYPE_NDBCLUSTER,
  ndbcluster_init,
  0, /* slot */
  0, /* savepoint size */
  ndbcluster_close_connection,
  NULL, /* savepoint_set */
  NULL, /* savepoint_rollback */
  NULL, /* savepoint_release */
  ndbcluster_commit,
  ndbcluster_rollback,
  NULL, /* prepare */
  NULL, /* recover */
  NULL, /* commit_by_xid */
  NULL, /* rollback_by_xid */
  NULL, /* create_cursor_read_view */
  NULL, /* set_cursor_read_view */
  NULL, /* close_cursor_read_view */
  HTON_CAN_RECREATE
};

76
#define NDB_AUTO_INCREMENT_RETRIES 10

#define NDB_INVALID_SCHEMA_OBJECT 241

80
/*
  Write the code and message of an NdbError to the DBUG trace
  under the "error" keyword.
*/
#define ERR_PRINT(err) \
  DBUG_PRINT("error", ("%d  message: %s", (err).code, (err).message))

/*
  Trace an NdbError and return from the enclosing function with the
  error mapped through ndb_to_mysql_error().

  The argument is bound to a const reference first, so an expression
  such as dict->getNdbError() is evaluated only once.
  Expands to a brace block: use it as a complete statement.
*/
#define ERR_RETURN(err)                  \
{                                        \
  const NdbError& tmp= (err);            \
  ERR_PRINT(tmp);                        \
  DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
}

// Typedefs for long names
typedef NdbDictionary::Column NDBCOL;
joreland@mysql.com's avatar
joreland@mysql.com committed
92
typedef NdbDictionary::Table NDBTAB;
93 94 95
typedef NdbDictionary::Index  NDBINDEX;
typedef NdbDictionary::Dictionary  NDBDICT;

96
bool ndbcluster_inited= FALSE;
97

98
static Ndb* g_ndb= NULL;
99
static Ndb_cluster_connection* g_ndb_cluster_connection= NULL;
100

101 102 103 104 105 106 107 108 109 110 111 112 113
// Handler synchronization
pthread_mutex_t ndbcluster_mutex;

// Table lock handling
static HASH ndbcluster_open_tables;

static byte *ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                my_bool not_used __attribute__((unused)));
static NDB_SHARE *get_share(const char *table_name);
static void free_share(NDB_SHARE *share);

static int packfrm(const void *data, uint len, const void **pack_data, uint *pack_len);
static int unpackfrm(const void **data, uint *len,
114
                     const void* pack_data);
115

116
static int ndb_get_table_statistics(Ndb*, const char *, 
117
                                    struct Ndb_statistics *);
118

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
119 120 121 122
// Util thread variables
static pthread_t ndb_util_thread;
pthread_mutex_t LOCK_ndb_util_thread;
pthread_cond_t COND_ndb_util_thread;
123
pthread_handler_t ndb_util_thread_func(void *arg);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
124
ulong ndb_cache_check_time;
125

126 127 128 129
/*
  Dummy buffer to read zero pack_length fields
  which are mapped to 1 char
*/
130
static uint32 dummy_buf;
131

132 133 134 135 136 137 138 139 140 141 142
/*
  Stats that can be retrieved from ndb
*/

/* Output record filled in by ndb_get_table_statistics() */
struct Ndb_statistics {
  Uint64 row_count;        /* copied to the handler's record count */
  Uint64 commit_count;
  Uint64 row_size;         /* copied to mean_rec_length */
  Uint64 fragment_memory;  /* copied to data_file_length */
};

143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
/*
  Status variables shown with "show status like 'Ndb%'".
  They are refreshed from the cluster connection by
  update_status_variables().
*/

static long ndb_cluster_node_id= 0;
static const char * ndb_connected_host= 0;
static long ndb_connected_port= 0;
static long ndb_number_of_replicas= 0;
static long ndb_number_of_storage_nodes= 0;

/*
  Refresh the file-scope status variables from a cluster connection.

  SYNOPSIS
    update_status_variables()
    c                   Connection to query

  NOTES
    The replica count is not read from the connection; it is always
    reset to 0.

  RETURN
    0   Always
*/
static int update_status_variables(Ndb_cluster_connection *c)
{
  /* Identity of this node and of the endpoint the connection reports */
  ndb_cluster_node_id= c->node_id();
  ndb_connected_port= c->get_connected_port();
  ndb_connected_host= c->get_connected_host();

  /* Cluster size figures */
  ndb_number_of_replicas= 0;
  ndb_number_of_storage_nodes= c->no_db_nodes();

  return 0;
}

struct show_var_st ndb_status_variables[]= {
  {"cluster_node_id",        (char*) &ndb_cluster_node_id,         SHOW_LONG},
163 164
  {"config_from_host",         (char*) &ndb_connected_host,      SHOW_CHAR_PTR},
  {"config_from_port",         (char*) &ndb_connected_port,          SHOW_LONG},
165 166 167 168 169
//  {"number_of_replicas",     (char*) &ndb_number_of_replicas,      SHOW_LONG},
  {"number_of_storage_nodes",(char*) &ndb_number_of_storage_nodes, SHOW_LONG},
  {NullS, NullS, SHOW_LONG}
};

170 171 172 173 174 175 176 177
/*
  Error handling functions
*/

/* One row of the NDB to MySQL error translation table */
struct err_code_mapping
{
  int ndb_err;       /* NDB error code to match */
  int my_err;        /* error code returned instead; -1 marks the last row */
  int show_warning;  /* non-zero: also push the NDB message as a warning */
};

/*
  NDB to MySQL error translation table, scanned linearly by
  ndb_to_mysql_error(). The { -1, -1, 1 } row must stay last: it stops
  the scan, and codes that reach it are returned untranslated with a
  warning.
*/
static const err_code_mapping err_map[]= 
{
  { 626, HA_ERR_KEY_NOT_FOUND, 0 },
  { 630, HA_ERR_FOUND_DUPP_KEY, 0 },
  { 893, HA_ERR_FOUND_DUPP_KEY, 0 },
  { 721, HA_ERR_TABLE_EXIST, 1 },
  { 4244, HA_ERR_TABLE_EXIST, 1 },

  { 709, HA_ERR_NO_SUCH_TABLE, 0 },

  { 266, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 274, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 296, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 297, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },
  { 237, HA_ERR_LOCK_WAIT_TIMEOUT, 1 },

  { 623, HA_ERR_RECORD_FILE_FULL, 1 },
  { 624, HA_ERR_RECORD_FILE_FULL, 1 },
  { 625, HA_ERR_RECORD_FILE_FULL, 1 },
  { 826, HA_ERR_RECORD_FILE_FULL, 1 },
  { 827, HA_ERR_RECORD_FILE_FULL, 1 },
  { 832, HA_ERR_RECORD_FILE_FULL, 1 },

  { 284, HA_ERR_TABLE_DEF_CHANGED, 0 },

  /* NDB code 0 is translated to 1 */
  { 0, 1, 0 },

  /* End marker, see above */
  { -1, -1, 1 }
};


static int ndb_to_mysql_error(const NdbError *err)
{
  uint i;
215 216
  for (i=0; err_map[i].ndb_err != err->code && err_map[i].my_err != -1; i++);
  if (err_map[i].show_warning)
217
  {
218 219
    // Push the NDB error message as warning
    push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_ERROR,
220 221
                        ER_GET_ERRMSG, ER(ER_GET_ERRMSG),
                        err->code, err->message, "NDB");
222
  }
223 224
  if (err_map[i].my_err == -1)
    return err->code;
225 226 227 228
  return err_map[i].my_err;
}


229 230

inline
231 232
int execute_no_commit(ha_ndbcluster *h, NdbTransaction *trans,
		      bool force_release)
233
{
234
#ifdef NOT_USED
235
  int m_batch_execute= 0;
236
  if (m_batch_execute)
237
    return 0;
238
#endif
239
  h->release_completed_operations(trans, force_release);
240
  return trans->execute(NdbTransaction::NoCommit,
241 242
                        NdbTransaction::AbortOnError,
                        h->m_force_send);
243 244 245
}

inline
246
int execute_commit(ha_ndbcluster *h, NdbTransaction *trans)
247
{
248
#ifdef NOT_USED
249
  int m_batch_execute= 0;
250
  if (m_batch_execute)
251
    return 0;
252
#endif
253
  return trans->execute(NdbTransaction::Commit,
254 255
                        NdbTransaction::AbortOnError,
                        h->m_force_send);
256 257 258
}

inline
259
int execute_commit(THD *thd, NdbTransaction *trans)
260 261
{
#ifdef NOT_USED
262
  int m_batch_execute= 0;
263 264 265
  if (m_batch_execute)
    return 0;
#endif
266
  return trans->execute(NdbTransaction::Commit,
267 268
                        NdbTransaction::AbortOnError,
                        thd->variables.ndb_force_send);
269 270 271
}

inline
272 273
int execute_no_commit_ie(ha_ndbcluster *h, NdbTransaction *trans,
			 bool force_release)
274
{
275
#ifdef NOT_USED
276
  int m_batch_execute= 0;
277
  if (m_batch_execute)
278
    return 0;
279
#endif
280
  h->release_completed_operations(trans, force_release);
281
  return trans->execute(NdbTransaction::NoCommit,
282 283
                        NdbTransaction::AO_IgnoreError,
                        h->m_force_send);
284 285
}

286 287 288
/*
  Place holder for ha_ndbcluster thread specific data
*/
289 290
/*
  Create the per-thread NDB state: a new Ndb object on the global
  cluster connection, with counters, transaction pointers and error
  flag cleared.
*/
Thd_ndb::Thd_ndb()
{
  ndb= new Ndb(g_ndb_cluster_connection, "");
  lock_count= 0;
  count= 0;
  all= NULL;
  stmt= NULL;
  error= 0;
  /* Assign, not "&=": the member has no defined value yet */
  query_state= NDB_QUERY_NORMAL;
}

/*
  Destroy the per-thread NDB state.

  In debug builds, walk the Ndb object's free lists first and report
  on stderr every kind of object for which more were created than are
  free. Then delete the Ndb object and empty the changed_tables list.
*/
Thd_ndb::~Thd_ndb()
{
  if (ndb)
  {
#ifndef DBUG_OFF
    Ndb::Free_list_usage tmp;
    /* A zero name starts the iteration */
    tmp.m_name= 0;
    while (ndb->get_free_list_usage(&tmp))
    {
      uint leaked= (uint) tmp.m_created - tmp.m_free;
      if (leaked)
        fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n",
                leaked, tmp.m_name,
                (leaked == 1)?"":"'s",
                (leaked == 1)?"has":"have");
    }
#endif
    delete ndb;
    ndb= NULL;
  }
  changed_tables.empty();
}

323 324 325 326 327 328 329 330
/* Return the Thd_ndb stored in this engine's ha_data slot of the thread */
inline
Thd_ndb *
get_thd_ndb(THD *thd)
{
  return (Thd_ndb *) thd->ha_data[ndbcluster_hton.slot];
}

/* Store a Thd_ndb in this engine's ha_data slot of the thread */
inline
void
set_thd_ndb(THD *thd, Thd_ndb *thd_ndb)
{
  thd->ha_data[ndbcluster_hton.slot]= thd_ndb;
}

331 332 333
/*
  Return the Ndb object of the current thread.
  The current thread's Thd_ndb is dereferenced without a NULL check.
*/
inline
Ndb *ha_ndbcluster::get_ndb()
{
  return get_thd_ndb(current_thd)->ndb;
}

/*
 * Keep track of uncommitted inserts/deletes during a transaction so
 * that the reported record count is correct
 */

341
struct Ndb_local_table_statistics {
342
  int no_uncommitted_rows_count;
343
  ulong last_count;
344 345 346
  ha_rows records;
};

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
347 348 349
void ha_ndbcluster::set_rec_per_key()
{
  DBUG_ENTER("ha_ndbcluster::get_status_const");
350
  for (uint i=0 ; i < table->s->keys ; i++)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
351 352 353 354 355 356
  {
    table->key_info[i].rec_per_key[table->key_info[i].key_parts-1]= 1;
  }
  DBUG_VOID_RETURN;
}

357 358
void ha_ndbcluster::records_update()
{
359 360
  if (m_ha_not_exact_count)
    return;
361
  DBUG_ENTER("ha_ndbcluster::records_update");
362 363
  struct Ndb_local_table_statistics *info= 
    (struct Ndb_local_table_statistics *)m_table_info;
364
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
365 366
                      ((const NDBTAB *)m_table)->getTableId(),
                      info->no_uncommitted_rows_count));
367
  //  if (info->records == ~(ha_rows)0)
368
  {
369
    Ndb *ndb= get_ndb();
370
    struct Ndb_statistics stat;
371
    ndb->setDatabaseName(m_dbname);
372
    if (ndb_get_table_statistics(ndb, m_tabname, &stat) == 0){
373 374 375
      mean_rec_length= stat.row_size;
      data_file_length= stat.fragment_memory;
      info->records= stat.row_count;
376 377
    }
  }
378 379
  {
    THD *thd= current_thd;
380
    if (get_thd_ndb(thd)->error)
381 382
      info->no_uncommitted_rows_count= 0;
  }
383 384 385 386
  records= info->records+ info->no_uncommitted_rows_count;
  DBUG_VOID_RETURN;
}

387 388
void ha_ndbcluster::no_uncommitted_rows_execute_failure()
{
389 390
  if (m_ha_not_exact_count)
    return;
391
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_execute_failure");
392
  get_thd_ndb(current_thd)->error= 1;
393 394 395
  DBUG_VOID_RETURN;
}

396 397
/*
  Reset this handler's row accounting if it is out of date.

  The accounting is out of date when its last_count differs from the
  thread's Thd_ndb::count, which no_uncommitted_rows_reset()
  increments. In that case the uncommitted row count is cleared and
  the record count is set to ~(ha_rows)0.
  Does nothing when m_ha_not_exact_count is set.
*/
void ha_ndbcluster::no_uncommitted_rows_init(THD *thd)
{
  if (m_ha_not_exact_count)
    return;
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_init");
  struct Ndb_local_table_statistics *info= 
    (struct Ndb_local_table_statistics *)m_table_info;
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  if (info->last_count != thd_ndb->count)
  {
    info->last_count= thd_ndb->count;
    info->no_uncommitted_rows_count= 0;
    info->records= ~(ha_rows)0;
    DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
                        ((const NDBTAB *)m_table)->getTableId(),
                        info->no_uncommitted_rows_count));
  }
  DBUG_VOID_RETURN;
}

void ha_ndbcluster::no_uncommitted_rows_update(int c)
{
418 419
  if (m_ha_not_exact_count)
    return;
420
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_update");
421 422
  struct Ndb_local_table_statistics *info=
    (struct Ndb_local_table_statistics *)m_table_info;
423 424
  info->no_uncommitted_rows_count+= c;
  DBUG_PRINT("info", ("id=%d, no_uncommitted_rows_count=%d",
425 426
                      ((const NDBTAB *)m_table)->getTableId(),
                      info->no_uncommitted_rows_count));
427 428 429 430 431
  DBUG_VOID_RETURN;
}

/*
  Increment the thread's Thd_ndb::count and clear its error flag.
  Handlers notice the changed count in no_uncommitted_rows_init() and
  reset their own accounting. Does nothing when m_ha_not_exact_count
  is set.
*/
void ha_ndbcluster::no_uncommitted_rows_reset(THD *thd)
{
  if (m_ha_not_exact_count)
    return;
  DBUG_ENTER("ha_ndbcluster::no_uncommitted_rows_reset");
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  thd_ndb->count++;
  thd_ndb->error= 0;
  DBUG_VOID_RETURN;
}

441 442
/*
  Take care of the error that occurred in NDB

  RETURN
    0   No error
    #   The mapped error code
*/

449
void ha_ndbcluster::invalidate_dictionary_cache(bool global)
450 451
{
  NDBDICT *dict= get_ndb()->getDictionary();
452
  DBUG_ENTER("invalidate_dictionary_cache");
453
  DBUG_PRINT("info", ("invalidating %s", m_tabname));
454

455
  if (global)
456
  {
457 458 459 460
    const NDBTAB *tab= dict->getTable(m_tabname);
    if (!tab)
      DBUG_VOID_RETURN;
    if (tab->getObjectStatus() == NdbDictionary::Object::Invalid)
461 462 463 464 465 466 467 468
    {
      // Global cache has already been invalidated
      dict->removeCachedTable(m_tabname);
      global= FALSE;
    }
    else
      dict->invalidateTable(m_tabname);
  }
469 470
  else
    dict->removeCachedTable(m_tabname);
471
  table->s->version=0L;			/* Free when thread is ready */
472
  /* Invalidate indexes */
473
  for (uint i= 0; i < table->s->keys; i++)
474 475 476 477 478
  {
    NDBINDEX *index = (NDBINDEX *) m_index[i].index;
    NDBINDEX *unique_index = (NDBINDEX *) m_index[i].unique_index;
    NDB_INDEX_TYPE idx_type= m_index[i].type;

479 480 481
    switch (idx_type) {
    case PRIMARY_KEY_ORDERED_INDEX:
    case ORDERED_INDEX:
482 483 484 485
      if (global)
        dict->invalidateIndex(index->getName(), m_tabname);
      else
        dict->removeCachedIndex(index->getName(), m_tabname);
serg@serg.mylan's avatar
serg@serg.mylan committed
486
      break;
487
    case UNIQUE_ORDERED_INDEX:
488 489 490 491
      if (global)
        dict->invalidateIndex(index->getName(), m_tabname);
      else
        dict->removeCachedIndex(index->getName(), m_tabname);
492
    case UNIQUE_INDEX:
493 494 495 496
      if (global)
        dict->invalidateIndex(unique_index->getName(), m_tabname);
      else
        dict->removeCachedIndex(unique_index->getName(), m_tabname);
497
      break;
498 499
    case PRIMARY_KEY_INDEX:
    case UNDEFINED_INDEX:
500 501 502
      break;
    }
  }
503
  DBUG_VOID_RETURN;
504
}
505

506
/*
  Handle the error currently set on a transaction and translate it to
  a MySQL error code.

  SYNOPSIS
    ndb_err()
    trans               Transaction whose error is examined

  NOTES
    For schema errors, cached handlers for this table are closed and
    the dictionary cache is invalidated globally. For NDB error 284
    the table is looked up again; if that lookup fails with anything
    other than 709, 1 is returned. If it fails with 709, that error
    replaces the original one before translation.
    On a duplicate key error m_dupkey is set to the primary key when a
    single row is being inserted, and to (uint) -1 otherwise.

  RETURN
    The error code from ndb_to_mysql_error(), or 1 as described above
*/
int ha_ndbcluster::ndb_err(NdbTransaction *trans)
{
  int res;
  NdbError err= trans->getNdbError();
  DBUG_ENTER("ndb_err");
  
  ERR_PRINT(err);
  switch (err.classification) {
  case NdbError::SchemaError:
  {
    /* Close other open handlers not used by any thread */
    TABLE_LIST table_list;
    bzero((char*) &table_list,sizeof(table_list));
    table_list.db= m_dbname;
    table_list.alias= table_list.table_name= m_tabname;
    close_cached_tables(current_thd, 0, &table_list);

    invalidate_dictionary_cache(TRUE);

    if (err.code==284)
    {
      /*
         Check if the table is _really_ gone or if the table has
         been altered and thus changed table id
       */
      NDBDICT *dict= get_ndb()->getDictionary();
      DBUG_PRINT("info", ("Check if table %s is really gone", m_tabname));
      if (!(dict->getTable(m_tabname)))
      {
        err= dict->getNdbError();
        DBUG_PRINT("info", ("Table not found, error: %d", err.code));
        if (err.code != 709)
          DBUG_RETURN(1);
      }
      DBUG_PRINT("info", ("Table exists but must have changed"));
    }
    break;
  }
  default:
    break;
  }
  res= ndb_to_mysql_error(&err);
  DBUG_PRINT("info", ("transformed ndbcluster error %d to mysql error %d", 
                      err.code, res));
  if (res == HA_ERR_FOUND_DUPP_KEY)
  {
    if (m_rows_to_insert == 1)
      m_dupkey= table->s->primary_key;
    else
    {
      /* We are batching inserts, offending key is not available */
      m_dupkey= (uint) -1;
    }
  }
  DBUG_RETURN(res);
}


564
/*
  Override the default get_error_message in order to add the
  error message of NDB
 */

569
bool ha_ndbcluster::get_error_message(int error, 
570
                                      String *buf)
571
{
572
  DBUG_ENTER("ha_ndbcluster::get_error_message");
573
  DBUG_PRINT("enter", ("error: %d", error));
574

575
  Ndb *ndb= get_ndb();
576
  if (!ndb)
577
    DBUG_RETURN(FALSE);
578

579
  const NdbError err= ndb->getNdbError(error);
580 581 582 583
  bool temporary= err.status==NdbError::TemporaryError;
  buf->set(err.message, strlen(err.message), &my_charset_bin);
  DBUG_PRINT("exit", ("message: %s, temporary: %d", buf->ptr(), temporary));
  DBUG_RETURN(temporary);
584 585 586
}


tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
587
#ifndef DBUG_OFF
pekka@mysql.com's avatar
pekka@mysql.com committed
588 589 590 591
/*
  Check if type is supported by NDB.
*/

tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
592
/*
  RETURN
    TRUE    The field type is in the supported list below
    FALSE   MYSQL_TYPE_NULL, or a value not listed in the switch
*/
static bool ndb_supported_type(enum_field_types type)
{
  switch (type) {
  case MYSQL_TYPE_TINY:        
  case MYSQL_TYPE_SHORT:
  case MYSQL_TYPE_LONG:
  case MYSQL_TYPE_INT24:       
  case MYSQL_TYPE_LONGLONG:
  case MYSQL_TYPE_FLOAT:
  case MYSQL_TYPE_DOUBLE:
  case MYSQL_TYPE_DECIMAL:    
  case MYSQL_TYPE_NEWDECIMAL:
  case MYSQL_TYPE_TIMESTAMP:
  case MYSQL_TYPE_DATETIME:    
  case MYSQL_TYPE_DATE:
  case MYSQL_TYPE_NEWDATE:
  case MYSQL_TYPE_TIME:        
  case MYSQL_TYPE_YEAR:        
  case MYSQL_TYPE_STRING:      
  case MYSQL_TYPE_VAR_STRING:
  case MYSQL_TYPE_VARCHAR:
  case MYSQL_TYPE_TINY_BLOB:
  case MYSQL_TYPE_BLOB:    
  case MYSQL_TYPE_MEDIUM_BLOB:   
  case MYSQL_TYPE_LONG_BLOB:  
  case MYSQL_TYPE_ENUM:
  case MYSQL_TYPE_SET:         
  case MYSQL_TYPE_BIT:
  case MYSQL_TYPE_GEOMETRY:
    return TRUE;
  case MYSQL_TYPE_NULL:   
    break;
  }
  return FALSE;
}
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
627
#endif /* !DBUG_OFF */
pekka@mysql.com's avatar
pekka@mysql.com committed
628 629


630 631 632 633 634
/*
  Instruct NDB to set the value of the hidden primary key
*/

bool ha_ndbcluster::set_hidden_key(NdbOperation *ndb_op,
635
                                   uint fieldnr, const byte *field_ptr)
636 637 638
{
  DBUG_ENTER("set_hidden_key");
  DBUG_RETURN(ndb_op->equal(fieldnr, (char*)field_ptr,
639
                            NDB_HIDDEN_PRIMARY_KEY_LENGTH) != 0);
640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656
}


/*
  Instruct NDB to set the value of one primary key attribute
*/

/*
  SYNOPSIS
    set_ndb_key()
    ndb_op              Operation to add the equality condition to
    field               Field describing the key attribute; must not
                        be a blob
    fieldnr             Attribute number
    field_ptr           Key value, field->pack_length() bytes

  RETURN
    0   ok
    1   NdbOperation::equal() failed
*/
int ha_ndbcluster::set_ndb_key(NdbOperation *ndb_op, Field *field,
                               uint fieldnr, const byte *field_ptr)
{
  uint32 pack_len= field->pack_length();
  DBUG_ENTER("set_ndb_key");
  DBUG_PRINT("enter", ("%d: %s, ndb_type: %u, len=%d", 
                       fieldnr, field->field_name, field->type(),
                       pack_len));
  DBUG_DUMP("key", (char*)field_ptr, pack_len);
  
  DBUG_ASSERT(ndb_supported_type(field->type()));
  DBUG_ASSERT(! (field->flags & BLOB_FLAG));
  // Common implementation for most field types
  DBUG_RETURN(ndb_op->equal(fieldnr, (char*) field_ptr, pack_len) != 0);
}


/*
 Instruct NDB to set the value of one attribute
*/

int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field, 
669
                                 uint fieldnr, bool *set_blob_value)
670 671 672 673 674 675 676 677
{
  const byte* field_ptr= field->ptr;
  uint32 pack_len=  field->pack_length();
  DBUG_ENTER("set_ndb_value");
  DBUG_PRINT("enter", ("%d: %s, type: %u, len=%d, is_null=%s", 
                       fieldnr, field->field_name, field->type(), 
                       pack_len, field->is_null()?"Y":"N"));
  DBUG_DUMP("value", (char*) field_ptr, pack_len);
pekka@mysql.com's avatar
pekka@mysql.com committed
678

tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
679
  DBUG_ASSERT(ndb_supported_type(field->type()));
680
  {
681
    // ndb currently does not support size 0
682
    uint32 empty_field;
683 684
    if (pack_len == 0)
    {
685 686 687
      pack_len= sizeof(empty_field);
      field_ptr= (byte *)&empty_field;
      if (field->is_null())
688
        empty_field= 0;
689
      else
690
        empty_field= 1;
691
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
692 693
    if (! (field->flags & BLOB_FLAG))
    {
694 695
      if (field->type() != MYSQL_TYPE_BIT)
      {
696 697 698 699 700 701 702
        if (field->is_null())
          // Set value to NULL
          DBUG_RETURN((ndb_op->setValue(fieldnr, 
                                        (char*)NULL, pack_len) != 0));
        // Common implementation for most field types
        DBUG_RETURN(ndb_op->setValue(fieldnr, 
                                     (char*)field_ptr, pack_len) != 0);
703 704 705
      }
      else // if (field->type() == MYSQL_TYPE_BIT)
      {
706
        longlong bits= field->val_int();
707
 
708 709
        // Round up bit field length to nearest word boundry
        pack_len= ((pack_len + 3) >> 2) << 2;
710 711 712 713 714
        DBUG_ASSERT(pack_len <= 8);
        if (field->is_null())
          // Set value to NULL
          DBUG_RETURN((ndb_op->setValue(fieldnr, (char*)NULL, pack_len) != 0));
        DBUG_PRINT("info", ("bit field"));
715
        DBUG_DUMP("value", (char*)&bits, pack_len);
716
#ifdef WORDS_BIGENDIAN
717 718 719 720 721
        if (pack_len < 5)
        {
          DBUG_RETURN(ndb_op->setValue(fieldnr, 
                                       ((char*)&bits)+4, pack_len) != 0);
        }
722
#endif
723
        DBUG_RETURN(ndb_op->setValue(fieldnr, (char*)&bits, pack_len) != 0);
724
      }
pekka@mysql.com's avatar
pekka@mysql.com committed
725 726
    }
    // Blob type
727
    NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
pekka@mysql.com's avatar
pekka@mysql.com committed
728 729 730 731 732 733 734 735 736 737 738 739
    if (ndb_blob != NULL)
    {
      if (field->is_null())
        DBUG_RETURN(ndb_blob->setNull() != 0);

      Field_blob *field_blob= (Field_blob*)field;

      // Get length and pointer to data
      uint32 blob_len= field_blob->get_length(field_ptr);
      char* blob_ptr= NULL;
      field_blob->get_ptr(&blob_ptr);

740 741 742
      // Looks like NULL ptr signals length 0 blob
      if (blob_ptr == NULL) {
        DBUG_ASSERT(blob_len == 0);
743
        blob_ptr= (char*)"";
744
      }
pekka@mysql.com's avatar
pekka@mysql.com committed
745

elliot@mysql.com's avatar
elliot@mysql.com committed
746 747
      DBUG_PRINT("value", ("set blob ptr=%p len=%u",
                           blob_ptr, blob_len));
pekka@mysql.com's avatar
pekka@mysql.com committed
748 749
      DBUG_DUMP("value", (char*)blob_ptr, min(blob_len, 26));

750
      if (set_blob_value)
751
        *set_blob_value= TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
752 753 754 755
      // No callback needed to write value
      DBUG_RETURN(ndb_blob->setValue(blob_ptr, blob_len) != 0);
    }
    DBUG_RETURN(1);
756
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
}


/*
  Callback to read all blob values.
  - not done in unpack_record because unpack_record is valid
    after execute(Commit) but reading blobs is not
  - may only generate read operations; they have to be executed
    somewhere before the data is available
  - due to single buffer for all blobs, we let the last blob
    process all blobs (last so that all are active)
  - null bit is still set in unpack_record
  - TODO allocate blob part aligned buffers
*/

772
NdbBlob::ActiveHook g_get_ndb_blobs_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
773

774
/*
  Blob active hook, registered by ha_ndbcluster::get_ndb_value().

  SYNOPSIS
    g_get_ndb_blobs_value()
    ndb_blob            Blob handle the hook is called for
    arg                 The ha_ndbcluster that registered the hook

  NOTES
    Does nothing unless ndb_blob is the last blob in the chain
    (blobsNextBlob() returns NULL); the last one reads all blobs.

  RETURN
    0 for a blob that is not the last one, otherwise the result of
    ha_ndbcluster::get_ndb_blobs_value()
*/
int g_get_ndb_blobs_value(NdbBlob *ndb_blob, void *arg)
{
  DBUG_ENTER("g_get_ndb_blobs_value");
  if (ndb_blob->blobsNextBlob() != NULL)
    DBUG_RETURN(0);
  ha_ndbcluster *ha= (ha_ndbcluster *)arg;
  DBUG_RETURN(ha->get_ndb_blobs_value(ndb_blob, ha->m_blobs_offset));
}

783 784
/*
  Read the data of all fetched blobs into m_blobs_buffer and point the
  blob fields at it.

  SYNOPSIS
    get_ndb_blobs_value()
    last_ndb_blob       Not used by this function
    ptrdiff             Added to each blob field's ptr while the
                        length and data pointer are stored, then
                        subtracted again

  NOTES
    The fields are walked twice. The first pass only sums the blob
    lengths, each rounded up to a multiple of 8, and grows
    m_blobs_buffer if it is too small. The second pass reads the data
    at the same offsets.

  RETURN
    0   ok
    -1  A blob call or the buffer allocation failed
*/
int ha_ndbcluster::get_ndb_blobs_value(NdbBlob *last_ndb_blob,
				       my_ptrdiff_t ptrdiff)
{
  DBUG_ENTER("get_ndb_blobs_value");

  // Field has no field number so cannot use TABLE blob_field
  // Loop twice, first only counting total buffer size
  for (int loop= 0; loop <= 1; loop++)
  {
    uint32 offset= 0;
    for (uint i= 0; i < table->s->fields; i++)
    {
      Field *field= table->field[i];
      NdbValue value= m_value[i];
      if (value.ptr != NULL && (field->flags & BLOB_FLAG))
      {
        Field_blob *field_blob= (Field_blob *)field;
        NdbBlob *ndb_blob= value.blob;
        Uint64 blob_len= 0;
        if (ndb_blob->getLength(blob_len) != 0)
          DBUG_RETURN(-1);
        // Align to Uint64
        uint32 blob_size= blob_len;
        if (blob_size % 8 != 0)
          blob_size+= 8 - blob_size % 8;
        if (loop == 1)
        {
          char *buf= m_blobs_buffer + offset;
          uint32 len= 0xffffffff;  // Max uint32
          DBUG_PRINT("value", ("read blob ptr=%p len=%u",
                               buf, (uint)blob_len));
          if (ndb_blob->readData(buf, len) != 0)
            DBUG_RETURN(-1);
          DBUG_ASSERT(len == blob_len);
          // Ugly hack assumes only ptr needs to be changed
          field_blob->ptr+= ptrdiff;
          field_blob->set_ptr(len, buf);
          field_blob->ptr-= ptrdiff;
        }
        offset+= blob_size;
      }
    }
    if (loop == 0 && offset > m_blobs_buffer_size)
    {
      my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
      // Keep size and pointer consistent if the allocation fails
      m_blobs_buffer_size= 0;
      DBUG_PRINT("value", ("allocate blobs buffer size %u", offset));
      m_blobs_buffer= my_malloc(offset, MYF(MY_WME));
      if (m_blobs_buffer == NULL)
        DBUG_RETURN(-1);
      m_blobs_buffer_size= offset;
    }
  }
  DBUG_RETURN(0);
}


/*
  Instruct NDB to fetch one field
  - data is read directly into buffer provided by field
    if field is NULL, data is read into memory provided by NDBAPI
*/

pekka@mysql.com's avatar
pekka@mysql.com committed
846
int ha_ndbcluster::get_ndb_value(NdbOperation *ndb_op, Field *field,
847
                                 uint fieldnr, byte* buf)
848 849
{
  DBUG_ENTER("get_ndb_value");
pekka@mysql.com's avatar
pekka@mysql.com committed
850 851 852 853 854
  DBUG_PRINT("enter", ("fieldnr: %d flags: %o", fieldnr,
                       (int)(field != NULL ? field->flags : 0)));

  if (field != NULL)
  {
tulin@dl145c.mysql.com's avatar
tulin@dl145c.mysql.com committed
855 856
      DBUG_ASSERT(buf);
      DBUG_ASSERT(ndb_supported_type(field->type()));
pekka@mysql.com's avatar
pekka@mysql.com committed
857 858
      DBUG_ASSERT(field->ptr != NULL);
      if (! (field->flags & BLOB_FLAG))
859
      { 
860 861
        if (field->type() != MYSQL_TYPE_BIT)
        {
862 863 864 865 866 867 868 869
          byte *field_buf;
          if (field->pack_length() != 0)
            field_buf= buf + (field->ptr - table->record[0]);
          else
            field_buf= (byte *)&dummy_buf;
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr, 
                                                 field_buf);
        }
870 871 872 873
        else // if (field->type() == MYSQL_TYPE_BIT)
        {
          m_value[fieldnr].rec= ndb_op->getValue(fieldnr);
        }
pekka@mysql.com's avatar
pekka@mysql.com committed
874 875 876 877 878 879 880 881 882
        DBUG_RETURN(m_value[fieldnr].rec == NULL);
      }

      // Blob type
      NdbBlob *ndb_blob= ndb_op->getBlobHandle(fieldnr);
      m_value[fieldnr].blob= ndb_blob;
      if (ndb_blob != NULL)
      {
        // Set callback
883
	m_blobs_offset= buf - (byte*) table->record[0];
pekka@mysql.com's avatar
pekka@mysql.com committed
884
        void *arg= (void *)this;
885
        DBUG_RETURN(ndb_blob->setActiveHook(g_get_ndb_blobs_value, arg) != 0);
pekka@mysql.com's avatar
pekka@mysql.com committed
886 887 888 889 890
      }
      DBUG_RETURN(1);
  }

  // Used for hidden key only
891
  m_value[fieldnr].rec= ndb_op->getValue(fieldnr, m_ref);
pekka@mysql.com's avatar
pekka@mysql.com committed
892 893 894 895 896 897 898 899 900
  DBUG_RETURN(m_value[fieldnr].rec == NULL);
}


/*
  Check if any set or get of blob value in current query.
*/
bool ha_ndbcluster::uses_blob_value(bool all_fields)
{
901
  if (table->s->blob_fields == 0)
902
    return FALSE;
pekka@mysql.com's avatar
pekka@mysql.com committed
903
  if (all_fields)
904
    return TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
905
  {
906
    uint no_fields= table->s->fields;
pekka@mysql.com's avatar
pekka@mysql.com committed
907
    int i;
908
    THD *thd= current_thd;
pekka@mysql.com's avatar
pekka@mysql.com committed
909 910 911 912 913 914
    // They always put blobs at the end..
    for (i= no_fields - 1; i >= 0; i--)
    {
      Field *field= table->field[i];
      if (thd->query_id == field->query_id)
      {
915
        return TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
916 917 918
      }
    }
  }
919
  return FALSE;
920 921 922 923 924 925 926 927 928 929 930 931 932
}


/*
  Get metadata for this table from NDB 

  IMPLEMENTATION
    - check that frm-file on disk is equal to frm-file
      of table accessed in NDB
*/

int ha_ndbcluster::get_metadata(const char *path)
{
933 934
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
935 936
  const NDBTAB *tab;
  int error;
937
  bool invalidating_ndb_table= FALSE;
938

939 940 941
  DBUG_ENTER("get_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s, path: %s", m_tabname, path));

942 943 944 945 946 947
  do {
    const void *data, *pack_data;
    uint length, pack_length;

    if (!(tab= dict->getTable(m_tabname)))
      ERR_RETURN(dict->getNdbError());
948
    // Check if thread has stale local cache
949 950 951 952 953 954 955
    if (tab->getObjectStatus() == NdbDictionary::Object::Invalid)
    {
      invalidate_dictionary_cache(FALSE);
      if (!(tab= dict->getTable(m_tabname)))
         ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("Table schema version: %d", tab->getObjectVersion()));
    }
956 957 958 959 960
    /*
      Compare FrmData in NDB with frm file from disk.
    */
    error= 0;
    if (readfrm(path, &data, &length) ||
961
        packfrm(data, length, &pack_data, &pack_length))
962 963 964 965 966
    {
      my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
      my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
      DBUG_RETURN(1);
    }
967
    
968
    if ((pack_length != tab->getFrmLength()) || 
969
        (memcmp(pack_data, tab->getFrmData(), pack_length)))
970 971 972
    {
      if (!invalidating_ndb_table)
      {
973
        DBUG_PRINT("info", ("Invalidating table"));
974
        invalidate_dictionary_cache(TRUE);
975
        invalidating_ndb_table= TRUE;
976 977 978
      }
      else
      {
979 980 981 982 983 984 985 986
        DBUG_PRINT("error", 
                   ("metadata, pack_length: %d getFrmLength: %d memcmp: %d", 
                    pack_length, tab->getFrmLength(),
                    memcmp(pack_data, tab->getFrmData(), pack_length)));      
        DBUG_DUMP("pack_data", (char*)pack_data, pack_length);
        DBUG_DUMP("frm", (char*)tab->getFrmData(), tab->getFrmLength());
        error= 3;
        invalidating_ndb_table= FALSE;
987 988 989 990
      }
    }
    else
    {
991
      invalidating_ndb_table= FALSE;
992 993 994 995 996
    }
    my_free((char*)data, MYF(0));
    my_free((char*)pack_data, MYF(0));
  } while (invalidating_ndb_table);

997 998
  if (error)
    DBUG_RETURN(error);
999
  
1000
  m_table_version= tab->getObjectVersion();
1001 1002 1003 1004
  m_table= (void *)tab; 
  m_table_info= NULL; // Set in external lock
  
  DBUG_RETURN(build_index_list(ndb, table, ILBP_OPEN));
1005
}
1006

1007
static int fix_unique_index_attr_order(NDB_INDEX_DATA &data,
1008 1009
                                       const NDBINDEX *index,
                                       KEY *key_info)
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
{
  DBUG_ENTER("fix_unique_index_attr_order");
  unsigned sz= index->getNoOfIndexColumns();

  if (data.unique_index_attrid_map)
    my_free((char*)data.unique_index_attrid_map, MYF(0));
  data.unique_index_attrid_map= (unsigned char*)my_malloc(sz,MYF(MY_WME));

  KEY_PART_INFO* key_part= key_info->key_part;
  KEY_PART_INFO* end= key_part+key_info->key_parts;
  DBUG_ASSERT(key_info->key_parts == sz);
  for (unsigned i= 0; key_part != end; key_part++, i++) 
  {
    const char *field_name= key_part->field->field_name;
#ifndef DBUG_OFF
   data.unique_index_attrid_map[i]= 255;
#endif
    for (unsigned j= 0; j < sz; j++)
    {
1029
      const NDBCOL *c= index->getColumn(j);
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
1030
      if (strcmp(field_name, c->getName()) == 0)
1031
      {
1032 1033
        data.unique_index_attrid_map[i]= j;
        break;
1034 1035 1036 1037 1038 1039
      }
    }
    DBUG_ASSERT(data.unique_index_attrid_map[i] != 255);
  }
  DBUG_RETURN(0);
}
1040

1041 1042


1043
/*
  Record the type of every index of the table and fetch the
  corresponding NDB index objects.

  SYNOPSIS
    build_index_list()
    ndb     Ndb object used to reach the dictionary
    tab     MySQL table definition
    phase   ILBP_CREATE also creates the secondary indexes in NDB,
            any other phase only looks them up

  NOTES
    A unique index is stored in NDB under "<index name>$unique".
    If creating an index fails in the create phase the table is dropped.

  RETURN
    0      ok
    1      an index object could not be fetched from the dictionary
    other  error from index creation, the not-null check or the
           unique index attribute map
*/
int ha_ndbcluster::build_index_list(Ndb *ndb, TABLE *tab, enum ILBP phase)
{
  uint i;
  int error= 0;
  const char *index_name;
  char unique_index_name[FN_LEN];
  static const char* unique_suffix= "$unique";
  KEY* key_info= tab->key_info;
  const char **key_name= tab->s->keynames.type_names;
  NDBDICT *dict= ndb->getDictionary();
  DBUG_ENTER("ha_ndbcluster::build_index_list");
  
  m_has_unique_index= FALSE;
  // Save information about all known indexes
  for (i= 0; i < tab->s->keys; i++, key_info++, key_name++)
  {
    index_name= *key_name;
    NDB_INDEX_TYPE idx_type= get_index_type_from_table(i);
    m_index[i].type= idx_type;
    if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
    {
      m_has_unique_index= TRUE;
      strxnmov(unique_index_name, FN_LEN, index_name, unique_suffix, NullS);
      DBUG_PRINT("info", ("Created unique index name \'%s\' for index %d",
                          unique_index_name, i));
    }
    // Create secondary indexes if in create phase
    if (phase == ILBP_CREATE)
    {
      DBUG_PRINT("info", ("Creating index %u: %s", i, index_name));      
      switch (idx_type){
        
      case PRIMARY_KEY_INDEX:
        // Do nothing, already created
        break;
      case PRIMARY_KEY_ORDERED_INDEX:
        error= create_ordered_index(index_name, key_info);
        break;
      case UNIQUE_ORDERED_INDEX:
        if (!(error= create_ordered_index(index_name, key_info)))
          error= create_unique_index(unique_index_name, key_info);
        break;
      case UNIQUE_INDEX:
        if (!(error= check_index_fields_not_null(i)))
          error= create_unique_index(unique_index_name, key_info);
        break;
      case ORDERED_INDEX:
        error= create_ordered_index(index_name, key_info);
        break;
      default:
        DBUG_ASSERT(FALSE);
        break;
      }
      if (error)
      {
        DBUG_PRINT("error", ("Failed to create index %u", i));
        drop_table();
        break;
      }
    }
    // Add handles to index objects
    if (idx_type != PRIMARY_KEY_INDEX && idx_type != UNIQUE_INDEX)
    {
      DBUG_PRINT("info", ("Get handle to index %s", index_name));
      const NDBINDEX *index= dict->getIndex(index_name, m_tabname);
      if (!index) DBUG_RETURN(1);
      m_index[i].index= (void *) index;
    }
    if (idx_type == UNIQUE_ORDERED_INDEX || idx_type == UNIQUE_INDEX)
    {
      DBUG_PRINT("info", ("Get handle to unique_index %s", unique_index_name));
      const NDBINDEX *index= dict->getIndex(unique_index_name, m_tabname);
      if (!index) DBUG_RETURN(1);
      m_index[i].unique_index= (void *) index;
      error= fix_unique_index_attr_order(m_index[i], index, key_info);
      if (error)
      {
        /*
          Stop here: continuing would let a later iteration overwrite
          the error and the failure would go unreported.
        */
        DBUG_PRINT("error", ("Failed to map attributes of index %u", i));
        break;
      }
    }
  }
  
  DBUG_RETURN(error);
}

1124

1125 1126 1127 1128
/*
  Decode the type of an index from information 
  provided in table object
*/
1129
NDB_INDEX_TYPE ha_ndbcluster::get_index_type_from_table(uint inx) const
1130
{
1131
  bool is_hash_index=  (table->key_info[inx].algorithm == HA_KEY_ALG_HASH);
1132
  if (inx == table->s->primary_key)
1133
    return is_hash_index ? PRIMARY_KEY_INDEX : PRIMARY_KEY_ORDERED_INDEX;
1134 1135 1136 1137

  return ((table->key_info[inx].flags & HA_NOSAME) ? 
          (is_hash_index ? UNIQUE_INDEX : UNIQUE_ORDERED_INDEX) :
          ORDERED_INDEX);
1138
} 
1139

1140 1141 1142 1143 1144
/*
  Verify that no column of key number inx is nullable.

  RETURN
    0                         all key columns are NOT NULL
    ER_NULL_COLUMN_IN_INDEX   a nullable column was found; the error
                              has been reported with the column name
*/
int ha_ndbcluster::check_index_fields_not_null(uint inx)
{
  DBUG_ENTER("ha_ndbcluster::check_index_fields_not_null");
  const KEY *index_def= &table->key_info[inx];

  for (uint part_no= 0; part_no < index_def->key_parts; part_no++)
  {
    Field *column= index_def->key_part[part_no].field;
    if (!column->maybe_null())
      continue;

    my_printf_error(ER_NULL_COLUMN_IN_INDEX,ER(ER_NULL_COLUMN_IN_INDEX),
                    MYF(0),column->field_name);
    DBUG_RETURN(ER_NULL_COLUMN_IN_INDEX);
  }

  DBUG_RETURN(0);
}
1160 1161 1162

void ha_ndbcluster::release_metadata()
{
1163
  uint i;
1164

1165 1166 1167 1168
  DBUG_ENTER("release_metadata");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));

  m_table= NULL;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1169
  m_table_info= NULL;
1170

1171
  // Release index list 
1172 1173
  for (i= 0; i < MAX_KEY; i++)
  {
1174 1175
    m_index[i].unique_index= NULL;      
    m_index[i].index= NULL;      
1176 1177 1178 1179 1180
    if (m_index[i].unique_index_attrid_map)
    {
      my_free((char *)m_index[i].unique_index_attrid_map, MYF(0));
      m_index[i].unique_index_attrid_map= NULL;
    }
1181 1182
  }

1183 1184 1185
  DBUG_VOID_RETURN;
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1186
int ha_ndbcluster::get_ndb_lock_type(enum thr_lock_type type)
1187
{
1188
  DBUG_ENTER("ha_ndbcluster::get_ndb_lock_type");
1189
  if (type >= TL_WRITE_ALLOW_WRITE)
1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
  {
    DBUG_PRINT("info", ("Using exclusive lock"));
    DBUG_RETURN(NdbOperation::LM_Exclusive);
  }
  else if (type ==  TL_READ_WITH_SHARED_LOCKS ||
	   uses_blob_value(m_retrieve_all_fields))
  {
    DBUG_PRINT("info", ("Using read lock"));
    DBUG_RETURN(NdbOperation::LM_Read);
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
1200
  else
1201 1202 1203 1204
  {
    DBUG_PRINT("info", ("Using committed read"));
    DBUG_RETURN(NdbOperation::LM_CommittedRead);
  }
1205 1206
}

1207 1208 1209 1210 1211 1212
/*
  Handler index flags per index type. Each entry is labelled with the
  index type it belongs to; index_flags() indexes this table with the
  value returned by get_index_type_from_table().
*/
static const ulong index_type_flags[]=
{
  /* UNDEFINED_INDEX */
  0,                         

  /* PRIMARY_KEY_INDEX */
  HA_ONLY_WHOLE_INDEX, 

  /* PRIMARY_KEY_ORDERED_INDEX */
  /* 
     Enable HA_KEYREAD_ONLY when "sorted" indexes are supported, 
     thus ORDER BY clauses can be optimized by reading directly 
     through the index.
  */
  // HA_KEYREAD_ONLY | 
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* UNIQUE_INDEX */
  HA_ONLY_WHOLE_INDEX,

  /* UNIQUE_ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER,

  /* ORDERED_INDEX */
  HA_READ_NEXT |
  HA_READ_PREV |
  HA_READ_RANGE |
  HA_READ_ORDER
};

/* Number of entries in index_type_flags */
static const int index_flags_size= sizeof(index_type_flags)/sizeof(ulong);

/*
  Return the index type stored in m_index for key number idx_no.
  idx_no must be less than MAX_KEY (asserted in debug builds).
*/
inline NDB_INDEX_TYPE ha_ndbcluster::get_index_type(uint idx_no) const
{
  DBUG_ASSERT(idx_no < MAX_KEY);
  return m_index[idx_no].type;
}


/*
  Get the flags for an index

  RETURN
    flags depending on the type of the index.
*/

1259 1260
inline ulong ha_ndbcluster::index_flags(uint idx_no, uint part,
                                        bool all_parts) const 
1261
{ 
1262
  DBUG_ENTER("ha_ndbcluster::index_flags");
1263
  DBUG_PRINT("info", ("idx_no: %d", idx_no));
1264
  DBUG_ASSERT(get_index_type_from_table(idx_no) < index_flags_size);
1265 1266
  DBUG_RETURN(index_type_flags[get_index_type_from_table(idx_no)] | 
              HA_KEY_SCAN_NOT_ROR);
1267 1268
}

pekka@mysql.com's avatar
pekka@mysql.com committed
1269 1270
/*
  Rewrite a VARCHAR key value with a two byte length prefix into buf
  with a one byte length prefix, when the field itself uses one length
  byte. On conversion ptr is redirected to buf; in every other case
  ptr and buf are left untouched.
*/
static void shrink_varchar(Field* field, const byte* & ptr, char* buf)
{
  if (field->type() != MYSQL_TYPE_VARCHAR || ptr == NULL)
    return;

  Field_varstring *varstr= (Field_varstring*)field;
  if (varstr->length_bytes != 1)
    return;

  const uint packed_size= field->pack_length();
  DBUG_ASSERT(1 <= packed_size && packed_size <= 256);

  if (ptr[1] != 0)
  {
    // A non-zero second length byte is not expected here
    DBUG_ASSERT(FALSE);
    buf[0]= 255;
  }
  else
  {
    buf[0]= ptr[0];
  }

  // Copy the data that follows the two byte length prefix
  memmove(buf + 1, ptr + 2, packed_size - 1);
  ptr= buf;
}
1287 1288 1289

/*
  Set the primary key of operation op from a key buffer, one key part
  at a time.

  RETURN
    0      ok
    other  error mapped from the operation's NDB error
*/
int ha_ndbcluster::set_primary_key(NdbOperation *op, const byte *key)
{
  const KEY *pk_info= table->key_info + table->s->primary_key;
  KEY_PART_INFO *part= pk_info->key_part;
  KEY_PART_INFO *const parts_end= part + pk_info->key_parts;
  DBUG_ENTER("set_primary_key");

  while (part != parts_end)
  {
    Field *column= part->field;
    char packed[256];
    const byte *value= key;

    // May redirect value to the repacked copy in packed
    shrink_varchar(column, value, packed);
    if (set_ndb_key(op, column, part->fieldnr-1, value))
      ERR_RETURN(op->getNdbError());

    key+= part->store_length;
    part++;
  }
  DBUG_RETURN(0);
}


1310
/*
  Set the primary key of operation op from the column values found in
  a row buffer.

  RETURN
    0      ok
    other  error mapped from the operation's NDB error
*/
int ha_ndbcluster::set_primary_key_from_record(NdbOperation *op, const byte *record)
{
  const KEY *pk_info= table->key_info + table->s->primary_key;
  DBUG_ENTER("set_primary_key_from_record");

  for (uint part_no= 0; part_no < pk_info->key_parts; part_no++)
  {
    KEY_PART_INFO *part= pk_info->key_part + part_no;
    const byte *value= record + part->offset;

    if (set_ndb_key(op, part->field, part->fieldnr-1, value))
      ERR_RETURN(op->getNdbError());
  }
  DBUG_RETURN(0);
}

1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344
/*
  Set the key of a unique index operation from the column values found
  in a row buffer. Key part positions are translated through the
  unique_index_attrid_map of key number keyno.

  RETURN
    0      ok
    other  error mapped from the active transaction's NDB error
*/
int ha_ndbcluster::set_index_key_from_record(NdbOperation *op, const byte *record, uint keyno)
{
  const KEY *index_def= table->key_info + keyno;
  const unsigned char *attrid_map= m_index[keyno].unique_index_attrid_map;
  DBUG_ENTER("set_index_key_from_record");

  for (uint part_no= 0; part_no < index_def->key_parts; part_no++)
  {
    KEY_PART_INFO *part= index_def->key_part + part_no;

    if (set_ndb_key(op, part->field, attrid_map[part_no],
                    record+part->offset))
      ERR_RETURN(m_active_trans->getNdbError());
  }
  DBUG_RETURN(0);
}

1345 1346
/*
  Set the key of a unique index operation from a key buffer. Key part
  positions are translated through the unique_index_attrid_map of the
  active index.

  RETURN
    0      ok
    other  error mapped from the active transaction's NDB error
*/
int 
ha_ndbcluster::set_index_key(NdbOperation *op, 
                             const KEY *key_info, 
                             const byte * key_ptr)
{
  DBUG_ENTER("ha_ndbcluster::set_index_key");
  KEY_PART_INFO *part= key_info->key_part;
  KEY_PART_INFO *const parts_end= part + key_info->key_parts;
  uint part_no= 0;

  while (part != parts_end)
  {
    Field *column= part->field;
    char packed[256];
    // The value starts one byte into the key part when null_bit is set
    const byte *value= key_ptr;
    if (part->null_bit)
      value= key_ptr + 1;

    shrink_varchar(column, value, packed);
    if (set_ndb_key(op, column, m_index[active_index].unique_index_attrid_map[part_no], value))
      ERR_RETURN(m_active_trans->getNdbError());

    key_ptr+= part->store_length;
    part++;
    part_no++;
  }
  DBUG_RETURN(0);
}
1367

1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380
/*
  Tell operation op which columns to fetch into buf: columns referenced
  by the current query, primary key columns, or every column when
  m_retrieve_all_fields is set. For a table without primary key the
  hidden key is fetched as well.

  RETURN
    0      ok
    1      hidden key column missing from the NDB table (debug builds)
    other  error mapped from the operation's NDB error
*/
inline 
int ha_ndbcluster::define_read_attrs(byte* buf, NdbOperation* op)
{
  THD *thd= current_thd;

  DBUG_ENTER("define_read_attrs");  

  // Define attributes to read
  for (uint fieldnr= 0; fieldnr < table->s->fields; fieldnr++) 
  {
    Field *field= table->field[fieldnr];
    const bool wanted= (thd->query_id == field->query_id) ||
                       (field->flags & PRI_KEY_FLAG) ||
                       m_retrieve_all_fields;
    if (!wanted)
    {
      m_value[fieldnr].ptr= NULL;
      continue;
    }
    if (get_ndb_value(op, field, fieldnr, buf))
      ERR_RETURN(op->getNdbError());
  }

  if (table->s->primary_key != MAX_KEY) 
    DBUG_RETURN(0);

  // Scanning table with no primary key
  DBUG_PRINT("info", ("Getting hidden key"));
  int hidden_no= table->s->fields;      
#ifndef DBUG_OFF
  const NDBTAB *tab= (const NDBTAB *) m_table;    
  if (!tab->getColumn(hidden_no))
    DBUG_RETURN(1);
#endif
  if (get_ndb_value(op, NULL, hidden_no, NULL))
    ERR_RETURN(op->getNdbError());

  DBUG_RETURN(0);
} 

1409 1410 1411 1412
/*
  Read one record from NDB using primary key
*/

1413
int ha_ndbcluster::pk_read(const byte *key, uint key_len, byte *buf) 
1414
{
1415
  uint no_fields= table->s->fields;
1416 1417
  NdbConnection *trans= m_active_trans;
  NdbOperation *op;
1418

1419 1420 1421 1422
  int res;
  DBUG_ENTER("pk_read");
  DBUG_PRINT("enter", ("key_len: %u", key_len));
  DBUG_DUMP("key", (char*)key, key_len);
1423

1424 1425
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
joreland@mysql.com's avatar
joreland@mysql.com committed
1426
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
1427
      op->readTuple(lm) != 0)
1428
    ERR_RETURN(trans->getNdbError());
1429
  
1430
  if (table->s->primary_key == MAX_KEY) 
1431 1432 1433 1434 1435
  {
    // This table has no primary key, use "hidden" primary key
    DBUG_PRINT("info", ("Using hidden key"));
    DBUG_DUMP("key", (char*)key, 8);    
    if (set_hidden_key(op, no_fields, key))
1436
      ERR_RETURN(trans->getNdbError());
1437
    
1438
    // Read key at the same time, for future reference
1439
    if (get_ndb_value(op, NULL, no_fields, NULL))
1440
      ERR_RETURN(trans->getNdbError());
1441 1442 1443 1444 1445 1446 1447
  } 
  else 
  {
    if ((res= set_primary_key(op, key)))
      return res;
  }
  
1448
  if ((res= define_read_attrs(buf, op)))
1449
    DBUG_RETURN(res);
1450
  
1451
  if (execute_no_commit_ie(this,trans,false) != 0) 
1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The value have now been fetched from NDB  
  unpack_record(buf);
  table->status= 0;     
  DBUG_RETURN(0);
}

1463 1464 1465 1466 1467 1468
/*
  Read one complementing record from NDB using primary key from old_data
*/

int ha_ndbcluster::complemented_pk_read(const byte *old_data, byte *new_data)
{
1469
  uint no_fields= table->s->fields, i;
1470
  NdbTransaction *trans= m_active_trans;
1471 1472 1473 1474
  NdbOperation *op;
  THD *thd= current_thd;
  DBUG_ENTER("complemented_pk_read");

1475
  if (m_retrieve_all_fields)
1476 1477 1478
    // We have allready retrieved all fields, nothing to complement
    DBUG_RETURN(0);

1479 1480
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
joreland@mysql.com's avatar
joreland@mysql.com committed
1481
  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) || 
1482
      op->readTuple(lm) != 0)
1483
    ERR_RETURN(trans->getNdbError());
1484
  int res;
mskold@mysql.com's avatar
mskold@mysql.com committed
1485
  if ((res= set_primary_key_from_record(op, old_data)))
1486
    ERR_RETURN(trans->getNdbError());
1487 1488 1489 1490
  // Read all unreferenced non-key field(s)
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
1491
    if (!((field->flags & PRI_KEY_FLAG) ||
1492
          (thd->query_id == field->query_id)))
1493
    {
1494
      if (get_ndb_value(op, field, i, new_data))
1495
        ERR_RETURN(trans->getNdbError());
1496 1497
    }
  }
1498
  if (execute_no_commit(this,trans,false) != 0) 
1499 1500 1501 1502 1503 1504 1505 1506
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }

  // The value have now been fetched from NDB  
  unpack_record(new_data);
  table->status= 0;     
1507 1508 1509 1510 1511 1512 1513 1514

  /**
   * restore m_value
   */
  for (i= 0; i < no_fields; i++) 
  {
    Field *field= table->field[i];
    if (!((field->flags & PRI_KEY_FLAG) ||
1515
          (thd->query_id == field->query_id)))
1516 1517 1518 1519 1520
    {
      m_value[i].ptr= NULL;
    }
  }
  
1521 1522 1523
  DBUG_RETURN(0);
}

1524
/*
 * Walk the completed operations from first to last and check that
 * each of them failed with errcode.
 *
 * Returns true when every operation visited failed with errcode.
 * Returns false as soon as an operation failed with another error or
 * did not fail at all. For an operation that did not fail, and when
 * errcode is HA_ERR_KEY_NOT_FOUND, m_dupkey is set to the key the
 * operation used: the matching unique index, or the primary key.
 */
bool ha_ndbcluster::check_all_operations_for_error(NdbTransaction *trans,
                                                   const NdbOperation *first,
                                                   const NdbOperation *last,
                                                   uint errcode)
{
  DBUG_ENTER("ha_ndbcluster::check_all_operations_for_error");

  for (const NdbOperation *cur= first;
       cur != NULL;
       cur= trans->getNextCompletedOperation(cur))
  {
    NdbError err= cur->getNdbError();
    if (err.status == NdbError::Success)
    {
      // We found a duplicate
      if (cur->getType() != NdbOperation::UniqueIndexAccess)
      {
        // Must have been primary key access
        DBUG_ASSERT(cur->getType() == NdbOperation::PrimaryKeyAccess);
        if (errcode == HA_ERR_KEY_NOT_FOUND)
          m_dupkey= table->s->primary_key;
      }
      else if (errcode == HA_ERR_KEY_NOT_FOUND)
      {
        NdbIndexOperation *iop= (NdbIndexOperation *) cur;
        const NDBINDEX *index= iop->getIndex();
        // Find the key_no of the index
        for (uint keyno= 0; keyno < table->s->keys; keyno++)
        {
          if (m_index[keyno].unique_index == index)
          {
            m_dupkey= keyno;
            break;
          }
        }
      }
      DBUG_RETURN(false);      
    }

    if (ndb_to_mysql_error(&err) != (int) errcode)
      DBUG_RETURN(false);
    if (cur == last)
      break;
  }
  DBUG_RETURN(true);
}

/*
 * Peek to check if any rows already exist with conflicting
 * primary key or unique index values
1584 1585
*/

1586
int ha_ndbcluster::peek_indexed_rows(const byte *record)
1587
{
1588
  NdbTransaction *trans= m_active_trans;
1589
  NdbOperation *op;
1590 1591 1592 1593
  const NdbOperation *first, *last;
  uint i;
  int res;
  DBUG_ENTER("peek_indexed_rows");
1594

1595
  NdbOperation::LockMode lm= NdbOperation::LM_Read;
1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628
  first= NULL;
  if (table->s->primary_key != MAX_KEY)
  {
    /*
     * Fetch any row with colliding primary key
     */
    if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) ||
        op->readTuple(lm) != 0)
      ERR_RETURN(trans->getNdbError());
    
    first= op;
    if ((res= set_primary_key_from_record(op, record)))
      ERR_RETURN(trans->getNdbError());
  }
  /*
   * Fetch any rows with colliding unique indexes
   */
  KEY* key_info;
  KEY_PART_INFO *key_part, *end;
  for (i= 0, key_info= table->key_info; i < table->s->keys; i++, key_info++)
  {
    if (i != table->s->primary_key &&
        key_info->flags & HA_NOSAME)
    {
      // A unique index is defined on table
      NdbIndexOperation *iop;
      NDBINDEX *unique_index = (NDBINDEX *) m_index[i].unique_index;
      key_part= key_info->key_part;
      end= key_part + key_info->key_parts;
      if (!(iop= trans->getNdbIndexOperation(unique_index,
                                             (const NDBTAB *) m_table)) ||
          iop->readTuple(lm) != 0)
        ERR_RETURN(trans->getNdbError());
1629

1630 1631 1632 1633 1634 1635 1636 1637
      if (!first)
        first= iop;
      if ((res= set_index_key_from_record(iop, record, i)))
        ERR_RETURN(trans->getNdbError());
    }
  }
  last= trans->getLastDefinedOperation();
  if (first)
1638
    res= execute_no_commit_ie(this,trans,false);
1639 1640 1641 1642 1643 1644 1645 1646
  else
  {
    // Table has no keys
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
  }
  if (check_all_operations_for_error(trans, first, last, 
                                     HA_ERR_KEY_NOT_FOUND))
1647 1648 1649 1650
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  } 
1651 1652 1653 1654
  else
  {
    DBUG_PRINT("info", ("m_dupkey %d", m_dupkey));
  }
1655 1656
  DBUG_RETURN(0);
}
1657

1658 1659 1660 1661 1662
/*
  Read one record from NDB using unique secondary index
*/

int ha_ndbcluster::unique_index_read(const byte *key,
1663
                                     uint key_len, byte *buf)
1664
{
1665
  int res;
1666
  NdbTransaction *trans= m_active_trans;
1667
  NdbIndexOperation *op;
1668
  DBUG_ENTER("ha_ndbcluster::unique_index_read");
1669 1670 1671
  DBUG_PRINT("enter", ("key_len: %u, index: %u", key_len, active_index));
  DBUG_DUMP("key", (char*)key, key_len);
  
1672 1673
  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
1674
  if (!(op= trans->getNdbIndexOperation((NDBINDEX *) 
1675
                                        m_index[active_index].unique_index, 
joreland@mysql.com's avatar
joreland@mysql.com committed
1676
                                        (const NDBTAB *) m_table)) ||
1677
      op->readTuple(lm) != 0)
1678 1679 1680
    ERR_RETURN(trans->getNdbError());
  
  // Set secondary index key(s)
1681
  if ((res= set_index_key(op, table->key_info + active_index, key)))
1682 1683
    DBUG_RETURN(res);
  
1684
  if ((res= define_read_attrs(buf, op)))
1685
    DBUG_RETURN(res);
1686

1687
  if (execute_no_commit_ie(this,trans,false) != 0) 
1688 1689 1690 1691 1692 1693 1694 1695 1696 1697
  {
    table->status= STATUS_NOT_FOUND;
    DBUG_RETURN(ndb_err(trans));
  }
  // The value have now been fetched from NDB
  unpack_record(buf);
  table->status= 0;
  DBUG_RETURN(0);
}

1698
/*
  Advance the scan cursor to the next row.

  Before moving on, the lock on the previously returned row is taken
  over if m_lock_tuple is set. Pending operations are executed before
  NDB is asked for more rows.

  RETURN
    0      a row is available
    1      no more records
    other  error (-1, or an error code mapped from NDB)
*/
inline int ha_ndbcluster::fetch_next(NdbScanOperation* cursor)
{
  DBUG_ENTER("fetch_next");
  int check;
  NdbTransaction *trans= m_active_trans;
  
    if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK IN SHARE MODE) and row was not explicitly unlocked 
      with unlock_row() call
    */
      // NOTE(review): this declaration shadows the outer trans; both are
      // initialised from m_active_trans
      NdbConnection *trans= m_active_trans;
      NdbOperation *op;
      // Lock row
      DBUG_PRINT("info", ("Keeping lock on scanned row"));
      
      // NOTE(review): uses m_active_cursor rather than the cursor
      // argument - presumably the same object, confirm with callers
      if (!(op= m_active_cursor->lockCurrentTuple()))
      {
	m_lock_tuple= false;
	ERR_RETURN(trans->getNdbError());
      }
      m_ops_pending++;
  }
  m_lock_tuple= false;

  // Passed to nextResult() for the first call; false for the lock types
  // TL_WRITE_ALLOW_WRITE and above and for TL_READ_WITH_SHARED_LOCKS
  bool contact_ndb= m_lock.type < TL_WRITE_ALLOW_WRITE &&
                    m_lock.type != TL_READ_WITH_SHARED_LOCKS;
  do {
    DBUG_PRINT("info", ("Call nextResult, contact_ndb: %d", contact_ndb));
    /*
      We can only handle one tuple with blobs at a time.
    */
    if (m_ops_pending && m_blobs_pending)
    {
      if (execute_no_commit(this,trans,false) != 0)
        DBUG_RETURN(ndb_err(trans));
      m_ops_pending= 0;
      m_blobs_pending= FALSE;
    }
    
    if ((check= cursor->nextResult(contact_ndb, m_force_send)) == 0)
    {
      /*
	Explicitly lock tuple if "select for update" or
	"select lock in share mode"
      */
      m_lock_tuple= (m_lock.type == TL_WRITE_ALLOW_WRITE
		     || 
		     m_lock.type == TL_READ_WITH_SHARED_LOCKS);
      DBUG_RETURN(0);
    } 
    else if (check == 1 || check == 2)
    {
      // 1: No more records
      // 2: No more cached records
      
      /*
        Before fetching more rows and releasing lock(s),
        all pending update or delete operations should 
        be sent to NDB
      */
      DBUG_PRINT("info", ("ops_pending: %d", m_ops_pending));    
      if (m_ops_pending)
      {
        if (m_transaction_on)
        {
          if (execute_no_commit(this,trans,false) != 0)
            DBUG_RETURN(-1);
        }
        else
        {
          // Without transactions: commit, then restart the transaction
          if  (execute_commit(this,trans) != 0)
            DBUG_RETURN(-1);
          if (trans->restart() != 0)
          {
            DBUG_ASSERT(0);
            DBUG_RETURN(-1);
          }
        }
        m_ops_pending= 0;
      }
      // Only when the cached rows ran out (2) is NDB contacted next time
      contact_ndb= (check == 2);
    }
    else
    {
      DBUG_RETURN(-1);
    }
  } while (check == 2);

  DBUG_RETURN(1);
}

/*
  Get the next record of a started scan. Try to fetch
  it locally from NdbApi cached records if possible, 
  otherwise ask NDB for more.

  NOTE
  If this is a update/delete make sure to not contact 
  NDB before any pending ops have been sent to NDB.

  RETURN
    0                    record unpacked into buf, table->status set to 0
    HA_ERR_END_OF_FILE   no active cursor, or no more records
    other                error mapped from the active transaction
*/

inline int ha_ndbcluster::next_result(byte *buf)
{  
  DBUG_ENTER("next_result");
    
  if (!m_active_cursor)
    DBUG_RETURN(HA_ERR_END_OF_FILE);
  
  switch (fetch_next(m_active_cursor))
  {
  case 0:
    DBUG_PRINT("info", ("One more record found"));    
    
    unpack_record(buf);
    table->status= 0;
    DBUG_RETURN(0);

  case 1:
    // No more records
    table->status= STATUS_NOT_FOUND;
    
    DBUG_PRINT("info", ("No more records"));
    DBUG_RETURN(HA_ERR_END_OF_FILE);

  default:
    DBUG_RETURN(ndb_err(m_active_trans));
  }
}

1834
/*
1835
  Set bounds for ordered index scan.
1836 1837
*/

joreland@mysql.com's avatar
joreland@mysql.com committed
1838
int ha_ndbcluster::set_bounds(NdbIndexScanOperation *op,
1839 1840
                              const key_range *keys[2],
                              uint range_no)
1841
{
1842 1843 1844 1845
  const KEY *const key_info= table->key_info + active_index;
  const uint key_parts= key_info->key_parts;
  uint key_tot_len[2];
  uint tot_len;
1846
  uint i, j;
1847 1848

  DBUG_ENTER("set_bounds");
1849
  DBUG_PRINT("info", ("key_parts=%d", key_parts));
1850

1851
  for (j= 0; j <= 1; j++)
1852
  {
1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865
    const key_range *key= keys[j];
    if (key != NULL)
    {
      // for key->flag see ha_rkey_function
      DBUG_PRINT("info", ("key %d length=%d flag=%d",
                          j, key->length, key->flag));
      key_tot_len[j]= key->length;
    }
    else
    {
      DBUG_PRINT("info", ("key %d not present", j));
      key_tot_len[j]= 0;
    }
1866 1867
  }
  tot_len= 0;
1868

1869 1870 1871 1872
  for (i= 0; i < key_parts; i++)
  {
    KEY_PART_INFO *key_part= &key_info->key_part[i];
    Field *field= key_part->field;
1873
#ifndef DBUG_OFF
1874
    uint part_len= key_part->length;
1875
#endif
1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889
    uint part_store_len= key_part->store_length;
    // Info about each key part
    struct part_st {
      bool part_last;
      const key_range *key;
      const byte *part_ptr;
      bool part_null;
      int bound_type;
      const char* bound_ptr;
    };
    struct part_st part[2];

    for (j= 0; j <= 1; j++)
    {
1890
      struct part_st &p= part[j];
1891 1892 1893 1894 1895 1896 1897
      p.key= NULL;
      p.bound_type= -1;
      if (tot_len < key_tot_len[j])
      {
        p.part_last= (tot_len + part_store_len >= key_tot_len[j]);
        p.key= keys[j];
        p.part_ptr= &p.key->key[tot_len];
joreland@mysql.com's avatar
joreland@mysql.com committed
1898
        p.part_null= key_part->null_bit && *p.part_ptr;
1899
        p.bound_ptr= (const char *)
joreland@mysql.com's avatar
joreland@mysql.com committed
1900
          p.part_null ? 0 : key_part->null_bit ? p.part_ptr + 1 : p.part_ptr;
1901 1902 1903 1904 1905 1906 1907 1908

        if (j == 0)
        {
          switch (p.key->flag)
          {
            case HA_READ_KEY_EXACT:
              p.bound_type= NdbIndexScanOperation::BoundEQ;
              break;
1909
            // ascending
1910 1911 1912 1913 1914 1915 1916 1917 1918
            case HA_READ_KEY_OR_NEXT:
              p.bound_type= NdbIndexScanOperation::BoundLE;
              break;
            case HA_READ_AFTER_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundLE;
              else
                p.bound_type= NdbIndexScanOperation::BoundLT;
              break;
1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931
            // descending
            case HA_READ_PREFIX_LAST:           // weird
              p.bound_type= NdbIndexScanOperation::BoundEQ;
              break;
            case HA_READ_PREFIX_LAST_OR_PREV:   // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
1932 1933 1934 1935 1936 1937 1938
            default:
              break;
          }
        }
        if (j == 1) {
          switch (p.key->flag)
          {
1939
            // ascending
1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950
            case HA_READ_BEFORE_KEY:
              if (! p.part_last)
                p.bound_type= NdbIndexScanOperation::BoundGE;
              else
                p.bound_type= NdbIndexScanOperation::BoundGT;
              break;
            case HA_READ_AFTER_KEY:     // weird
              p.bound_type= NdbIndexScanOperation::BoundGE;
              break;
            default:
              break;
1951
            // descending strangely sets no end key
1952 1953
          }
        }
1954

1955 1956 1957
        if (p.bound_type == -1)
        {
          DBUG_PRINT("error", ("key %d unknown flag %d", j, p.key->flag));
1958
          DBUG_ASSERT(FALSE);
1959
          // Stop setting bounds but continue with what we have
1960
          op->end_of_bound(range_no);
1961 1962 1963 1964
          DBUG_RETURN(0);
        }
      }
    }
1965

1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982
    // Seen with e.g. b = 1 and c > 1
    if (part[0].bound_type == NdbIndexScanOperation::BoundLE &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("replace LE/GE pair by EQ"));
      part[0].bound_type= NdbIndexScanOperation::BoundEQ;
      part[1].bound_type= -1;
    }
    // Not seen but was in previous version
    if (part[0].bound_type == NdbIndexScanOperation::BoundEQ &&
        part[1].bound_type == NdbIndexScanOperation::BoundGE &&
        memcmp(part[0].part_ptr, part[1].part_ptr, part_store_len) == 0)
    {
      DBUG_PRINT("info", ("remove GE from EQ/GE pair"));
      part[1].bound_type= -1;
    }
1983

1984 1985
    for (j= 0; j <= 1; j++)
    {
1986
      struct part_st &p= part[j];
1987 1988 1989 1990 1991 1992 1993 1994 1995
      // Set bound if not done with this key
      if (p.key != NULL)
      {
        DBUG_PRINT("info", ("key %d:%d offset=%d length=%d last=%d bound=%d",
                            j, i, tot_len, part_len, p.part_last, p.bound_type));
        DBUG_DUMP("info", (const char*)p.part_ptr, part_store_len);

        // Set bound if not cancelled via type -1
        if (p.bound_type != -1)
1996
        {
pekka@mysql.com's avatar
pekka@mysql.com committed
1997 1998 1999
          const char* ptr= p.bound_ptr;
          char buf[256];
          shrink_varchar(field, ptr, buf);
tomas@poseidon.ndb.mysql.com's avatar
Merge  
tomas@poseidon.ndb.mysql.com committed
2000
          if (op->setBound(i, p.bound_type, ptr))
2001
            ERR_RETURN(op->getNdbError());
2002
        }
2003 2004 2005 2006
      }
    }

    tot_len+= part_store_len;
2007
  }
2008
  op->end_of_bound(range_no);
2009 2010 2011
  DBUG_RETURN(0);
}

2012
/*
2013
  Start ordered index scan in NDB
2014 2015
*/

2016
int ha_ndbcluster::ordered_index_scan(const key_range *start_key,
2017 2018
                                      const key_range *end_key,
                                      bool sorted, bool descending, byte* buf)
2019
{  
2020
  int res;
joreland@mysql.com's avatar
joreland@mysql.com committed
2021
  bool restart;
2022
  NdbTransaction *trans= m_active_trans;
joreland@mysql.com's avatar
joreland@mysql.com committed
2023
  NdbIndexScanOperation *op;
2024

2025 2026 2027
  DBUG_ENTER("ha_ndbcluster::ordered_index_scan");
  DBUG_PRINT("enter", ("index: %u, sorted: %d, descending: %d",
             active_index, sorted, descending));  
2028
  DBUG_PRINT("enter", ("Starting new ordered scan on %s", m_tabname));
pekka@mysql.com's avatar
pekka@mysql.com committed
2029

2030 2031
  // Check that sorted seems to be initialised
  DBUG_ASSERT(sorted == 0 || sorted == 1);
2032
  
2033
  if (m_active_cursor == 0)
joreland@mysql.com's avatar
joreland@mysql.com committed
2034
  {
2035
    restart= FALSE;
joreland@mysql.com's avatar
joreland@mysql.com committed
2036 2037
    NdbOperation::LockMode lm=
      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
2038
    bool need_pk = (lm == NdbOperation::LM_Read);
joreland@mysql.com's avatar
joreland@mysql.com committed
2039
    if (!(op= trans->getNdbIndexScanOperation((NDBINDEX *)
2040 2041
                                              m_index[active_index].index, 
                                              (const NDBTAB *) m_table)) ||
2042
        op->readTuples(lm, 0, parallelism, sorted, descending, false, need_pk))
joreland@mysql.com's avatar
joreland@mysql.com committed
2043
      ERR_RETURN(trans->getNdbError());
2044
    m_active_cursor= op;
joreland@mysql.com's avatar
joreland@mysql.com committed
2045
  } else {
2046
    restart= TRUE;
2047
    op= (NdbIndexScanOperation*)m_active_cursor;
joreland@mysql.com's avatar
joreland@mysql.com committed
2048 2049 2050
    
    DBUG_ASSERT(op->getSorted() == sorted);
    DBUG_ASSERT(op->getLockMode() == 
2051
                (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
2052
    if (op->reset_bounds(m_force_send))
joreland@mysql.com's avatar
joreland@mysql.com committed
2053 2054
      DBUG_RETURN(ndb_err(m_active_trans));
  }
2055
  
2056
  {
2057
    const key_range *keys[2]= { start_key, end_key };
2058 2059 2060
    res= set_bounds(op, keys);
    if (res)
      DBUG_RETURN(res);
2061
  }
2062 2063 2064

  if (!restart && generate_scan_filter(m_cond_stack, op))
    DBUG_RETURN(ndb_err(trans));
2065
  
2066
  if (!restart && (res= define_read_attrs(buf, op)))
2067
  {
2068
    DBUG_RETURN(res);
joreland@mysql.com's avatar
joreland@mysql.com committed
2069
  }
2070

2071
  if (execute_no_commit(this,trans,false) != 0)
2072 2073 2074 2075
    DBUG_RETURN(ndb_err(trans));
  
  DBUG_RETURN(next_result(buf));
}
2076 2077

/*
  Start full table scan in NDB

  SYNOPSIS
    full_table_scan()
    buf         Buffer for the first row returned by the scan

  DESCRIPTION
    Creates a scan operation on the table, makes it the active cursor,
    attaches the scan filter and the read attributes, and executes the
    transaction without committing.

  RETURN
    Result of next_result(buf) once the scan has been started,
    otherwise an error code
*/

int ha_ndbcluster::full_table_scan(byte *buf)
{
  NdbTransaction *trans= m_active_trans;
  NdbScanOperation *op;
  int res;

  DBUG_ENTER("full_table_scan");
  DBUG_PRINT("enter", ("Starting new scan on %s", m_tabname));

  NdbOperation::LockMode lm=
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
  // SF_KeyInfo is requested only for LM_Read scans
  const bool need_pk= (lm == NdbOperation::LM_Read);
  const int scan_flags= need_pk ? NdbScanOperation::SF_KeyInfo : 0;

  op= trans->getNdbScanOperation((const NDBTAB *) m_table);
  if (!op)
    ERR_RETURN(trans->getNdbError());
  if (op->readTuples(lm, scan_flags, parallelism))
    ERR_RETURN(trans->getNdbError());
  m_active_cursor= op;

  if (generate_scan_filter(m_cond_stack, op))
    DBUG_RETURN(ndb_err(trans));

  res= define_read_attrs(buf, op);
  if (res)
    DBUG_RETURN(res);

  if (execute_no_commit(this,trans,false) != 0)
    DBUG_RETURN(ndb_err(trans));

  DBUG_PRINT("exit", ("Scan started successfully"));
  DBUG_RETURN(next_result(buf));
}

2110 2111 2112 2113 2114
/*
  Insert one record into NDB

  SYNOPSIS
    write_row()
    record      Row to insert

  DESCRIPTION
    Defines an insert operation (a write operation when m_use_write is
    set) on the active transaction, sets the primary key (a generated
    hidden key for tables without a primary key) and all non-key
    attributes. The operation is sent to NDB when one of the flush
    conditions further down is met.

  RETURN
    0                       Row accepted
    HA_ERR_FOUND_DUPP_KEY   m_ignore_dup_key is set, m_use_write is not,
                            and peek_indexed_rows() returned 0
    other                   Error code
*/
int ha_ndbcluster::write_row(byte *record)
{
  bool has_auto_increment;
  uint i;
  NdbTransaction *trans= m_active_trans;
  NdbOperation *op;
  int res;
  THD *thd= current_thd;

  DBUG_ENTER("write_row");

  has_auto_increment= (table->next_number_field && record == table->record[0]);
  if (table->s->primary_key != MAX_KEY)
  {
    /*
     * Increase any auto_incremented primary key
     */
    if (has_auto_increment)
    {
      // Named differently from thd above to avoid shadowing it
      THD *table_thd= table->in_use;

      m_skip_auto_increment= FALSE;
      update_auto_increment();
      /* Ensure that handler is always called for auto_increment values */
      table_thd->next_insert_id= 0;
      m_skip_auto_increment= !auto_increment_column_changed;
    }
  }

  /*
   * If IGNORE, then ignore constraint violations on primary and unique keys
   */
  if (!m_use_write && m_ignore_dup_key)
  {
    /*
      compare if expression with that in start_bulk_insert()
      start_bulk_insert will set parameters to ensure that each
      write_row is committed individually
    */
    int peek_res= peek_indexed_rows(record);

    if (!peek_res)
    {
      DBUG_RETURN(HA_ERR_FOUND_DUPP_KEY);
    }
    if (peek_res != HA_ERR_KEY_NOT_FOUND)
      DBUG_RETURN(peek_res);
  }

  statistic_increment(thd->status_var.ha_write_count, &LOCK_status);
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
    table->timestamp_field->set_time();

  if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)))
    ERR_RETURN(trans->getNdbError());

  res= (m_use_write) ? op->writeTuple() :op->insertTuple();
  if (res != 0)
    ERR_RETURN(trans->getNdbError());

  if (table->s->primary_key == MAX_KEY)
  {
    // Table has hidden primary key
    Ndb *ndb= get_ndb();
    int ret;
    Uint64 auto_value;
    uint retries= NDB_AUTO_INCREMENT_RETRIES;
    // Retry fetching the hidden key value while NDB reports a temporary error
    do {
      ret= ndb->getAutoIncrementValue((const NDBTAB *) m_table, auto_value, 1);
    } while (ret == -1 &&
             --retries &&
             ndb->getNdbError().status == NdbError::TemporaryError);
    if (ret == -1)
      ERR_RETURN(ndb->getNdbError());
    if (set_hidden_key(op, table->s->fields, (const byte*)&auto_value))
      ERR_RETURN(op->getNdbError());
  }
  else
  {
    // Leave through DBUG_RETURN so that the DBUG_ENTER above is balanced
    if ((res= set_primary_key_from_record(op, record)))
      DBUG_RETURN(res);
  }

  // Set non-key attribute(s)
  bool set_blob_value= FALSE;
  for (i= 0; i < table->s->fields; i++)
  {
    Field *field= table->field[i];
    if (!(field->flags & PRI_KEY_FLAG) &&
        set_ndb_value(op, field, i, &set_blob_value))
    {
      m_skip_auto_increment= TRUE;
      ERR_RETURN(op->getNdbError());
    }
  }

  m_rows_changed++;

  /*
    Execute write operation
    NOTE When doing inserts with many values in
    each INSERT statement it should not be necessary
    to NoCommit the transaction between each row.
    Find out how this is detected!
  */
  m_rows_inserted++;
  no_uncommitted_rows_update(1);
  m_bulk_insert_not_flushed= TRUE;
  if ((m_rows_to_insert == (ha_rows) 1) ||
      ((m_rows_inserted % m_bulk_insert_rows) == 0) ||
      m_primary_key_update ||
      set_blob_value)
  {
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "\
                        "rows_inserted:%d, bulk_insert_rows: %d",
                        (int)m_rows_inserted, (int)m_bulk_insert_rows));

    m_bulk_insert_not_flushed= FALSE;
    if (m_transaction_on)
    {
      if (execute_no_commit(this,trans,false) != 0)
      {
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
      }
    }
    else
    {
      if (execute_commit(this,trans) != 0)
      {
        m_skip_auto_increment= TRUE;
        no_uncommitted_rows_execute_failure();
        DBUG_RETURN(ndb_err(trans));
      }
      if (trans->restart() != 0)
      {
        DBUG_ASSERT(0);
        DBUG_RETURN(-1);
      }
    }
  }
  if ((has_auto_increment) && (m_skip_auto_increment))
  {
    Ndb *ndb= get_ndb();
    Uint64 next_val= (Uint64) table->next_number_field->val_int() + 1;
    char buff[22];
    DBUG_PRINT("info",
               ("Trying to set next auto increment value to %s",
                llstr(next_val, buff)));
    if (ndb->setAutoIncrementValue((const NDBTAB *) m_table, next_val, TRUE)
        == -1)
      ERR_RETURN(ndb->getNdbError());
  }
  m_skip_auto_increment= TRUE;

  DBUG_RETURN(0);
}


/*
  Compare if a key in a row has changed

  SYNOPSIS
    key_cmp()
    keynr       Index number in table->key_info
    old_row     Row image before the change
    new_row     Row image after the change

  RETURN
    0           All key parts compare equal in both rows
    1           At least one key part differs (null flag or value)
*/

int ha_ndbcluster::key_cmp(uint keynr, const byte * old_row,
                           const byte * new_row)
{
  KEY_PART_INFO *part= table->key_info[keynr].key_part;
  const uint part_count= table->key_info[keynr].key_parts;

  for (uint k= 0; k < part_count; k++, part++)
  {
    // Null flag set in exactly one of the two rows
    if (part->null_bit &&
        ((old_row[part->null_offset] ^ new_row[part->null_offset]) &
         part->null_bit))
      return 1;

    const byte *old_val= old_row + part->offset;
    const byte *new_val= new_row + part->offset;

    if (part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
    {
      // Blob and variable length parts are compared by the field itself
      if (part->field->cmp_binary((char*) old_val,
                                  (char*) new_val,
                                  (ulong) part->length))
        return 1;
    }
    else if (memcmp(old_val, new_val, part->length))
      return 1;
  }
  return 0;
}

/*
  Update one record in NDB using primary key
*/

int ha_ndbcluster::update_row(const byte *old_data, byte *new_data)
{
  THD *thd= current_thd;
2317
  NdbTransaction *trans= m_active_trans;
2318
  NdbScanOperation* cursor= m_active_cursor;
2319 2320 2321 2322
  NdbOperation *op;
  uint i;
  DBUG_ENTER("update_row");
  
2323
  statistic_increment(thd->status_var.ha_update_count, &LOCK_status);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2324
  if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
2325
  {
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2326
    table->timestamp_field->set_time();
2327 2328 2329
    // Set query_id so that field is really updated
    table->timestamp_field->query_id= thd->query_id;
  }
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2330

2331
  /* Check for update of primary key for special handling */  
2332 2333
  if ((table->s->primary_key != MAX_KEY) &&
      (key_cmp(table->s->primary_key, old_data, new_data)))
2334
  {
2335
    int read_res, insert_res, delete_res, undo_res;
2336

2337
    DBUG_PRINT("info", ("primary key update, doing pk read+delete+insert"));
2338
    // Get all old fields, since we optimize away fields not in query
2339
    read_res= complemented_pk_read(old_data, new_data);
2340 2341 2342 2343 2344
    if (read_res)
    {
      DBUG_PRINT("info", ("pk read failed"));
      DBUG_RETURN(read_res);
    }
2345
    // Delete old row
2346
    m_primary_key_update= TRUE;
2347
    delete_res= delete_row(old_data);
2348
    m_primary_key_update= FALSE;
2349 2350 2351
    if (delete_res)
    {
      DBUG_PRINT("info", ("delete failed"));
2352
      DBUG_RETURN(delete_res);
2353
    }     
2354 2355
    // Insert new row
    DBUG_PRINT("info", ("delete succeded"));
2356
    m_primary_key_update= TRUE;
2357
    insert_res= write_row(new_data);
2358
    m_primary_key_update= FALSE;
2359 2360 2361 2362 2363
    if (insert_res)
    {
      DBUG_PRINT("info", ("insert failed"));
      if (trans->commitStatus() == NdbConnection::Started)
      {
2364
        // Undo delete_row(old_data)
2365
        m_primary_key_update= TRUE;
2366 2367 2368 2369 2370 2371
        undo_res= write_row((byte *)old_data);
        if (undo_res)
          push_warning(current_thd, 
                       MYSQL_ERROR::WARN_LEVEL_WARN, 
                       undo_res, 
                       "NDB failed undoing delete at primary key update");
2372 2373 2374 2375 2376
        m_primary_key_update= FALSE;
      }
      DBUG_RETURN(insert_res);
    }
    DBUG_PRINT("info", ("delete+insert succeeded"));
2377
    DBUG_RETURN(0);
2378
  }
2379

2380
  if (cursor)
2381
  {
2382 2383 2384 2385 2386 2387 2388 2389
    /*
      We are scanning records and want to update the record
      that was just found, call updateTuple on the cursor 
      to take over the lock to a new update operation
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling updateTuple on cursor"));
2390
    if (!(op= cursor->updateCurrentTuple()))
2391
      ERR_RETURN(trans->getNdbError());
2392
    m_lock_tuple= false;
2393
    m_ops_pending++;
2394
    if (uses_blob_value(FALSE))
2395
      m_blobs_pending= TRUE;
2396 2397 2398
  }
  else
  {  
joreland@mysql.com's avatar
joreland@mysql.com committed
2399
    if (!(op= trans->getNdbOperation((const NDBTAB *) m_table)) ||
2400
        op->updateTuple() != 0)
2401 2402
      ERR_RETURN(trans->getNdbError());  
    
2403
    if (table->s->primary_key == MAX_KEY) 
2404 2405 2406 2407 2408
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
      
      // Require that the PK for this record has previously been 
2409 2410
      // read into m_ref
      DBUG_DUMP("key", m_ref, NDB_HIDDEN_PRIMARY_KEY_LENGTH);
2411
      
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2412
      if (set_hidden_key(op, table->s->fields, m_ref))
2413
        ERR_RETURN(op->getNdbError());
2414 2415 2416 2417
    } 
    else 
    {
      int res;
2418
      if ((res= set_primary_key_from_record(op, old_data)))
2419
        DBUG_RETURN(res);
2420
    }
2421 2422
  }

2423 2424
  m_rows_changed++;

2425
  // Set non-key attribute(s)
2426
  for (i= 0; i < table->s->fields; i++) 
2427 2428
  {
    Field *field= table->field[i];
2429
    if (((thd->query_id == field->query_id) || m_retrieve_all_fields) &&
2430
        (!(field->flags & PRI_KEY_FLAG)) &&
2431
        set_ndb_value(op, field, i))
2432 2433
      ERR_RETURN(op->getNdbError());
  }
2434

2435
  // Execute update operation
2436
  if (!cursor && execute_no_commit(this,trans,false) != 0) {
2437
    no_uncommitted_rows_execute_failure();
2438
    DBUG_RETURN(ndb_err(trans));
2439
  }
2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450
  
  DBUG_RETURN(0);
}


/*
  Delete one record from NDB, using primary key 
*/

int ha_ndbcluster::delete_row(const byte *record)
{
2451
  THD *thd= current_thd;
2452
  NdbTransaction *trans= m_active_trans;
2453
  NdbScanOperation* cursor= m_active_cursor;
2454 2455 2456
  NdbOperation *op;
  DBUG_ENTER("delete_row");

2457
  statistic_increment(thd->status_var.ha_delete_count,&LOCK_status);
2458
  m_rows_changed++;
2459

2460
  if (cursor)
2461
  {
2462
    /*
2463
      We are scanning records and want to delete the record
2464
      that was just found, call deleteTuple on the cursor 
2465
      to take over the lock to a new delete operation
2466 2467 2468 2469
      And thus setting the primary key of the record from 
      the active record in cursor
    */
    DBUG_PRINT("info", ("Calling deleteTuple on cursor"));
2470
    if (cursor->deleteCurrentTuple() != 0)
2471
      ERR_RETURN(trans->getNdbError());     
2472
    m_lock_tuple= false;
2473
    m_ops_pending++;
2474

2475 2476
    no_uncommitted_rows_update(-1);

2477 2478 2479
    if (!m_primary_key_update)
      // If deleting from cursor, NoCommit will be handled in next_result
      DBUG_RETURN(0);
2480 2481
  }
  else
2482
  {
2483
    
joreland@mysql.com's avatar
joreland@mysql.com committed
2484
    if (!(op=trans->getNdbOperation((const NDBTAB *) m_table)) || 
2485
        op->deleteTuple() != 0)
2486 2487
      ERR_RETURN(trans->getNdbError());
    
2488 2489
    no_uncommitted_rows_update(-1);
    
2490
    if (table->s->primary_key == MAX_KEY) 
2491 2492 2493 2494
    {
      // This table has no primary key, use "hidden" primary key
      DBUG_PRINT("info", ("Using hidden key"));
      
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2495
      if (set_hidden_key(op, table->s->fields, m_ref))
2496
        ERR_RETURN(op->getNdbError());
2497 2498 2499 2500
    } 
    else 
    {
      int res;
2501 2502
      if ((res= set_primary_key_from_record(op, record)))
        return res;  
2503
    }
2504
  }
2505

2506
  // Execute delete operation
2507
  if (execute_no_commit(this,trans,false) != 0) {
2508
    no_uncommitted_rows_execute_failure();
2509
    DBUG_RETURN(ndb_err(trans));
2510
  }
2511 2512
  DBUG_RETURN(0);
}
2513
  
2514 2515 2516 2517 2518
/*
  Unpack a record read from NDB

  SYNOPSIS
    unpack_record()
    buf                 Buffer to store read row

  NOTE
    The data for each row is read directly into the
    destination buffer. This function is primarily
    called in order to check if any fields should be
    set to null. Non-null BIT fields are additionally
    stored into their Field_bit object here.
*/

void ha_ndbcluster::unpack_record(byte* buf)
{
  const uint row_offset= (uint) (buf - table->record[0]);
  const uint field_count= table->s->fields;
  DBUG_ENTER("unpack_record");

  // Clear the null bytes, then set null flag(s) per field below
  bzero(buf, table->s->null_bytes);
  for (uint f= 0; f < field_count; f++)
  {
    Field *fld= table->field[f];
    NdbValue &val= m_value[f];

    if (!val.ptr)
      continue;                         // no value handle for this field

    if (fld->flags & BLOB_FLAG)
    {
      NdbBlob *ndb_blob= val.blob;
      bool isNull= TRUE;
#ifndef DBUG_OFF
      int ret=
#endif
        ndb_blob->getNull(isNull);
      DBUG_ASSERT(ret == 0);
      if (isNull)
        fld->set_null(row_offset);
      continue;
    }

    if (val.rec->isNULL())
    {
      fld->set_null(row_offset);
      continue;
    }

    if (fld->type() != MYSQL_TYPE_BIT)
      continue;

    // BIT fields: short ones are read as 32 bit, longer ones as 64 bit
    const uint pack_len= fld->pack_length();
    if (pack_len < 5)
    {
      DBUG_PRINT("info", ("bit field H'%.8X",
                          val.rec->u_32_value()));
      ((Field_bit *) fld)->store((longlong) val.rec->u_32_value(),
                                 FALSE);
    }
    else
    {
      DBUG_PRINT("info", ("bit field H'%.8X%.8X",
                          *(Uint32 *)val.rec->aRef(),
                          *((Uint32 *)val.rec->aRef()+1)));
      ((Field_bit *) fld)->store((longlong) val.rec->u_64_value(),
                                 TRUE);
    }
  }

#ifndef DBUG_OFF
  // Read and print all values that were fetched
  if (table->s->primary_key == MAX_KEY)
  {
    // Table with hidden primary key
    int hidden_no= table->s->fields;
    char buff[22];
    const NDBTAB *tab= (const NDBTAB *) m_table;
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    const NdbRecAttr* rec= m_value[hidden_no].rec;
    DBUG_ASSERT(rec);
    DBUG_PRINT("hidden", ("%d: %s \"%s\"", hidden_no,
                          hidden_col->getName(),
                          llstr(rec->u_64_value(), buff)));
  }
  print_results();
#endif
  DBUG_VOID_RETURN;
}

/*
  Utility function to print/dump the fetched field
 */

void ha_ndbcluster::print_results()
{
  DBUG_ENTER("print_results");

#ifndef DBUG_OFF
2614
  const NDBTAB *tab= (const NDBTAB*) m_table;
2615

2616 2617
  if (!_db_on_)
    DBUG_VOID_RETURN;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2618

2619
  char buf_type[MAX_FIELD_WIDTH], buf_val[MAX_FIELD_WIDTH];
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2620
  String type(buf_type, sizeof(buf_type), &my_charset_bin);
2621
  String val(buf_val, sizeof(buf_val), &my_charset_bin);
2622
  for (uint f= 0; f < table->s->fields; f++)
2623
  {
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2624
    /* Use DBUG_PRINT since DBUG_FILE cannot be filtered out */
2625
    char buf[2000];
2626
    Field *field;
2627
    void* ptr;
pekka@mysql.com's avatar
pekka@mysql.com committed
2628
    NdbValue value;
2629

2630
    buf[0]= 0;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2631
    field= table->field[f];
pekka@mysql.com's avatar
pekka@mysql.com committed
2632
    if (!(value= m_value[f]).ptr)
2633
    {
2634
      strmov(buf, "not read");
2635
      goto print_value;
2636
    }
2637

2638
    ptr= field->ptr;
pekka@mysql.com's avatar
pekka@mysql.com committed
2639 2640

    if (! (field->flags & BLOB_FLAG))
2641
    {
pekka@mysql.com's avatar
pekka@mysql.com committed
2642 2643
      if (value.rec->isNULL())
      {
2644
        strmov(buf, "NULL");
2645
        goto print_value;
pekka@mysql.com's avatar
pekka@mysql.com committed
2646
      }
2647 2648 2649 2650 2651
      type.length(0);
      val.length(0);
      field->sql_type(type);
      field->val_str(&val);
      my_snprintf(buf, sizeof(buf), "%s %s", type.c_ptr(), val.c_ptr());
pekka@mysql.com's avatar
pekka@mysql.com committed
2652 2653 2654
    }
    else
    {
2655
      NdbBlob *ndb_blob= value.blob;
2656
      bool isNull= TRUE;
pekka@mysql.com's avatar
pekka@mysql.com committed
2657
      ndb_blob->getNull(isNull);
2658 2659
      if (isNull)
        strmov(buf, "NULL");
2660
    }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2661

2662
print_value:
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
2663
    DBUG_PRINT("value", ("%u,%s: %s", f, field->field_name, buf));
2664 2665 2666 2667 2668 2669 2670 2671
  }
#endif
  DBUG_VOID_RETURN;
}


/*
  Prepare for use of an index: clears m_lock_tuple and hands over to
  handler::index_init(), whose result is returned.
*/
int ha_ndbcluster::index_init(uint index)
{
  DBUG_ENTER("ha_ndbcluster::index_init");
  DBUG_PRINT("enter", ("index: %u", index));

  /*
    Locks are explicitly released in scan
    unless m_lock.type == TL_READ_HIGH_PRIORITY
    and no subsequent call to unlock_row()
  */
  m_lock_tuple= false;

  DBUG_RETURN(handler::index_init(index));
}


int ha_ndbcluster::index_end()
{
2686
  DBUG_ENTER("ha_ndbcluster::index_end");
2687
  DBUG_RETURN(close_scan());
2688 2689
}

2690 2691 2692 2693 2694 2695 2696 2697
/**
 * Check if key contains null
 */
static
int
check_null_in_key(const KEY* key_info, const byte *key, uint key_len)
{
  KEY_PART_INFO *curr_part, *end_part;
2698
  const byte* end_ptr= key + key_len;
2699 2700 2701 2702 2703 2704
  curr_part= key_info->key_part;
  end_part= curr_part + key_info->key_parts;
  

  for (; curr_part != end_part && key < end_ptr; curr_part++)
  {
2705
    if (curr_part->null_bit && *key)
2706 2707 2708 2709 2710 2711
      return 1;

    key += curr_part->store_length;
  }
  return 0;
}
2712 2713

int ha_ndbcluster::index_read(byte *buf,
2714 2715
                              const byte *key, uint key_len, 
                              enum ha_rkey_function find_flag)
2716
{
2717
  DBUG_ENTER("ha_ndbcluster::index_read");
2718 2719 2720
  DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", 
                       active_index, key_len, find_flag));

joreland@mysql.com's avatar
joreland@mysql.com committed
2721
  int error;
2722 2723
  ndb_index_type type= get_index_type(active_index);
  const KEY* key_info= table->key_info+active_index;
joreland@mysql.com's avatar
joreland@mysql.com committed
2724 2725 2726 2727 2728
  switch (type){
  case PRIMARY_KEY_ORDERED_INDEX:
  case PRIMARY_KEY_INDEX:
    if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len)
    {
2729
      if (m_active_cursor && (error= close_scan()))
2730
        DBUG_RETURN(error);
joreland@mysql.com's avatar
joreland@mysql.com committed
2731 2732 2733 2734 2735 2736 2737 2738 2739
      DBUG_RETURN(pk_read(key, key_len, buf));
    }
    else if (type == PRIMARY_KEY_INDEX)
    {
      DBUG_RETURN(1);
    }
    break;
  case UNIQUE_ORDERED_INDEX:
  case UNIQUE_INDEX:
2740
    if (find_flag == HA_READ_KEY_EXACT && key_info->key_length == key_len &&
2741
        !check_null_in_key(key_info, key, key_len))
joreland@mysql.com's avatar
joreland@mysql.com committed
2742
    {
2743
      if (m_active_cursor && (error= close_scan()))
2744
        DBUG_RETURN(error);
joreland@mysql.com's avatar
joreland@mysql.com committed
2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755
      DBUG_RETURN(unique_index_read(key, key_len, buf));
    }
    else if (type == UNIQUE_INDEX)
    {
      DBUG_RETURN(1);
    }
    break;
  case ORDERED_INDEX:
    break;
  default:
  case UNDEFINED_INDEX:
2756
    DBUG_ASSERT(FALSE);
2757
    DBUG_RETURN(1);
joreland@mysql.com's avatar
joreland@mysql.com committed
2758 2759 2760
    break;
  }
  
2761
  key_range start_key;
2762 2763 2764
  start_key.key= key;
  start_key.length= key_len;
  start_key.flag= find_flag;
2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776
  bool descending= FALSE;
  switch (find_flag) {
  case HA_READ_KEY_OR_PREV:
  case HA_READ_BEFORE_KEY:
  case HA_READ_PREFIX_LAST:
  case HA_READ_PREFIX_LAST_OR_PREV:
    descending= TRUE;
    break;
  default:
    break;
  }
  error= ordered_index_scan(&start_key, 0, TRUE, descending, buf);  
joreland@mysql.com's avatar
joreland@mysql.com committed
2777
  DBUG_RETURN(error == HA_ERR_END_OF_FILE ? HA_ERR_KEY_NOT_FOUND : error);
2778 2779 2780 2781
}


int ha_ndbcluster::index_read_idx(byte *buf, uint index_no, 
2782 2783
                              const byte *key, uint key_len, 
                              enum ha_rkey_function find_flag)
2784
{
2785
  statistic_increment(current_thd->status_var.ha_read_key_count, &LOCK_status);
2786
  DBUG_ENTER("ha_ndbcluster::index_read_idx");
2787 2788 2789 2790 2791 2792 2793 2794
  DBUG_PRINT("enter", ("index_no: %u, key_len: %u", index_no, key_len));  
  index_init(index_no);  
  DBUG_RETURN(index_read(buf, key, key_len, find_flag));
}


/*
  Fetch the next row of the active scan into buf

  RETURN
    the result of next_result()
*/
int ha_ndbcluster::index_next(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_next");
  statistic_increment(current_thd->status_var.ha_read_next_count,
                      &LOCK_status);

  const int res= next_result(buf);
  DBUG_RETURN(res);
}


/*
  Fetch the previous row of the active scan into buf

  The row is fetched with next_result(), exactly as in index_next().
  NOTE(review): presumably the scan was opened in descending order by
  the caller's first read - confirm.

  RETURN
    the result of next_result()
*/
int ha_ndbcluster::index_prev(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_prev");
  statistic_increment(current_thd->status_var.ha_read_prev_count,
                      &LOCK_status);

  const int res= next_result(buf);
  DBUG_RETURN(res);
}


/*
  Read the first row in index order

  Starts a sorted, ascending ordered index scan without any bounds
  and fetches its first row into buf.
  Only HA_READ_ORDER indexes get called by index_first.

  RETURN
    the result of ordered_index_scan()
*/
int ha_ndbcluster::index_first(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_first");
  statistic_increment(current_thd->status_var.ha_read_first_count,
                      &LOCK_status);

  /* No start key, no end key, sorted, ascending */
  const int res= ordered_index_scan(0, 0, TRUE, FALSE, buf);
  DBUG_RETURN(res);
}


/*
  Read the last row in index order

  Starts a sorted, descending ordered index scan without any bounds
  and fetches its first row into buf.

  RETURN
    the result of ordered_index_scan()
*/
int ha_ndbcluster::index_last(byte *buf)
{
  DBUG_ENTER("ha_ndbcluster::index_last");
  statistic_increment(current_thd->status_var.ha_read_last_count,&LOCK_status);

  /* No start key, no end key, sorted, descending */
  const int res= ordered_index_scan(0, 0, TRUE, TRUE, buf);
  DBUG_RETURN(res);
}

2830 2831 2832 2833 2834
/*
  Read the last row matching a key prefix

  Shorthand for index_read() with the HA_READ_PREFIX_LAST search mode.

  RETURN
    the result of index_read()
*/
int ha_ndbcluster::index_read_last(byte * buf, const byte * key, uint key_len)
{
  DBUG_ENTER("ha_ndbcluster::index_read_last");
  const int res= index_read(buf, key, key_len, HA_READ_PREFIX_LAST);
  DBUG_RETURN(res);
}
2835

2836 2837
inline
int ha_ndbcluster::read_range_first_to_buf(const key_range *start_key,
2838 2839 2840
                                           const key_range *end_key,
                                           bool eq_r, bool sorted,
                                           byte* buf)
2841
{
2842
  KEY* key_info;
2843 2844
  int error= 1; 
  DBUG_ENTER("ha_ndbcluster::read_range_first_to_buf");
2845
  DBUG_PRINT("info", ("eq_r: %d, sorted: %d", eq_r, sorted));
2846

2847
  switch (get_index_type(active_index)){
2848
  case PRIMARY_KEY_ORDERED_INDEX:
2849
  case PRIMARY_KEY_INDEX:
2850 2851
    key_info= table->key_info + active_index;
    if (start_key && 
2852 2853
        start_key->length == key_info->key_length &&
        start_key->flag == HA_READ_KEY_EXACT)
2854
    {
2855
      if (m_active_cursor && (error= close_scan()))
2856
        DBUG_RETURN(error);
2857 2858 2859
      error= pk_read(start_key->key, start_key->length, buf);      
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
2860
    break;
2861
  case UNIQUE_ORDERED_INDEX:
2862
  case UNIQUE_INDEX:
2863
    key_info= table->key_info + active_index;
2864
    if (start_key && start_key->length == key_info->key_length &&
2865 2866
        start_key->flag == HA_READ_KEY_EXACT && 
        !check_null_in_key(key_info, start_key->key, start_key->length))
2867
    {
2868
      if (m_active_cursor && (error= close_scan()))
2869
        DBUG_RETURN(error);
2870 2871 2872
      error= unique_index_read(start_key->key, start_key->length, buf);
      DBUG_RETURN(error == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : error);
    }
2873 2874 2875 2876
    break;
  default:
    break;
  }
2877 2878

  // Start the ordered index scan and fetch the first row
2879
  error= ordered_index_scan(start_key, end_key, sorted, FALSE, buf);
2880 2881 2882
  DBUG_RETURN(error);
}

2883

joreland@mysql.com's avatar
joreland@mysql.com committed
2884
int ha_ndbcluster::read_range_first(const key_range *start_key,
2885 2886
                                    const key_range *end_key,
                                    bool eq_r, bool sorted)
joreland@mysql.com's avatar
joreland@mysql.com committed
2887 2888 2889 2890 2891
{
  byte* buf= table->record[0];
  DBUG_ENTER("ha_ndbcluster::read_range_first");
  
  DBUG_RETURN(read_range_first_to_buf(start_key,
2892 2893 2894 2895
                                      end_key,
                                      eq_r, 
                                      sorted,
                                      buf));
joreland@mysql.com's avatar
joreland@mysql.com committed
2896 2897
}

2898
int ha_ndbcluster::read_range_next()
2899 2900 2901 2902 2903 2904
{
  DBUG_ENTER("ha_ndbcluster::read_range_next");
  DBUG_RETURN(next_result(table->record[0]));
}


2905 2906
int ha_ndbcluster::rnd_init(bool scan)
{
2907
  NdbScanOperation *cursor= m_active_cursor;
2908 2909
  DBUG_ENTER("rnd_init");
  DBUG_PRINT("enter", ("scan: %d", scan));
2910
  // Check if scan is to be restarted
mskold@mysql.com's avatar
mskold@mysql.com committed
2911 2912 2913 2914
  if (cursor)
  {
    if (!scan)
      DBUG_RETURN(1);
2915
    if (cursor->restart(m_force_send) != 0)
2916 2917 2918 2919
    {
      DBUG_ASSERT(0);
      DBUG_RETURN(-1);
    }
mskold@mysql.com's avatar
mskold@mysql.com committed
2920
  }
2921
  index_init(table->s->primary_key);
2922 2923 2924
  DBUG_RETURN(0);
}

2925 2926
int ha_ndbcluster::close_scan()
{
2927
  NdbTransaction *trans= m_active_trans;
2928 2929
  DBUG_ENTER("close_scan");

2930 2931
  m_multi_cursor= 0;
  if (!m_active_cursor && !m_multi_cursor)
2932 2933
    DBUG_RETURN(1);

2934
  NdbScanOperation *cursor= m_active_cursor ? m_active_cursor : m_multi_cursor;
2935
  
2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947
  if (m_lock_tuple)
  {
    /*
      Lock level m_lock.type either TL_WRITE_ALLOW_WRITE
      (SELECT FOR UPDATE) or TL_READ_WITH_SHARED_LOCKS (SELECT
      LOCK WITH SHARE MODE) and row was not explictly unlocked 
      with unlock_row() call
    */
      NdbOperation *op;
      // Lock row
      DBUG_PRINT("info", ("Keeping lock on scanned row"));
      
2948
      if (!(op= cursor->lockCurrentTuple()))
2949 2950 2951 2952 2953 2954
      {
	m_lock_tuple= false;
	ERR_RETURN(trans->getNdbError());
      }
      m_ops_pending++;      
  }
2955
  m_lock_tuple= false;
2956
  if (m_ops_pending)
2957 2958 2959 2960 2961
  {
    /*
      Take over any pending transactions to the 
      deleteing/updating transaction before closing the scan    
    */
2962
    DBUG_PRINT("info", ("ops_pending: %d", m_ops_pending));    
2963
    if (execute_no_commit(this,trans,false) != 0) {
2964
      no_uncommitted_rows_execute_failure();
2965
      DBUG_RETURN(ndb_err(trans));
2966
    }
2967
    m_ops_pending= 0;
2968 2969
  }
  
2970
  cursor->close(m_force_send, TRUE);
2971
  m_active_cursor= m_multi_cursor= NULL;
mskold@mysql.com's avatar
mskold@mysql.com committed
2972
  DBUG_RETURN(0);
2973
}
2974 2975 2976 2977

int ha_ndbcluster::rnd_end()
{
  DBUG_ENTER("rnd_end");
2978
  DBUG_RETURN(close_scan());
2979 2980 2981 2982 2983 2984
}


/*
  Fetch the next row of a table scan into buf

  The first call, when no cursor is active yet, starts a full table
  scan; later calls continue on the active cursor.

  RETURN
    the result of full_table_scan() or next_result()
*/
int ha_ndbcluster::rnd_next(byte *buf)
{
  DBUG_ENTER("rnd_next");
  statistic_increment(current_thd->status_var.ha_read_rnd_next_count,
                      &LOCK_status);

  int res;
  if (m_active_cursor)
    res= next_result(buf);
  else
    res= full_table_scan(buf);
  DBUG_RETURN(res);
}


/*
  Read a row by its saved position

  An "interesting" record has been found earlier and its primary key
  was saved by calling position(). Now the record is read from the
  database once again.

  SYNOPSIS
    buf   buffer to read the row into
    pos   the saved primary key, ref_length bytes

  RETURN
    the result of pk_read()
*/

int ha_ndbcluster::rnd_pos(byte *buf, byte *pos)
{
  DBUG_ENTER("rnd_pos");
  statistic_increment(current_thd->status_var.ha_read_rnd_count,
                      &LOCK_status);

  /* pos holds the primary key: perform a pk_read with it */
  const int res= pk_read(pos, ref_length, buf);
  DBUG_RETURN(res);
}


/*
  Store the primary key of this record in ref 
  variable, so that the row can be retrieved again later
  using "reference" in rnd_pos
*/

void ha_ndbcluster::position(const byte *record)
{
  KEY *key_info;
  KEY_PART_INFO *key_part;
  KEY_PART_INFO *end;
  byte *buff;
  DBUG_ENTER("position");

3026
  if (table->s->primary_key != MAX_KEY) 
3027
  {
3028
    key_info= table->key_info + table->s->primary_key;
3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043
    key_part= key_info->key_part;
    end= key_part + key_info->key_parts;
    buff= ref;
    
    for (; key_part != end; key_part++) 
    {
      if (key_part->null_bit) {
        /* Store 0 if the key part is a NULL part */      
        if (record[key_part->null_offset]
            & key_part->null_bit) {
          *buff++= 1;
          continue;
        }      
        *buff++= 0;
      }
3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063

      size_t len = key_part->length;
      const byte * ptr = record + key_part->offset;
      Field *field = key_part->field;
      if ((field->type() ==  MYSQL_TYPE_VARCHAR) &&
	  ((Field_varstring*)field)->length_bytes == 1)
      {
	/** 
	 * Keys always use 2 bytes length
	 */
	buff[0] = ptr[0];
	buff[1] = 0;
	memcpy(buff+2, ptr + 1, len);	
	len += 2;
      }
      else
      {
	memcpy(buff, ptr, len);
      }
      buff += len;
3064 3065 3066 3067 3068 3069
    }
  } 
  else 
  {
    // No primary key, get hidden key
    DBUG_PRINT("info", ("Getting hidden key"));
3070
#ifndef DBUG_OFF
3071
    int hidden_no= table->s->fields;
joreland@mysql.com's avatar
joreland@mysql.com committed
3072
    const NDBTAB *tab= (const NDBTAB *) m_table;  
3073 3074 3075 3076
    const NDBCOL *hidden_col= tab->getColumn(hidden_no);
    DBUG_ASSERT(hidden_col->getPrimaryKey() && 
                hidden_col->getAutoIncrement() &&
                ref_length == NDB_HIDDEN_PRIMARY_KEY_LENGTH);
3077
#endif
3078
    memcpy(ref, m_ref, ref_length);
3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097
  }
  
  DBUG_DUMP("ref", (char*)ref, ref_length);
  DBUG_VOID_RETURN;
}


/*
  Update handler statistics as requested by the bits in flag

  HA_STATUS_VARIABLE  sets records (and, when exact statistics are read
                      from NDB, mean_rec_length and data_file_length)
  HA_STATUS_CONST     calls set_rec_per_key()
  HA_STATUS_ERRKEY    sets errkey from m_dupkey
  HA_STATUS_AUTO      reads the auto increment value from NDB

  If the connection to NDB cannot be verified, my_errno is set and the
  function returns without processing the remaining flags.
*/
void ha_ndbcluster::info(uint flag)
{
  DBUG_ENTER("info");
  DBUG_PRINT("enter", ("flag: %u", flag));
  
  if (flag & HA_STATUS_POS)
    DBUG_PRINT("info", ("HA_STATUS_POS"));
  if (flag & HA_STATUS_NO_LOCK)
    DBUG_PRINT("info", ("HA_STATUS_NO_LOCK"));
  if (flag & HA_STATUS_TIME)
    DBUG_PRINT("info", ("HA_STATUS_TIME"));
  if (flag & HA_STATUS_VARIABLE)
  {
    DBUG_PRINT("info", ("HA_STATUS_VARIABLE"));
    if (m_table_info)
    {
      if (m_ha_not_exact_count)
        records= 100;
      else
        records_update();
    }
    else
    {
      if ((my_errno= check_ndb_connection()))
        DBUG_VOID_RETURN;
      Ndb *ndb= get_ndb();
      struct Ndb_statistics stat;
      ndb->setDatabaseName(m_dbname);
      if (current_thd->variables.ndb_use_exact_count &&
          ndb_get_table_statistics(ndb, m_tabname, &stat) == 0)
      {
        mean_rec_length= stat.row_size;
        data_file_length= stat.fragment_memory;
        records= stat.row_count;
      }
      else
      {
        /* No exact count wanted or available: use a fixed guess */
        mean_rec_length= 0;
        records= 100;
      }
    }
  }
  if (flag & HA_STATUS_CONST)
  {
    DBUG_PRINT("info", ("HA_STATUS_CONST"));
    set_rec_per_key();
  }
  if (flag & HA_STATUS_ERRKEY)
  {
    DBUG_PRINT("info", ("HA_STATUS_ERRKEY"));
    errkey= m_dupkey;
  }
  if (flag & HA_STATUS_AUTO)
  {
    DBUG_PRINT("info", ("HA_STATUS_AUTO"));
    if (m_table)
    {
      /*
        Verify the connection before using the Ndb object, in the same
        way as the HA_STATUS_VARIABLE branch does
      */
      if ((my_errno= check_ndb_connection()))
        DBUG_VOID_RETURN;
      Ndb *ndb= get_ndb();
      
      Uint64 auto_increment_value64;
      if (ndb->readAutoIncrementValue((const NDBTAB *) m_table,
                                      auto_increment_value64) == -1)
      {
        const NdbError err= ndb->getNdbError();
        sql_print_error("Error %lu in readAutoIncrementValue(): %s",
                        (ulong) err.code, err.message);
        /* All bits set marks the value as unknown after the failure */
        auto_increment_value= ~(Uint64)0;
      }
      else
        auto_increment_value= (ulonglong)auto_increment_value64;
    }
  }
  DBUG_VOID_RETURN;
}


int ha_ndbcluster::extra(enum ha_extra_function operation)
{
  DBUG_ENTER("extra");
  switch (operation) {
  case HA_EXTRA_NORMAL:              /* Optimize for space (def) */
    DBUG_PRINT("info", ("HA_EXTRA_NORMAL"));
    break;
  case HA_EXTRA_QUICK:                 /* Optimize for speed */
    DBUG_PRINT("info", ("HA_EXTRA_QUICK"));
    break;
  case HA_EXTRA_RESET:                 /* Reset database to after open */
    DBUG_PRINT("info", ("HA_EXTRA_RESET"));
3174 3175
    DBUG_PRINT("info", ("Clearing condition stack"));
    cond_clear();
3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244
    break;
  case HA_EXTRA_CACHE:                 /* Cash record in HA_rrnd() */
    DBUG_PRINT("info", ("HA_EXTRA_CACHE"));
    break;
  case HA_EXTRA_NO_CACHE:              /* End cacheing of records (def) */
    DBUG_PRINT("info", ("HA_EXTRA_NO_CACHE"));
    break;
  case HA_EXTRA_NO_READCHECK:          /* No readcheck on update */
    DBUG_PRINT("info", ("HA_EXTRA_NO_READCHECK"));
    break;
  case HA_EXTRA_READCHECK:             /* Use readcheck (def) */
    DBUG_PRINT("info", ("HA_EXTRA_READCHECK"));
    break;
  case HA_EXTRA_KEYREAD:               /* Read only key to database */
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD"));
    break;
  case HA_EXTRA_NO_KEYREAD:            /* Normal read of records (def) */
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEYREAD"));
    break;
  case HA_EXTRA_NO_USER_CHANGE:        /* No user is allowed to write */
    DBUG_PRINT("info", ("HA_EXTRA_NO_USER_CHANGE"));
    break;
  case HA_EXTRA_KEY_CACHE:
    DBUG_PRINT("info", ("HA_EXTRA_KEY_CACHE"));
    break;
  case HA_EXTRA_NO_KEY_CACHE:
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEY_CACHE"));
    break;
  case HA_EXTRA_WAIT_LOCK:            /* Wait until file is avalably (def) */
    DBUG_PRINT("info", ("HA_EXTRA_WAIT_LOCK"));
    break;
  case HA_EXTRA_NO_WAIT_LOCK:         /* If file is locked, return quickly */
    DBUG_PRINT("info", ("HA_EXTRA_NO_WAIT_LOCK"));
    break;
  case HA_EXTRA_WRITE_CACHE:           /* Use write cache in ha_write() */
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CACHE"));
    break;
  case HA_EXTRA_FLUSH_CACHE:           /* flush write_record_cache */
    DBUG_PRINT("info", ("HA_EXTRA_FLUSH_CACHE"));
    break;
  case HA_EXTRA_NO_KEYS:               /* Remove all update of keys */
    DBUG_PRINT("info", ("HA_EXTRA_NO_KEYS"));
    break;
  case HA_EXTRA_KEYREAD_CHANGE_POS:         /* Keyread, but change pos */
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_CHANGE_POS")); /* xxxxchk -r must be used */
    break;                                  
  case HA_EXTRA_REMEMBER_POS:          /* Remember pos for next/prev */
    DBUG_PRINT("info", ("HA_EXTRA_REMEMBER_POS"));
    break;
  case HA_EXTRA_RESTORE_POS:
    DBUG_PRINT("info", ("HA_EXTRA_RESTORE_POS"));
    break;
  case HA_EXTRA_REINIT_CACHE:          /* init cache from current record */
    DBUG_PRINT("info", ("HA_EXTRA_REINIT_CACHE"));
    break;
  case HA_EXTRA_FORCE_REOPEN:          /* Datafile have changed on disk */
    DBUG_PRINT("info", ("HA_EXTRA_FORCE_REOPEN"));
    break;
  case HA_EXTRA_FLUSH:                 /* Flush tables to disk */
    DBUG_PRINT("info", ("HA_EXTRA_FLUSH"));
    break;
  case HA_EXTRA_NO_ROWS:               /* Don't write rows */
    DBUG_PRINT("info", ("HA_EXTRA_NO_ROWS"));
    break;
  case HA_EXTRA_RESET_STATE:           /* Reset positions */
    DBUG_PRINT("info", ("HA_EXTRA_RESET_STATE"));
    break;
  case HA_EXTRA_IGNORE_DUP_KEY:       /* Dup keys don't rollback everything*/
    DBUG_PRINT("info", ("HA_EXTRA_IGNORE_DUP_KEY"));
3245 3246
    DBUG_PRINT("info", ("Ignoring duplicate key"));
    m_ignore_dup_key= TRUE;
3247 3248 3249
    break;
  case HA_EXTRA_NO_IGNORE_DUP_KEY:
    DBUG_PRINT("info", ("HA_EXTRA_NO_IGNORE_DUP_KEY"));
3250
    m_ignore_dup_key= FALSE;
3251 3252
    break;
  case HA_EXTRA_RETRIEVE_ALL_COLS:    /* Retrieve all columns, not just those
3253 3254
                                         where field->query_id is the same as
                                         the current query id */
3255
    DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_ALL_COLS"));
3256
    m_retrieve_all_fields= TRUE;
3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268
    break;
  case HA_EXTRA_PREPARE_FOR_DELETE:
    DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_DELETE"));
    break;
  case HA_EXTRA_PREPARE_FOR_UPDATE:     /* Remove read cache if problems */
    DBUG_PRINT("info", ("HA_EXTRA_PREPARE_FOR_UPDATE"));
    break;
  case HA_EXTRA_PRELOAD_BUFFER_SIZE: 
    DBUG_PRINT("info", ("HA_EXTRA_PRELOAD_BUFFER_SIZE"));
    break;
  case HA_EXTRA_RETRIEVE_PRIMARY_KEY: 
    DBUG_PRINT("info", ("HA_EXTRA_RETRIEVE_PRIMARY_KEY"));
3269
    m_retrieve_primary_key= TRUE;
3270 3271 3272 3273 3274 3275
    break;
  case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: 
    DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_UNIQUE"));
    break;
  case HA_EXTRA_CHANGE_KEY_TO_DUP: 
    DBUG_PRINT("info", ("HA_EXTRA_CHANGE_KEY_TO_DUP"));
3276 3277
  case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
    DBUG_PRINT("info", ("HA_EXTRA_KEYREAD_PRESERVE_FIELDS"));
3278
    break;
3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291
  case HA_EXTRA_WRITE_CAN_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CAN_REPLACE"));
    if (!m_has_unique_index)
    {
      DBUG_PRINT("info", ("Turning ON use of write instead of insert"));
      m_use_write= TRUE;
    }
    break;
  case HA_EXTRA_WRITE_CANNOT_REPLACE:
    DBUG_PRINT("info", ("HA_EXTRA_WRITE_CANNOT_REPLACE"));
    DBUG_PRINT("info", ("Turning OFF use of write instead of insert"));
    m_use_write= FALSE;
    break;
3292 3293 3294 3295 3296
  }
  
  DBUG_RETURN(0);
}

3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309
/*
   Start of an insert, remember the number of rows to be inserted; it
   will be used in write_row and get_autoincrement to send an optimal
   number of rows in each roundtrip to the server

   SYNOPSIS
   rows     number of rows to insert, 0 if unknown

*/

void ha_ndbcluster::start_bulk_insert(ha_rows rows)
{
  const NDBTAB *tab= (const NDBTAB *) m_table;

  DBUG_ENTER("start_bulk_insert");
  DBUG_PRINT("enter", ("rows: %d", (int)rows));

  m_rows_inserted= (ha_rows) 0;

  if (m_ignore_dup_key && !m_use_write)
  {
    /*
      Compare this condition with the one in write_row:
      we have a situation where peek_indexed_rows() will be called,
      so we cannot batch
    */
    DBUG_PRINT("info", ("Batching turned off as duplicate key is "
                        "ignored by using peek_row"));
    m_rows_to_insert= 1;
    m_bulk_insert_rows= 1;
    DBUG_VOID_RETURN;
  }

  if (rows != (ha_rows) 0)
    m_rows_to_insert= rows;
  else
  {
    /* We don't know how many will be inserted, guess */
    m_rows_to_insert= m_autoincrement_prefetch;
  }

  /*
    Calculate how many rows should be inserted per roundtrip to NDB.
    This is done in order to minimize the number of roundtrips as much
    as possible. However performance will degrade if too many bytes are
    inserted, thus the batch is limited by this calculation.
  */
  const int bytesperbatch= 8192;
  const int bytes= 12 + tab->getRowSizeInBytes() + 4 * tab->getNoOfColumns();
  int batch= bytesperbatch / bytes;
  if (batch == 0)
    batch= 1;                           /* always send at least one row */
  DBUG_PRINT("info", ("batch: %d, bytes: %d", batch, bytes));
  m_bulk_insert_rows= batch;

  DBUG_VOID_RETURN;
}

/*
  End of an insert
 */
int ha_ndbcluster::end_bulk_insert()
{
3359 3360
  int error= 0;

3361
  DBUG_ENTER("end_bulk_insert");
3362
  // Check if last inserts need to be flushed
3363
  if (m_bulk_insert_not_flushed)
3364
  {
3365
    NdbTransaction *trans= m_active_trans;
3366 3367 3368
    // Send rows to NDB
    DBUG_PRINT("info", ("Sending inserts to NDB, "\
                        "rows_inserted:%d, bulk_insert_rows: %d", 
3369
                        (int) m_rows_inserted, (int) m_bulk_insert_rows)); 
3370
    m_bulk_insert_not_flushed= FALSE;
3371 3372
    if (m_transaction_on)
    {
3373
      if (execute_no_commit(this, trans,false) != 0)
3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385
      {
        no_uncommitted_rows_execute_failure();
        my_errno= error= ndb_err(trans);
      }
    }
    else
    {
      if (execute_commit(this, trans) != 0)
      {
        no_uncommitted_rows_execute_failure();
        my_errno= error= ndb_err(trans);
      }
3386 3387 3388 3389 3390
      else
      {
        int res= trans->restart();
        DBUG_ASSERT(res == 0);
      }
3391
    }
3392 3393
  }

3394 3395
  m_rows_inserted= (ha_rows) 0;
  m_rows_to_insert= (ha_rows) 1;
3396
  DBUG_RETURN(error);
3397 3398
}

3399 3400 3401 3402

int ha_ndbcluster::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
  DBUG_ENTER("extra_opt");
pekka@mysql.com's avatar
pekka@mysql.com committed
3403
  DBUG_PRINT("enter", ("cache_size: %lu", cache_size));
3404 3405 3406
  DBUG_RETURN(extra(operation));
}

3407 3408 3409 3410
/* File name extensions of the NDB handler; the list is ended by NullS */
static const char *ha_ndbcluster_exts[] = {
 ha_ndb_ext,
 NullS
};
3411

3412
const char** ha_ndbcluster::bas_ext() const
3413 3414 3415
{
  return ha_ndbcluster_exts;
}
3416 3417 3418 3419 3420 3421 3422 3423 3424

/*
  How many seeks it will take to read through the table
  This is to be comparable to the number returned by records_in_range so
  that we can decide if we should scan the table or use keys.
*/

double ha_ndbcluster::scan_time()
{
3425 3426 3427
  DBUG_ENTER("ha_ndbcluster::scan_time()");
  double res= rows2double(records*1000);
  DBUG_PRINT("exit", ("table: %s value: %f", 
3428
                      m_tabname, res));
3429
  DBUG_RETURN(res);
3430 3431
}

3432 3433 3434 3435 3436 3437 3438
/*
  Convert MySQL table locks into locks supported by Ndb Cluster.
  Note that MySQL Cluster does currently not support distributed
  table locks, so to be safe one should set cluster in Single
  User Mode, before relying on table locks when updating tables
  from several MySQL servers
*/
3439 3440 3441 3442 3443 3444 3445 3446

THR_LOCK_DATA **ha_ndbcluster::store_lock(THD *thd,
                                          THR_LOCK_DATA **to,
                                          enum thr_lock_type lock_type)
{
  DBUG_ENTER("store_lock");
  if (lock_type != TL_IGNORE && m_lock.type == TL_UNLOCK) 
  {
3447

3448 3449 3450
    /* If we are not doing a LOCK TABLE, then allow multiple
       writers */
    
3451 3452 3453
    /* Since NDB does not currently have table locks
       this is treated as a ordinary lock */

3454
    if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469
         lock_type <= TL_WRITE) && !thd->in_lock_tables)      
      lock_type= TL_WRITE_ALLOW_WRITE;
    
    /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
       MySQL would use the lock TL_READ_NO_INSERT on t2, and that
       would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
       to t2. Convert the lock to a normal read lock to allow
       concurrent inserts to t2. */
    
    if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
      lock_type= TL_READ;
    
    m_lock.type=lock_type;
  }
  *to++= &m_lock;
3470 3471

  DBUG_PRINT("exit", ("lock_type: %d", lock_type));
3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493
  
  DBUG_RETURN(to);
}

/*
  Debug aid: trace which of OPTION_NOT_AUTOCOMMIT, OPTION_BEGIN and
  OPTION_TABLE_LOCK are set in t->options.
  Expands to nothing when DBUG_OFF is defined.
  The argument is parenthesized so that any pointer expression can be
  passed safely.
*/
#ifndef DBUG_OFF
#define PRINT_OPTION_FLAGS(t) { \
      if ((t)->options & OPTION_NOT_AUTOCOMMIT) \
        DBUG_PRINT("thd->options", ("OPTION_NOT_AUTOCOMMIT")); \
      if ((t)->options & OPTION_BEGIN) \
        DBUG_PRINT("thd->options", ("OPTION_BEGIN")); \
      if ((t)->options & OPTION_TABLE_LOCK) \
        DBUG_PRINT("thd->options", ("OPTION_TABLE_LOCK")); \
}
#else
#define PRINT_OPTION_FLAGS(t)
#endif


/*
  As MySQL will execute an external lock for every new table it uses
  we can use this to start the transactions.
  If we are in auto_commit mode we just need to start a transaction
  for the statement, this will be stored in thd_ndb.stmt.
  If not, we have to start a master transaction if there doesn't exist
  one from before, this will be stored in thd_ndb.all

  When a table lock is held one transaction will be started which holds
  the table lock and for each statement a hupp transaction will be started
  If we are locking the table then:
  - save the NdbDictionary::Table for easy access
  - save reference to table statistics
  - refresh list of the indexes for the table if needed (if altered)

  SYNOPSIS
    external_lock()
    thd         thread whose Thd_ndb holds the NDB transactions
    lock_type   F_UNLCK releases this handler's part of the lock,
                any other value acquires it

  RETURN
    0           ok
    1           no connection to NDB could be set up for thd
    other       MySQL error code mapped from the NDB error, or my_errno
                from build_index_list()

  NOTE(review): the error returns taken after thd_ndb->lock_count has been
  incremented do not restore the counter - confirm how callers recover.
 */

int ha_ndbcluster::external_lock(THD *thd, int lock_type)
{
  int error=0;
  NdbTransaction* trans= NULL;

  DBUG_ENTER("external_lock");
  /*
    Check that this handler instance has a connection
    set up to the Ndb object of thd
   */
  if (check_ndb_connection(thd))
    DBUG_RETURN(1);

  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;

  DBUG_PRINT("enter", ("thd: %x, thd_ndb: %x, thd_ndb->lock_count: %d",
                       thd, thd_ndb, thd_ndb->lock_count));

  if (lock_type != F_UNLCK)
  {
    DBUG_PRINT("info", ("lock_type != F_UNLCK"));
    if (!thd->transaction.on)
      m_transaction_on= FALSE;
    else
      m_transaction_on= thd->variables.ndb_use_transactions;
    // Only the first external_lock of the thread starts a transaction
    if (!thd_ndb->lock_count++)
    {
      PRINT_OPTION_FLAGS(thd);
      if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
      {
        // Autocommit transaction
        DBUG_ASSERT(!thd_ndb->stmt);
        DBUG_PRINT("trans",("Starting transaction stmt"));

        trans= ndb->startTransaction();
        if (trans == NULL)
          ERR_RETURN(ndb->getNdbError());
        no_uncommitted_rows_reset(thd);
        thd_ndb->stmt= trans;
        thd_ndb->query_state&= NDB_QUERY_NORMAL;
        trans_register_ha(thd, FALSE, &ndbcluster_hton);
      }
      else
      {
        if (!thd_ndb->all)
        {
          // Not autocommit transaction
          // A "master" transaction has not been started yet
          DBUG_PRINT("trans",("starting transaction, all"));

          trans= ndb->startTransaction();
          if (trans == NULL)
            ERR_RETURN(ndb->getNdbError());
          no_uncommitted_rows_reset(thd);
          thd_ndb->all= trans;
          thd_ndb->query_state&= NDB_QUERY_NORMAL;
          trans_register_ha(thd, TRUE, &ndbcluster_hton);

          /*
            If this is the start of a LOCK TABLE, a table lock
            should be taken on the table in NDB

            Check if it should be read or write lock
           */
          if (thd->options & (OPTION_TABLE_LOCK))
          {
            //lockThisTable();
            DBUG_PRINT("info", ("Locking the table..." ));
          }

        }
      }
    }
    /*
      This is the place to make sure this handler instance
      has a started transaction.

      The transaction is started by the first handler on which
      MySQL Server calls external lock

      Other handlers in the same stmt or transaction should use
      the same NDB transaction. This is done by setting up the m_active_trans
      pointer to point to the NDB transaction.
     */

    // store thread specific data first to set the right context
    m_force_send=          thd->variables.ndb_force_send;
    m_ha_not_exact_count= !thd->variables.ndb_use_exact_count;
    m_autoincrement_prefetch=
      (ha_rows) thd->variables.ndb_autoincrement_prefetch_sz;

    m_active_trans= thd_ndb->all ? thd_ndb->all : thd_ndb->stmt;
    DBUG_ASSERT(m_active_trans);
    // Start of transaction
    m_rows_changed= 0;
    m_retrieve_all_fields= FALSE;
    m_retrieve_primary_key= FALSE;
    m_ops_pending= 0;
    {
      NDBDICT *dict= ndb->getDictionary();
      const NDBTAB *tab;
      void *tab_info;
      if (!(tab= dict->getTable(m_tabname, &tab_info)))
        ERR_RETURN(dict->getNdbError());
      DBUG_PRINT("info", ("Table schema version: %d",
                          tab->getObjectVersion()));
      // Check if thread has stale local cache
      // New transaction must not use old tables... (trans != 0)
      // Running might...
      if ((trans && tab->getObjectStatus() != NdbDictionary::Object::Retrieved)
          || tab->getObjectStatus() == NdbDictionary::Object::Invalid)
      {
        invalidate_dictionary_cache(FALSE);
        if (!(tab= dict->getTable(m_tabname, &tab_info)))
          ERR_RETURN(dict->getNdbError());
        DBUG_PRINT("info", ("Table schema version: %d",
                            tab->getObjectVersion()));
      }
      if (m_table_version < tab->getObjectVersion())
      {
        /*
          The table has been altered, caller has to retry
        */
        NdbError err= ndb->getNdbError(NDB_INVALID_SCHEMA_OBJECT);
        DBUG_RETURN(ndb_to_mysql_error(&err));
      }
      if (m_table != (void *)tab)
      {
        m_table= (void *)tab;
        m_table_version = tab->getObjectVersion();
        if ((my_errno= build_index_list(ndb, table, ILBP_OPEN)))
          DBUG_RETURN(my_errno);

        /*
          Compare the packed local frm with the one stored in NDB.
          Both pointers start out as NULL: the condition below
          short-circuits, so when readfrm() or packfrm() fails one or both
          of them may never be assigned, and the my_free() calls on the
          error path must then see NULL (accepted via MY_ALLOW_ZERO_PTR)
          rather than an indeterminate value.
        */
        const void *data= NULL, *pack_data= NULL;
        uint length, pack_length;
        if (readfrm(table->s->path, &data, &length) ||
            packfrm(data, length, &pack_data, &pack_length) ||
            pack_length != tab->getFrmLength() ||
            memcmp(pack_data, tab->getFrmData(), pack_length))
        {
          my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
          my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
          NdbError err= ndb->getNdbError(NDB_INVALID_SCHEMA_OBJECT);
          DBUG_RETURN(ndb_to_mysql_error(&err));
        }
        my_free((char*)data, MYF(MY_ALLOW_ZERO_PTR));
        my_free((char*)pack_data, MYF(MY_ALLOW_ZERO_PTR));
      }
      m_table_info= tab_info;
    }
    no_uncommitted_rows_init(thd);
  }
  else
  {
    DBUG_PRINT("info", ("lock_type == F_UNLCK"));

    if (ndb_cache_check_time && m_rows_changed)
    {
      DBUG_PRINT("info", ("Rows has changed and util thread is running"));
      if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
      {
        DBUG_PRINT("info", ("Add share to list of tables to be invalidated"));
        /* NOTE push_back allocates memory using transactions mem_root! */
        thd_ndb->changed_tables.push_back(m_share, &thd->transaction.mem_root);
      }

      pthread_mutex_lock(&m_share->mutex);
      DBUG_PRINT("info", ("Invalidating commit_count"));
      m_share->commit_count= 0;
      m_share->commit_count_lock++;
      pthread_mutex_unlock(&m_share->mutex);
    }

    // The last unlock of the thread ends a still open statement transaction
    if (!--thd_ndb->lock_count)
    {
      DBUG_PRINT("trans", ("Last external_lock"));
      PRINT_OPTION_FLAGS(thd);

      if (thd_ndb->stmt)
      {
        /*
          Unlock is done without a transaction commit / rollback.
          This happens if the thread didn't update any rows
          We must in this case close the transaction to release resources
        */
        DBUG_PRINT("trans",("ending non-updating transaction"));
        ndb->closeTransaction(m_active_trans);
        thd_ndb->stmt= NULL;
      }
    }
    m_table_info= NULL;

    /*
      This is the place to make sure this handler instance
      no longer are connected to the active transaction.

      And since the handler is no longer part of the transaction
      it can't have open cursors, ops or blobs pending.
    */
    m_active_trans= NULL;

    if (m_active_cursor)
      DBUG_PRINT("warning", ("m_active_cursor != NULL"));
    m_active_cursor= NULL;

    if (m_multi_cursor)
      DBUG_PRINT("warning", ("m_multi_cursor != NULL"));
    m_multi_cursor= NULL;

    if (m_blobs_pending)
      DBUG_PRINT("warning", ("blobs_pending != 0"));
    m_blobs_pending= 0;

    if (m_ops_pending)
      DBUG_PRINT("warning", ("ops_pending != 0L"));
    m_ops_pending= 0;
  }
  DBUG_RETURN(error);
}

3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742
/*
  Unlock the last row read in an open scan.
  NDB releases row locks by default; for SELECT FOR UPDATE and
  SELECT ... LOCK IN SHARE MODE the lock is kept unless unlock_row()
  is called.  Here that amounts to clearing m_lock_tuple.
*/

void ha_ndbcluster::unlock_row()
{
  DBUG_ENTER("unlock_row");

  m_lock_tuple= false;
  DBUG_PRINT("info", ("Unlocking row"));

  DBUG_VOID_RETURN;
}

3743
/*
3744 3745 3746 3747 3748
  Start a transaction for running a statement if one is not
  already running in a transaction. This will be the case in
  a BEGIN; COMMIT; block
  When using LOCK TABLE's external_lock will start a transaction
  since ndb does not currently does not support table locking
3749 3750
*/

serg@serg.mylan's avatar
serg@serg.mylan committed
3751
int ha_ndbcluster::start_stmt(THD *thd, thr_lock_type lock_type)
3752 3753 3754 3755 3756
{
  int error=0;
  DBUG_ENTER("start_stmt");
  PRINT_OPTION_FLAGS(thd);

3757
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
3758
  NdbTransaction *trans= (thd_ndb->stmt)?thd_ndb->stmt:thd_ndb->all;
3759
  if (!trans){
3760
    Ndb *ndb= thd_ndb->ndb;
3761
    DBUG_PRINT("trans",("Starting transaction stmt"));  
3762
    trans= ndb->startTransaction();
3763
    if (trans == NULL)
3764
      ERR_RETURN(ndb->getNdbError());
3765
    no_uncommitted_rows_reset(thd);
3766 3767
    thd_ndb->stmt= trans;
    trans_register_ha(thd, FALSE, &ndbcluster_hton);
3768
  }
3769
  thd_ndb->query_state&= NDB_QUERY_NORMAL;
3770
  m_active_trans= trans;
3771

3772
  // Start of statement
3773
  m_retrieve_all_fields= FALSE;
3774
  m_retrieve_primary_key= FALSE;
3775
  m_ops_pending= 0;    
3776 3777 3778 3779 3780 3781
  
  DBUG_RETURN(error);
}


/*
3782
  Commit a transaction started in NDB
3783 3784
 */

3785
int ndbcluster_commit(THD *thd, bool all)
3786 3787
{
  int res= 0;
3788 3789 3790
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;
3791 3792 3793

  DBUG_ENTER("ndbcluster_commit");
  DBUG_PRINT("transaction",("%s",
3794
                            trans == thd_ndb->stmt ?
3795 3796 3797
                            "stmt" : "all"));
  DBUG_ASSERT(ndb && trans);

3798
  if (execute_commit(thd,trans) != 0)
3799 3800
  {
    const NdbError err= trans->getNdbError();
3801
    const NdbOperation *error_op= trans->getNdbErrorOperation();
3802
    ERR_PRINT(err);
3803
    res= ndb_to_mysql_error(&err);
3804
    if (res != -1)
3805
      ndbcluster_print_error(res, error_op);
3806
  }
3807
  ndb->closeTransaction(trans);
3808

3809
  if (all)
3810 3811 3812
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;
3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826

  /* Clear commit_count for tables changed by transaction */
  NDB_SHARE* share;
  List_iterator_fast<NDB_SHARE> it(thd_ndb->changed_tables);
  while ((share= it++))
  {
    pthread_mutex_lock(&share->mutex);
    DBUG_PRINT("info", ("Invalidate commit_count for %s, share->commit_count: %d ", share->table_name, share->commit_count));
    share->commit_count= 0;
    share->commit_count_lock++;
    pthread_mutex_unlock(&share->mutex);
  }
  thd_ndb->changed_tables.empty();

3827 3828 3829 3830 3831 3832 3833 3834
  DBUG_RETURN(res);
}


/*
  Rollback a transaction started in NDB
 */

3835
int ndbcluster_rollback(THD *thd, bool all)
3836 3837
{
  int res= 0;
3838 3839 3840
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  Ndb *ndb= thd_ndb->ndb;
  NdbTransaction *trans= all ? thd_ndb->all : thd_ndb->stmt;
3841 3842 3843

  DBUG_ENTER("ndbcluster_rollback");
  DBUG_PRINT("transaction",("%s",
3844
                            trans == thd_ndb->stmt ? 
3845 3846 3847
                            "stmt" : "all"));
  DBUG_ASSERT(ndb && trans);

3848
  if (trans->execute(NdbTransaction::Rollback) != 0)
3849 3850
  {
    const NdbError err= trans->getNdbError();
3851
    const NdbOperation *error_op= trans->getNdbErrorOperation();
3852 3853
    ERR_PRINT(err);     
    res= ndb_to_mysql_error(&err);
3854 3855
    if (res != -1) 
      ndbcluster_print_error(res, error_op);
3856 3857
  }
  ndb->closeTransaction(trans);
3858

3859
  if (all)
3860 3861 3862 3863
    thd_ndb->all= NULL;
  else
    thd_ndb->stmt= NULL;

3864 3865 3866
  /* Clear list of tables changed by transaction */
  thd_ndb->changed_tables.empty();

3867
  DBUG_RETURN(res);
3868 3869 3870 3871
}


/*
pekka@mysql.com's avatar
pekka@mysql.com committed
3872 3873 3874
  Define NDB column based on Field.
  Returns 0 or mysql error code.
  Not member of ha_ndbcluster because NDBCOL cannot be declared.
pekka@mysql.com's avatar
pekka@mysql.com committed
3875 3876 3877

  MySQL text types with character set "binary" are mapped to true
  NDB binary types without a character set.  This may change.
3878 3879
 */

pekka@mysql.com's avatar
pekka@mysql.com committed
3880 3881 3882
static int create_ndb_column(NDBCOL &col,
                             Field *field,
                             HA_CREATE_INFO *info)
3883
{
pekka@mysql.com's avatar
pekka@mysql.com committed
3884
  // Set name
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
3885
  col.setName(field->field_name);
pekka@mysql.com's avatar
pekka@mysql.com committed
3886 3887
  // Get char set
  CHARSET_INFO *cs= field->charset();
pekka@mysql.com's avatar
pekka@mysql.com committed
3888 3889 3890 3891
  // Set type and sizes
  const enum enum_field_types mysql_type= field->real_type();
  switch (mysql_type) {
  // Numeric types
3892
  case MYSQL_TYPE_TINY:        
pekka@mysql.com's avatar
pekka@mysql.com committed
3893 3894 3895 3896 3897 3898
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Tinyunsigned);
    else
      col.setType(NDBCOL::Tinyint);
    col.setLength(1);
    break;
3899
  case MYSQL_TYPE_SHORT:
pekka@mysql.com's avatar
pekka@mysql.com committed
3900 3901 3902 3903 3904 3905
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Smallunsigned);
    else
      col.setType(NDBCOL::Smallint);
    col.setLength(1);
    break;
3906
  case MYSQL_TYPE_LONG:
pekka@mysql.com's avatar
pekka@mysql.com committed
3907 3908 3909 3910 3911 3912
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Unsigned);
    else
      col.setType(NDBCOL::Int);
    col.setLength(1);
    break;
3913
  case MYSQL_TYPE_INT24:       
pekka@mysql.com's avatar
pekka@mysql.com committed
3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Mediumunsigned);
    else
      col.setType(NDBCOL::Mediumint);
    col.setLength(1);
    break;
  case MYSQL_TYPE_LONGLONG:
    if (field->flags & UNSIGNED_FLAG)
      col.setType(NDBCOL::Bigunsigned);
    else
      col.setType(NDBCOL::Bigint);
    col.setLength(1);
3926 3927
    break;
  case MYSQL_TYPE_FLOAT:
pekka@mysql.com's avatar
pekka@mysql.com committed
3928 3929 3930
    col.setType(NDBCOL::Float);
    col.setLength(1);
    break;
3931
  case MYSQL_TYPE_DOUBLE:
pekka@mysql.com's avatar
pekka@mysql.com committed
3932 3933 3934
    col.setType(NDBCOL::Double);
    col.setLength(1);
    break;
3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954
  case MYSQL_TYPE_DECIMAL:    
    {
      Field_decimal *f= (Field_decimal*)field;
      uint precision= f->pack_length();
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Olddecimalunsigned);
        precision-= (scale > 0);
      }
      else
      {
        col.setType(NDBCOL::Olddecimal);
        precision-= 1 + (scale > 0);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
3955 3956 3957
  case MYSQL_TYPE_NEWDECIMAL:    
    {
      Field_new_decimal *f= (Field_new_decimal*)field;
3958
      uint precision= f->precision;
3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972
      uint scale= f->decimals();
      if (field->flags & UNSIGNED_FLAG)
      {
        col.setType(NDBCOL::Decimalunsigned);
      }
      else
      {
        col.setType(NDBCOL::Decimal);
      }
      col.setPrecision(precision);
      col.setScale(scale);
      col.setLength(1);
    }
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
3973 3974 3975 3976 3977
  // Date types
  case MYSQL_TYPE_DATETIME:    
    col.setType(NDBCOL::Datetime);
    col.setLength(1);
    break;
3978 3979 3980 3981
  case MYSQL_TYPE_DATE: // ?
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
3982
  case MYSQL_TYPE_NEWDATE:
3983 3984 3985
    col.setType(NDBCOL::Date);
    col.setLength(1);
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
3986
  case MYSQL_TYPE_TIME:        
3987 3988 3989
    col.setType(NDBCOL::Time);
    col.setLength(1);
    break;
3990 3991 3992 3993 3994 3995 3996
  case MYSQL_TYPE_YEAR:
    col.setType(NDBCOL::Year);
    col.setLength(1);
    break;
  case MYSQL_TYPE_TIMESTAMP:
    col.setType(NDBCOL::Timestamp);
    col.setLength(1);
pekka@mysql.com's avatar
pekka@mysql.com committed
3997 3998 3999
    break;
  // Char types
  case MYSQL_TYPE_STRING:      
4000
    if (field->pack_length() == 0)
4001 4002 4003 4004
    {
      col.setType(NDBCOL::Bit);
      col.setLength(1);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4005
    else if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
4006
    {
pekka@mysql.com's avatar
pekka@mysql.com committed
4007
      col.setType(NDBCOL::Binary);
4008
      col.setLength(field->pack_length());
pekka@mysql.com's avatar
pekka@mysql.com committed
4009
    }
4010
    else
4011 4012 4013
    {
      col.setType(NDBCOL::Char);
      col.setCharset(cs);
4014
      col.setLength(field->pack_length());
4015
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4016
    break;
pekka@mysql.com's avatar
pekka@mysql.com committed
4017 4018 4019 4020 4021 4022
  case MYSQL_TYPE_VAR_STRING: // ?
  case MYSQL_TYPE_VARCHAR:
    {
      Field_varstring* f= (Field_varstring*)field;
      if (f->length_bytes == 1)
      {
pekka@mysql.com's avatar
pekka@mysql.com committed
4023
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4024 4025 4026 4027 4028 4029 4030 4031
          col.setType(NDBCOL::Varbinary);
        else {
          col.setType(NDBCOL::Varchar);
          col.setCharset(cs);
        }
      }
      else if (f->length_bytes == 2)
      {
pekka@mysql.com's avatar
pekka@mysql.com committed
4032
        if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043
          col.setType(NDBCOL::Longvarbinary);
        else {
          col.setType(NDBCOL::Longvarchar);
          col.setCharset(cs);
        }
      }
      else
      {
        return HA_ERR_UNSUPPORTED;
      }
      col.setLength(field->field_length);
pekka@mysql.com's avatar
pekka@mysql.com committed
4044
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4045 4046 4047 4048
    break;
  // Blob types (all come in as MYSQL_TYPE_BLOB)
  mysql_type_tiny_blob:
  case MYSQL_TYPE_TINY_BLOB:
pekka@mysql.com's avatar
pekka@mysql.com committed
4049
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4050
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4051
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4052
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4053 4054
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4055 4056 4057 4058 4059
    col.setInlineSize(256);
    // No parts
    col.setPartSize(0);
    col.setStripeSize(0);
    break;
4060
  //mysql_type_blob:
4061
  case MYSQL_TYPE_GEOMETRY:
pekka@mysql.com's avatar
pekka@mysql.com committed
4062
  case MYSQL_TYPE_BLOB:    
pekka@mysql.com's avatar
pekka@mysql.com committed
4063
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4064
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4065
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4066
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4067 4068
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084
    // Use "<=" even if "<" is the exact condition
    if (field->max_length() <= (1 << 8))
      goto mysql_type_tiny_blob;
    else if (field->max_length() <= (1 << 16))
    {
      col.setInlineSize(256);
      col.setPartSize(2000);
      col.setStripeSize(16);
    }
    else if (field->max_length() <= (1 << 24))
      goto mysql_type_medium_blob;
    else
      goto mysql_type_long_blob;
    break;
  mysql_type_medium_blob:
  case MYSQL_TYPE_MEDIUM_BLOB:   
pekka@mysql.com's avatar
pekka@mysql.com committed
4085
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4086
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4087
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4088
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4089 4090
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4091 4092 4093 4094 4095 4096
    col.setInlineSize(256);
    col.setPartSize(4000);
    col.setStripeSize(8);
    break;
  mysql_type_long_blob:
  case MYSQL_TYPE_LONG_BLOB:  
pekka@mysql.com's avatar
pekka@mysql.com committed
4097
    if ((field->flags & BINARY_FLAG) && cs == &my_charset_bin)
pekka@mysql.com's avatar
pekka@mysql.com committed
4098
      col.setType(NDBCOL::Blob);
pekka@mysql.com's avatar
pekka@mysql.com committed
4099
    else {
pekka@mysql.com's avatar
pekka@mysql.com committed
4100
      col.setType(NDBCOL::Text);
pekka@mysql.com's avatar
pekka@mysql.com committed
4101 4102
      col.setCharset(cs);
    }
pekka@mysql.com's avatar
pekka@mysql.com committed
4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115
    col.setInlineSize(256);
    col.setPartSize(8000);
    col.setStripeSize(4);
    break;
  // Other types
  case MYSQL_TYPE_ENUM:
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
  case MYSQL_TYPE_SET:         
    col.setType(NDBCOL::Char);
    col.setLength(field->pack_length());
    break;
4116 4117
  case MYSQL_TYPE_BIT:
  {
4118
    int no_of_bits= field->field_length;
4119 4120 4121 4122 4123 4124 4125
    col.setType(NDBCOL::Bit);
    if (!no_of_bits)
      col.setLength(1);
      else
        col.setLength(no_of_bits);
    break;
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
4126 4127 4128 4129 4130
  case MYSQL_TYPE_NULL:        
    goto mysql_type_unsupported;
  mysql_type_unsupported:
  default:
    return HA_ERR_UNSUPPORTED;
4131
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
4132 4133 4134 4135 4136 4137
  // Set nullable and pk
  col.setNullable(field->maybe_null());
  col.setPrimaryKey(field->flags & PRI_KEY_FLAG);
  // Set autoincrement
  if (field->flags & AUTO_INCREMENT_FLAG) 
  {
4138
    char buff[22];
pekka@mysql.com's avatar
pekka@mysql.com committed
4139 4140
    col.setAutoIncrement(TRUE);
    ulonglong value= info->auto_increment_value ?
4141
      info->auto_increment_value : (ulonglong) 1;
4142
    DBUG_PRINT("info", ("Autoincrement key, initial: %s", llstr(value, buff)));
pekka@mysql.com's avatar
pekka@mysql.com committed
4143
    col.setAutoIncrementInitialValue(value);
4144
  }
pekka@mysql.com's avatar
pekka@mysql.com committed
4145
  else
4146
    col.setAutoIncrement(FALSE);
pekka@mysql.com's avatar
pekka@mysql.com committed
4147
  return 0;
4148 4149 4150 4151 4152 4153
}

/*
  Create a table in NDB Cluster
 */

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4154 4155
/*
  Choose the NDB fragment type for a table from its MAX_ROWS / MIN_ROWS
  settings and store both values in the NDB table definition.

  Nothing is changed when the larger of max_rows and min_rows is 0
  (the default setting).  pk_length is only used when compiled for
  MYSQL_VERSION_ID < 50000.
*/
static void ndb_set_fragmentation(NDBTAB &tab, TABLE *form, uint pk_length)
{
  const ha_rows min_rows= form->s->min_rows;
  ha_rows max_rows= form->s->max_rows;
  if (max_rows < min_rows)
    max_rows= min_rows;
  if (max_rows == (ha_rows)0) /* default setting, don't set fragmentation */
    return;

  /**
   * get the number of fragments right
   */
#if MYSQL_VERSION_ID >= 50000
  uint acc_row_size= 25 + /*safety margin*/ 2;
#else
  uint acc_row_size= pk_length*4;
  /* add acc overhead */
  if (pk_length <= 8)  /* main page will set the limit */
    acc_row_size+= 25 + /*safety margin*/ 2;
  else                /* overflow page will set the limit */
    acc_row_size+= 4 + /*safety margin*/ 4;
#endif
  const ulonglong acc_fragment_size= 512*1024*1024;
  /*
   * if not --with-big-tables then max_rows is ulong
   * the warning in this case is misleading though
   */
  const ulonglong big_max_rows = (ulonglong)max_rows;
  uint no_fragments;
#if MYSQL_VERSION_ID >= 50100
  no_fragments= (big_max_rows*acc_row_size)/acc_fragment_size+1;
#else
  no_fragments= ((big_max_rows*acc_row_size)/acc_fragment_size+1
                 +1/*correct rounding*/)/2;
#endif

  /* Map the fragment count, relative to the number of data nodes,
     onto one of the three fragment types */
  const uint no_nodes= g_ndb_cluster_connection->no_db_nodes();
  NDBTAB::FragmentType ftype= NDBTAB::FragAllSmall;
  if (no_fragments > 2*no_nodes)
  {
    ftype= NDBTAB::FragAllLarge;
    if (no_fragments > 4*no_nodes)
      push_warning(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
                   "Ndb might have problems storing the max amount of rows specified");
  }
  else if (no_fragments > no_nodes)
    ftype= NDBTAB::FragAllMedium;
  tab.setFragmentType(ftype);

  tab.setMaxRows(max_rows);
  tab.setMinRows(min_rows);
}

4210
int ha_ndbcluster::create(const char *name, 
4211 4212
                          TABLE *form, 
                          HA_CREATE_INFO *info)
4213 4214 4215
{
  NDBTAB tab;
  NDBCOL col;
joreland@mysql.com's avatar
joreland@mysql.com committed
4216
  uint pack_length, length, i, pk_length= 0;
4217 4218
  const void *data, *pack_data;
  char name2[FN_HEADLEN];
4219
  bool create_from_engine= (info->table_options & HA_OPTION_CREATE_FROM_ENGINE);
4220

pekka@mysql.com's avatar
pekka@mysql.com committed
4221
  DBUG_ENTER("ha_ndbcluster::create");
4222 4223 4224
  DBUG_PRINT("enter", ("name: %s", name));
  fn_format(name2, name, "", "",2);       // Remove the .frm extension
  set_dbname(name2);
4225 4226
  set_tabname(name2);    

4227 4228 4229 4230 4231 4232
  if (current_thd->lex->sql_command == SQLCOM_TRUNCATE)
  {
    DBUG_PRINT("info", ("Dropping and re-creating table for TRUNCATE"));
    if ((my_errno= delete_table(name)))
      DBUG_RETURN(my_errno);
  }
4233 4234 4235 4236 4237 4238 4239 4240 4241 4242
  if (create_from_engine)
  {
    /*
      Table alreay exists in NDB and frm file has been created by 
      caller.
      Do Ndb specific stuff, such as create a .ndb file
    */
    my_errno= write_ndb_file();
    DBUG_RETURN(my_errno);
  }
4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258

  DBUG_PRINT("table", ("name: %s", m_tabname));  
  tab.setName(m_tabname);
  tab.setLogging(!(info->options & HA_LEX_CREATE_TMP_TABLE));    
   
  // Save frm data for this table
  if (readfrm(name, &data, &length))
    DBUG_RETURN(1);
  if (packfrm(data, length, &pack_data, &pack_length))
    DBUG_RETURN(2);
  
  DBUG_PRINT("info", ("setFrm data=%x, len=%d", pack_data, pack_length));
  tab.setFrm(pack_data, pack_length);      
  my_free((char*)data, MYF(0));
  my_free((char*)pack_data, MYF(0));
  
4259
  for (i= 0; i < form->s->fields; i++) 
4260 4261 4262 4263
  {
    Field *field= form->field[i];
    DBUG_PRINT("info", ("name: %s, type: %u, pack_length: %d", 
                        field->field_name, field->real_type(),
4264
                        field->pack_length()));
4265
    if ((my_errno= create_ndb_column(col, field, info)))
pekka@mysql.com's avatar
pekka@mysql.com committed
4266
      DBUG_RETURN(my_errno);
4267
    tab.addColumn(col);
4268
    if (col.getPrimaryKey())
joreland@mysql.com's avatar
joreland@mysql.com committed
4269
      pk_length += (field->pack_length() + 3) / 4;
4270 4271 4272
  }
  
  // No primary key, create shadow key as 64 bit, auto increment  
4273
  if (form->s->primary_key == MAX_KEY) 
4274 4275 4276 4277 4278
  {
    DBUG_PRINT("info", ("Generating shadow key"));
    col.setName("$PK");
    col.setType(NdbDictionary::Column::Bigunsigned);
    col.setLength(1);
4279
    col.setNullable(FALSE);
4280 4281 4282
    col.setPrimaryKey(TRUE);
    col.setAutoIncrement(TRUE);
    tab.addColumn(col);
joreland@mysql.com's avatar
joreland@mysql.com committed
4283 4284 4285 4286
    pk_length += 2;
  }
  
  // Make sure that blob tables don't have to big part size
4287
  for (i= 0; i < form->s->fields; i++) 
joreland@mysql.com's avatar
joreland@mysql.com committed
4288 4289 4290 4291 4292 4293 4294
  {
    /**
     * The extra +7 concists
     * 2 - words from pk in blob table
     * 5 - from extra words added by tup/dict??
     */
    switch (form->field[i]->real_type()) {
4295
    case MYSQL_TYPE_GEOMETRY:
joreland@mysql.com's avatar
joreland@mysql.com committed
4296 4297 4298 4299
    case MYSQL_TYPE_BLOB:    
    case MYSQL_TYPE_MEDIUM_BLOB:   
    case MYSQL_TYPE_LONG_BLOB: 
    {
4300 4301
      NdbDictionary::Column * col= tab.getColumn(i);
      int size= pk_length + (col->getPartSize()+3)/4 + 7;
4302
      if (size > NDB_MAX_TUPLE_SIZE_IN_WORDS && 
4303
         (pk_length+7) < NDB_MAX_TUPLE_SIZE_IN_WORDS)
joreland@mysql.com's avatar
joreland@mysql.com committed
4304
      {
4305 4306
        size= NDB_MAX_TUPLE_SIZE_IN_WORDS - pk_length - 7;
        col->setPartSize(4*size);
joreland@mysql.com's avatar
joreland@mysql.com committed
4307 4308 4309 4310 4311 4312 4313 4314 4315 4316
      }
      /**
       * If size > NDB_MAX and pk_length+7 >= NDB_MAX
       *   then the table can't be created anyway, so skip
       *   changing part size, and have error later
       */ 
    }
    default:
      break;
    }
4317
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4318 4319 4320

  ndb_set_fragmentation(tab, form, pk_length);

4321
  if ((my_errno= check_ndb_connection()))
4322 4323 4324
    DBUG_RETURN(my_errno);
  
  // Create the table in NDB     
4325 4326
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
4327
  if (dict->createTable(tab) != 0) 
4328 4329 4330 4331 4332 4333 4334 4335
  {
    const NdbError err= dict->getNdbError();
    ERR_PRINT(err);
    my_errno= ndb_to_mysql_error(&err);
    DBUG_RETURN(my_errno);
  }
  DBUG_PRINT("info", ("Table %s/%s created successfully", 
                      m_dbname, m_tabname));
4336

4337
  // Create secondary indexes
4338
  my_errno= build_index_list(ndb, form, ILBP_CREATE);
4339

4340 4341 4342
  if (!my_errno)
    my_errno= write_ndb_file();

4343 4344 4345 4346
  DBUG_RETURN(my_errno);
}


4347
int ha_ndbcluster::create_ordered_index(const char *name, 
4348
                                        KEY *key_info)
4349
{
4350
  DBUG_ENTER("ha_ndbcluster::create_ordered_index");
4351
  DBUG_RETURN(create_index(name, key_info, FALSE));
4352 4353 4354
}

int ha_ndbcluster::create_unique_index(const char *name, 
4355
                                       KEY *key_info)
4356 4357
{

4358
  DBUG_ENTER("ha_ndbcluster::create_unique_index");
4359
  DBUG_RETURN(create_index(name, key_info, TRUE));
4360 4361 4362
}


4363 4364 4365 4366 4367
/*
  Create an index in NDB Cluster
 */

int ha_ndbcluster::create_index(const char *name, 
4368 4369
                                KEY *key_info,
                                bool unique)
4370
{
4371 4372
  Ndb *ndb= get_ndb();
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
4373 4374 4375
  KEY_PART_INFO *key_part= key_info->key_part;
  KEY_PART_INFO *end= key_part + key_info->key_parts;
  
4376
  DBUG_ENTER("ha_ndbcluster::create_index");
4377
  DBUG_PRINT("enter", ("name: %s ", name));
4378

4379
  NdbDictionary::Index ndb_index(name);
4380
  if (unique)
4381 4382 4383 4384 4385
    ndb_index.setType(NdbDictionary::Index::UniqueHashIndex);
  else 
  {
    ndb_index.setType(NdbDictionary::Index::OrderedIndex);
    // TODO Only temporary ordered indexes supported
4386
    ndb_index.setLogging(FALSE); 
4387 4388 4389 4390 4391 4392 4393
  }
  ndb_index.setTable(m_tabname);

  for (; key_part != end; key_part++) 
  {
    Field *field= key_part->field;
    DBUG_PRINT("info", ("attr: %s", field->field_name));
msvensson@neptunus.(none)'s avatar
msvensson@neptunus.(none) committed
4394
    ndb_index.addColumnName(field->field_name);
4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410
  }
  
  if (dict->createIndex(ndb_index))
    ERR_RETURN(dict->getNdbError());

  // Success
  DBUG_PRINT("info", ("Created index %s", name));
  DBUG_RETURN(0);  
}

/*
  Rename a table in NDB Cluster
*/

int ha_ndbcluster::rename_table(const char *from, const char *to)
{
4411
  NDBDICT *dict;
4412
  char new_tabname[FN_HEADLEN];
4413
  char new_dbname[FN_HEADLEN];
4414 4415
  const NDBTAB *orig_tab;
  int result;
4416 4417
  bool recreate_indexes= FALSE;
  NDBDICT::List index_list;
4418 4419

  DBUG_ENTER("ha_ndbcluster::rename_table");
4420
  DBUG_PRINT("info", ("Renaming %s to %s", from, to));
4421
  set_dbname(from);
4422
  set_dbname(to, new_dbname);
4423 4424 4425
  set_tabname(from);
  set_tabname(to, new_tabname);

4426 4427 4428
  if (check_ndb_connection())
    DBUG_RETURN(my_errno= HA_ERR_NO_CONNECTION);

mskold@mysql.com's avatar
mskold@mysql.com committed
4429 4430
  Ndb *ndb= get_ndb();
  dict= ndb->getDictionary();
4431 4432
  if (!(orig_tab= dict->getTable(m_tabname)))
    ERR_RETURN(dict->getNdbError());
4433 4434 4435 4436 4437 4438 4439
  // Check if thread has stale local cache
  if (orig_tab->getObjectStatus() == NdbDictionary::Object::Invalid)
  {
    dict->removeCachedTable(m_tabname);
    if (!(orig_tab= dict->getTable(m_tabname)))
      ERR_RETURN(dict->getNdbError());
  }
4440 4441 4442 4443 4444 4445
  if (my_strcasecmp(system_charset_info, new_dbname, m_dbname))
  {
    dict->listIndexes(index_list, m_tabname);
    recreate_indexes= TRUE;
  }

4446 4447 4448
  m_table= (void *)orig_tab;
  // Change current database to that of target table
  set_dbname(to);
mskold@mysql.com's avatar
mskold@mysql.com committed
4449
  ndb->setDatabaseName(m_dbname);
4450
  if (!(result= alter_table_name(new_tabname)))
4451
  {
4452 4453
    // Rename .ndb file
    result= handler::rename_table(from, to);
4454
  }
4455

4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483
  // If we are moving tables between databases, we need to recreate
  // indexes
  if (recreate_indexes)
  {
    const NDBTAB *new_tab;
    set_tabname(to);
    if (!(new_tab= dict->getTable(m_tabname)))
      ERR_RETURN(dict->getNdbError());

    for (unsigned i = 0; i < index_list.count; i++) {
        NDBDICT::List::Element& index_el = index_list.elements[i];
	set_dbname(from);
	ndb->setDatabaseName(m_dbname);
	const NDBINDEX * index= dict->getIndex(index_el.name,  *new_tab);
	set_dbname(to);
	ndb->setDatabaseName(m_dbname);
	DBUG_PRINT("info", ("Creating index %s/%s", 
			    m_dbname, index->getName()));
	dict->createIndex(*index);
        DBUG_PRINT("info", ("Dropping index %s/%s", 
			    m_dbname, index->getName()));
	
	set_dbname(from);
	ndb->setDatabaseName(m_dbname);
	dict->dropIndex(*index);
    }
  }

4484 4485 4486 4487 4488 4489 4490 4491
  DBUG_RETURN(result);
}


/*
  Rename a table in NDB Cluster using alter table
 */

4492
int ha_ndbcluster::alter_table_name(const char *to)
4493
{
4494 4495
  Ndb *ndb= get_ndb();
  NDBDICT *dict= ndb->getDictionary();
4496
  const NDBTAB *orig_tab= (const NDBTAB *) m_table;
4497 4498
  DBUG_ENTER("alter_table_name_table");

4499
  NdbDictionary::Table new_tab= *orig_tab;
4500 4501
  new_tab.setName(to);
  if (dict->alterTable(new_tab) != 0)
4502 4503 4504
    ERR_RETURN(dict->getNdbError());

  m_table= NULL;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4505
  m_table_info= NULL;
4506 4507 4508 4509 4510 4511
                                                                             
  DBUG_RETURN(0);
}


/*
4512 4513
  Delete table from NDB Cluster

4514 4515 4516 4517
 */

int ha_ndbcluster::delete_table(const char *name)
{
4518
  DBUG_ENTER("ha_ndbcluster::delete_table");
4519 4520 4521
  DBUG_PRINT("enter", ("name: %s", name));
  set_dbname(name);
  set_tabname(name);
4522

4523 4524
  if (check_ndb_connection())
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
4525 4526

  /* Call ancestor function to delete .ndb file */
4527
  handler::delete_table(name);
4528 4529
  
  /* Drop the table from NDB */
4530 4531 4532 4533 4534
  DBUG_RETURN(drop_table());
}


/*
4535
  Drop table in NDB Cluster
4536 4537 4538 4539
 */

int ha_ndbcluster::drop_table()
{
4540
  THD *thd= current_thd;
4541 4542
  Ndb *ndb= get_ndb();
  NdbDictionary::Dictionary *dict= ndb->getDictionary();
4543

4544 4545 4546 4547
  DBUG_ENTER("drop_table");
  DBUG_PRINT("enter", ("Deleting %s", m_tabname));
  
  release_metadata();
4548
  while (dict->dropTable(m_tabname)) 
4549 4550
  {
    const NdbError err= dict->getNdbError();
4551 4552 4553 4554 4555 4556 4557 4558 4559
    switch (err.status)
    {
      case NdbError::TemporaryError:
        if (!thd->killed)
          continue; // retry indefinitly
        break;
      default:
        break;
    }
4560
    ERR_RETURN(dict->getNdbError());
4561 4562
  }

4563 4564 4565 4566
  DBUG_RETURN(0);
}


4567
/*
  Fetch an auto increment value for the table from NDB

  When m_skip_auto_increment is set the value is obtained with
  readAutoIncrementValue(), otherwise with getAutoIncrementValue()
  and a cache size derived from m_rows_to_insert, m_rows_inserted and
  m_autoincrement_prefetch. A call that returns -1 with a
  TemporaryError status is repeated while the retry counter, started
  at NDB_AUTO_INCREMENT_RETRIES, has not run out.

  Returns the value, or ~(ulonglong) 0 after logging the NDB error
  when the value could not be obtained.
 */

ulonglong ha_ndbcluster::get_auto_increment()
{  
  int cache_size;
  Uint64 auto_value;
  DBUG_ENTER("get_auto_increment");
  DBUG_PRINT("enter", ("m_tabname: %s", m_tabname));
  Ndb *ndb= get_ndb();
   
  if (m_rows_inserted > m_rows_to_insert)
  {
    /* We guessed too low */
    m_rows_to_insert+= m_autoincrement_prefetch;
  }
  cache_size= 
    (int) ((m_rows_to_insert - m_rows_inserted < m_autoincrement_prefetch) ?
           m_rows_to_insert - m_rows_inserted :
           ((m_rows_to_insert > m_autoincrement_prefetch) ?
            m_rows_to_insert : m_autoincrement_prefetch));
  int ret;
  uint retries= NDB_AUTO_INCREMENT_RETRIES;
  do {
    ret=
      m_skip_auto_increment ? 
      ndb->readAutoIncrementValue((const NDBTAB *) m_table, auto_value) :
      ndb->getAutoIncrementValue((const NDBTAB *) m_table, auto_value, cache_size);
  } while (ret == -1 && 
           --retries &&
           ndb->getNdbError().status == NdbError::TemporaryError);
  if (ret == -1)
  {
    const NdbError err= ndb->getNdbError();
    sql_print_error("Error %lu in ::get_auto_increment(): %s",
                    (ulong) err.code, err.message);
    DBUG_RETURN(~(ulonglong) 0);
  }
  // Cast to the declared return type instead of going through longlong
  DBUG_RETURN((ulonglong) auto_value);
}


/*
  Constructor for the NDB Cluster table handler 

  table_arg  passed on to the handler base class together with
             ndbcluster_hton

  Sets every member to its start value, empties m_tabname and
  m_dbname, marks the row count as uninitialized and resets all
  MAX_KEY entries of m_index.
 */

ha_ndbcluster::ha_ndbcluster(TABLE *table_arg):
  handler(&ndbcluster_hton, table_arg),
  m_active_trans(NULL),
  m_active_cursor(NULL),
  m_table(NULL),
  m_table_version(-1),
  m_table_info(NULL),
  m_table_flags(HA_REC_NOT_IN_SEQ |
                HA_NULL_IN_KEY |
                HA_AUTO_PART_KEY |
                HA_NO_PREFIX_CHAR_KEYS |
                HA_NEED_READ_RANGE_BUFFER |
                HA_CAN_GEOMETRY |
                HA_CAN_BIT_FIELD |
                HA_PARTIAL_COLUMN_READ),
  m_share(0),
  m_use_write(FALSE),
  m_ignore_dup_key(FALSE),
  m_has_unique_index(FALSE),
  m_primary_key_update(FALSE),
  m_retrieve_all_fields(FALSE),
  m_retrieve_primary_key(FALSE),
  m_rows_to_insert((ha_rows) 1),
  m_rows_inserted((ha_rows) 0),
  m_bulk_insert_rows((ha_rows) 1024),
  m_rows_changed((ha_rows) 0),
  m_bulk_insert_not_flushed(FALSE),
  m_ops_pending(0),
  m_skip_auto_increment(TRUE),
  m_blobs_pending(0),
  m_blobs_offset(0),
  m_blobs_buffer(0),
  m_blobs_buffer_size(0),
  m_dupkey((uint) -1),
  m_ha_not_exact_count(FALSE),
  m_force_send(TRUE),
  m_autoincrement_prefetch((ha_rows) 32),
  m_transaction_on(TRUE),
  m_cond_stack(NULL),
  m_multi_cursor(NULL)
{
  int i;
 
  DBUG_ENTER("ha_ndbcluster");

  m_tabname[0]= '\0';
  m_dbname[0]= '\0';

  records= ~(ha_rows)0; // uninitialized
  block_size= 1024;

  // No index information is known yet
  for (i= 0; i < MAX_KEY; i++)
  {
    m_index[i].type= UNDEFINED_INDEX;
    m_index[i].unique_index= NULL;
    m_index[i].index= NULL;
    m_index[i].unique_index_attrid_map= NULL;
  }

  DBUG_VOID_RETURN;
}


/*
  Destructor for NDB Cluster table handler

  Frees the share if one is still held, releases the metadata, frees
  the blob buffer and clears the condition stack. In debug builds it
  asserts that no cursor and no transaction is still active.
 */

ha_ndbcluster::~ha_ndbcluster() 
{
  DBUG_ENTER("~ha_ndbcluster");

  if (m_share)
    free_share(m_share);
  release_metadata();
  my_free(m_blobs_buffer, MYF(MY_ALLOW_ZERO_PTR));
  m_blobs_buffer= 0;

  // Check for open cursor/transaction
  DBUG_ASSERT(m_active_cursor == NULL);
  DBUG_ASSERT(m_active_trans == NULL);

  // Discard the condition stack
  DBUG_PRINT("info", ("Clearing condition stack"));
  cond_clear();

  DBUG_VOID_RETURN;
}


mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4703

4704 4705 4706 4707 4708 4709 4710 4711
/*
  Open a table for further use
  - fetch metadata for this table from NDB
  - check that table exists
*/

int ha_ndbcluster::open(const char *name, int mode, uint test_if_locked)
{
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4712
  int res;
4713 4714 4715 4716 4717 4718 4719 4720
  KEY *key;
  DBUG_ENTER("open");
  DBUG_PRINT("enter", ("name: %s mode: %d test_if_locked: %d",
                       name, mode, test_if_locked));
  
  // Setup ref_length to make room for the whole 
  // primary key to be written in the ref variable
  
4721
  if (table->s->primary_key != MAX_KEY) 
4722
  {
4723
    key= table->key_info+table->s->primary_key;
4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734
    ref_length= key->key_length;
    DBUG_PRINT("info", (" ref_length: %d", ref_length));
  }
  // Init table lock structure 
  if (!(m_share=get_share(name)))
    DBUG_RETURN(1);
  thr_lock_data_init(&m_share->lock,&m_lock,(void*) 0);
  
  set_dbname(name);
  set_tabname(name);
  
4735 4736
  if (check_ndb_connection()) {
    free_share(m_share); m_share= 0;
4737
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
4738
  }
4739
  
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4740 4741 4742
  res= get_metadata(name);
  if (!res)
    info(HA_STATUS_VARIABLE | HA_STATUS_CONST);
4743

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
4744
  DBUG_RETURN(res);
4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755
}


/*
  Close the table
  - release resources setup by open()
 */

int ha_ndbcluster::close(void)
{
  DBUG_ENTER("close");  
4756
  free_share(m_share); m_share= 0;
4757 4758 4759 4760 4761
  release_metadata();
  DBUG_RETURN(0);
}


4762
/*
  Allocate a new Thd_ndb object and initialise its Ndb object

  The local table data size of the dictionary is set to
  sizeof(Ndb_local_table_statistics) and the Ndb object is initialised
  with max_transactions.

  Returns the new object, or NULL when the allocation or the
  initialisation failed. After a failed initialisation the object is
  deleted again.
 */

Thd_ndb* ha_ndbcluster::seize_thd_ndb()
{
  Thd_ndb *thd_ndb;
  DBUG_ENTER("seize_thd_ndb");

  thd_ndb= new Thd_ndb();
  // This file treats a failed new as returning 0 (see ndbcluster_init)
  if (thd_ndb == NULL)
    DBUG_RETURN(thd_ndb);

  thd_ndb->ndb->getDictionary()->set_local_table_data_size(
    sizeof(Ndb_local_table_statistics)
    );
  if (thd_ndb->ndb->init(max_transactions) != 0)
  {
    ERR_PRINT(thd_ndb->ndb->getNdbError());
    /*
      TODO 
      Alt.1 If init fails because too many allocated Ndb 
      wait on condition for a Ndb object to be released.
      Alt.2 Seize/release from pool, wait until next release 
    */
    delete thd_ndb;
    thd_ndb= NULL;
  }
  DBUG_RETURN(thd_ndb);
}


4787
/*
  Release a Thd_ndb object by deleting it
 */

void ha_ndbcluster::release_thd_ndb(Thd_ndb* thd_ndb)
{
  DBUG_ENTER("release_thd_ndb");
  delete thd_ndb;
  DBUG_VOID_RETURN;
}


/*
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
4796
  If this thread already has a Thd_ndb object allocated
4797
  in current THD, reuse it. Otherwise
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
4798
  seize a Thd_ndb object, assign it to current THD and use it.
4799 4800 4801
 
*/

4802
Ndb* check_ndb_in_thd(THD* thd)
4803
{
4804
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
4805
  if (!thd_ndb)
4806
  {
magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
4807
    if (!(thd_ndb= ha_ndbcluster::seize_thd_ndb()))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4808
      return NULL;
4809
    set_thd_ndb(thd, thd_ndb);
4810
  }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
4811
  return thd_ndb->ndb;
4812 4813
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
4814

4815

4816
/*
  Make sure the given THD has an Ndb object and select m_dbname as
  the database name on it

  Returns 0 on success, HA_ERR_NO_CONNECTION when no Ndb object could
  be obtained.
 */

int ha_ndbcluster::check_ndb_connection(THD* thd)
{
  Ndb *ndb;
  DBUG_ENTER("check_ndb_connection");
  
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  ndb->setDatabaseName(m_dbname);
  DBUG_RETURN(0);
}

magnus@neptunus.(none)'s avatar
magnus@neptunus.(none) committed
4827

4828
/*
  Release the Thd_ndb object of the given THD, if it has one.
  Always returns 0.
 */

int ndbcluster_close_connection(THD *thd)
{
  Thd_ndb *thd_ndb= get_thd_ndb(thd);
  DBUG_ENTER("ndbcluster_close_connection");
  if (thd_ndb)
  {
    ha_ndbcluster::release_thd_ndb(thd_ndb);
    set_thd_ndb(thd, NULL); // not strictly required but does not hurt either
  }
  DBUG_RETURN(0);
}


/*
  Try to discover one table from NDB
 */

4845
int ndbcluster_discover(THD* thd, const char *db, const char *name,
4846
                        const void** frmblob, uint* frmlen)
4847 4848 4849 4850
{
  uint len;
  const void* data;
  const NDBTAB* tab;
4851
  Ndb* ndb;
4852
  DBUG_ENTER("ndbcluster_discover");
4853
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name)); 
4854

4855 4856 4857
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);  
  ndb->setDatabaseName(db);
4858

4859
  NDBDICT* dict= ndb->getDictionary();
4860
  dict->set_local_table_data_size(sizeof(Ndb_local_table_statistics));
4861 4862 4863 4864 4865
  dict->invalidateTable(name);
  if (!(tab= dict->getTable(name)))
  {    
    const NdbError err= dict->getNdbError();
    if (err.code == 709)
4866
      DBUG_RETURN(-1);
4867
    ERR_RETURN(err);
4868 4869 4870 4871 4872 4873
  }
  DBUG_PRINT("info", ("Found table %s", tab->getName()));
  
  len= tab->getFrmLength();  
  if (len == 0 || tab->getFrmData() == NULL)
  {
4874 4875
    DBUG_PRINT("error", ("No frm data found."));
    DBUG_RETURN(1);
4876 4877 4878
  }
  
  if (unpackfrm(&data, &len, tab->getFrmData()))
4879 4880 4881 4882
  {
    DBUG_PRINT("error", ("Could not unpack table"));
    DBUG_RETURN(1);
  }
4883 4884 4885 4886 4887 4888 4889 4890

  *frmlen= len;
  *frmblob= data;
  
  DBUG_RETURN(0);
}

/*
4891
  Check if a table exists in NDB
4892

4893
 */
4894

4895
int ndbcluster_table_exists_in_engine(THD* thd, const char *db, const char *name)
4896 4897 4898
{
  const NDBTAB* tab;
  Ndb* ndb;
4899
  DBUG_ENTER("ndbcluster_table_exists_in_engine");
4900
  DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
4901 4902

  if (!(ndb= check_ndb_in_thd(thd)))
4903
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
4904 4905 4906
  ndb->setDatabaseName(db);

  NDBDICT* dict= ndb->getDictionary();
4907
  dict->set_local_table_data_size(sizeof(Ndb_local_table_statistics));
4908 4909
  dict->invalidateTable(name);
  if (!(tab= dict->getTable(name)))
4910
  {
4911 4912 4913 4914 4915
    const NdbError err= dict->getNdbError();
    if (err.code == 709)
      DBUG_RETURN(0);
    ERR_RETURN(err);
  }
4916

4917 4918 4919 4920
  DBUG_PRINT("info", ("Found table %s", tab->getName()));
  DBUG_RETURN(1);
}

4921 4922


4923
/*
  Key callback for the table name hashes set up in
  ndbcluster_find_files(): the entry itself, a NUL terminated string,
  is the key and *length is set to its string length.
 */

extern "C" byte* tables_get_key(const char *entry, uint *length,
                                my_bool not_used __attribute__((unused)))
{
  *length= strlen(entry);
  return (byte*) entry;
}


4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944
/*
  Drop a database in NDB Cluster
 */

int ndbcluster_drop_database(const char *path)
{
  DBUG_ENTER("ndbcluster_drop_database");
  THD *thd= current_thd;
  char dbname[FN_HEADLEN];
  Ndb* ndb;
  NdbDictionary::Dictionary::List list;
  uint i;
  char *tabname;
  List<char> drop_list;
4945
  int ret= 0;
4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971
  ha_ndbcluster::set_dbname(path, (char *)&dbname);
  DBUG_PRINT("enter", ("db: %s", dbname));
  
  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);
  
  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());
  for (i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& t= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name));     
    
    // Add only tables that belongs to db
    if (my_strcasecmp(system_charset_info, t.database, dbname))
      continue;
    DBUG_PRINT("info", ("%s must be dropped", t.name));     
    drop_list.push_back(thd->strdup(t.name));
  }
  // Drop any tables belonging to database
  ndb->setDatabaseName(dbname);
  List_iterator_fast<char> it(drop_list);
  while ((tabname=it++))
4972
  {
4973
    while (dict->dropTable(tabname))
4974 4975
    {
      const NdbError err= dict->getNdbError();
4976 4977 4978 4979 4980 4981 4982 4983 4984 4985
      switch (err.status)
      {
        case NdbError::TemporaryError:
          if (!thd->killed)
            continue; // retry indefinitly
          break;
        default:
          break;
      }
      if (err.code != 709) // 709: No such table existed
4986 4987
      {
        ERR_PRINT(err);
4988
        ret= ndb_to_mysql_error(&err);
4989
      }
4990
      break;
4991 4992 4993
    }
  }
  DBUG_RETURN(ret);      
4994 4995 4996
}


4997
/*
  Bring the list of table files of a database in line with the tables
  NDB knows about

  thd    thread whose Ndb object is used
  db     database name
  path   not used in this function
  wild   optional wildcard applied to the table names found in NDB
  dir    when set nothing is done, discovery of databases is not
         supported
  files  in/out: names found on disk; names of tables that are gone
         from NDB are removed from it, names of discovered tables are
         appended

  Steps:
  - list the user tables in NDB and keep those that belong to db and
    match wild
  - a name in files that is also in NDB is ok
  - a name in files that has an .ndb file but is unknown to NDB is
    removed from files and scheduled for deletion
  - a name in NDB that is not ok is scheduled for discovery
  - with LOCK_open held: delete the scheduled tables unless
    global_read_lock is set, then create the discovered ones

  Returns 0 on success, HA_ERR_NO_CONNECTION when no Ndb object is
  available, -1 when a hash could not be initialised. A failure to
  list the NDB tables is reported through ERR_RETURN.
 */

int ndbcluster_find_files(THD *thd,const char *db,const char *path,
                          const char *wild, bool dir, List<char> *files)
{
  DBUG_ENTER("ndbcluster_find_files");
  DBUG_PRINT("enter", ("db: %s", db));
  { // extra bracket to avoid gcc 2.95.3 warning
  uint i;
  Ndb* ndb;
  char name[FN_REFLEN];
  HASH ndb_tables, ok_tables;
  NdbDictionary::Dictionary::List list;

  if (!(ndb= check_ndb_in_thd(thd)))
    DBUG_RETURN(HA_ERR_NO_CONNECTION);

  if (dir)
    DBUG_RETURN(0); // Discover of databases not yet supported

  // List tables in NDB
  NDBDICT *dict= ndb->getDictionary();
  if (dict->listObjects(list, 
                        NdbDictionary::Object::UserTable) != 0)
    ERR_RETURN(dict->getNdbError());

  if (hash_init(&ndb_tables, system_charset_info,list.count,0,0,
                (hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ndb_tables"));
    DBUG_RETURN(-1);
  }

  if (hash_init(&ok_tables, system_charset_info,32,0,0,
                (hash_get_key)tables_get_key,0,0))
  {
    DBUG_PRINT("error", ("Failed to init HASH ok_tables"));
    hash_free(&ndb_tables);
    DBUG_RETURN(-1);
  }  

  for (i= 0 ; i < list.count ; i++)
  {
    NdbDictionary::Dictionary::List::Element& t= list.elements[i];
    DBUG_PRINT("info", ("Found %s/%s in NDB", t.database, t.name));     

    // Add only tables that belongs to db
    if (my_strcasecmp(system_charset_info, t.database, db))
      continue;

    // Apply wildcard to list of tables in NDB
    if (wild)
    {
      if (lower_case_table_names)
      {
        if (wild_case_compare(files_charset_info, t.name, wild))
          continue;
      }
      else if (wild_compare(t.name,wild,0))
        continue;
    }
    DBUG_PRINT("info", ("Inserting %s into ndb_tables hash", t.name));     
    my_hash_insert(&ndb_tables, (byte*)thd->strdup(t.name));
  }

  char *file_name;
  List_iterator<char> it(*files);
  List<char> delete_list;
  while ((file_name=it++))
  {
    DBUG_PRINT("info", ("%s", file_name));     
    if (hash_search(&ndb_tables, file_name, strlen(file_name)))
    {
      DBUG_PRINT("info", ("%s existed in NDB _and_ on disk ", file_name));
      // File existed in NDB and as frm file, put in ok_tables list
      my_hash_insert(&ok_tables, (byte*)file_name);
      continue;
    }
    
    // File is not in NDB, check for .ndb file with this name
    (void)strxnmov(name, FN_REFLEN, 
                   mysql_data_home,"/",db,"/",file_name,ha_ndb_ext,NullS);
    DBUG_PRINT("info", ("Check access for %s", name));
    if (access(name, F_OK))
    {
      DBUG_PRINT("info", ("%s did not exist on disk", name));     
      // .ndb file did not exist on disk, another table type
      continue;
    }

    DBUG_PRINT("info", ("%s existed on disk", name));     
    // The .ndb file exists on disk, but it's not in list of tables in ndb
    // Verify that handler agrees table is gone.
    if (ndbcluster_table_exists_in_engine(thd, db, file_name) == 0)    
    {
      DBUG_PRINT("info", ("NDB says %s does not exists", file_name));     
      it.remove();
      // Put in list of tables to remove from disk
      delete_list.push_back(thd->strdup(file_name));
    }
  }

  // Check for new files to discover
  DBUG_PRINT("info", ("Checking for new files to discover"));       
  List<char> create_list;
  for (i= 0 ; i < ndb_tables.records ; i++)
  {
    file_name= hash_element(&ndb_tables, i);
    if (!hash_search(&ok_tables, file_name, strlen(file_name)))
    {
      DBUG_PRINT("info", ("%s must be discovered", file_name));       
      // File is in list of ndb tables and not in ok_tables
      // This table need to be created
      create_list.push_back(thd->strdup(file_name));
    }
  }

  // Lock mutex before deleting and creating frm files
  pthread_mutex_lock(&LOCK_open);

  if (!global_read_lock)
  {
    // Delete old files
    List_iterator_fast<char> it3(delete_list);
    while ((file_name=it3++))
    {
      DBUG_PRINT("info", ("Remove table %s/%s", db, file_name));
      // Delete the table and all related files
      TABLE_LIST table_list;
      bzero((char*) &table_list,sizeof(table_list));
      table_list.db= (char*) db;
      table_list.alias= table_list.table_name= (char*)file_name;
      (void)mysql_rm_table_part2(thd, &table_list,
                                 /* if_exists */ FALSE,
                                 /* drop_temporary */ FALSE,
                                 /* drop_view */ FALSE,
                                 /* dont_log_query*/ TRUE);
      /* Clear error message that is returned when table is deleted */
      thd->clear_error();
    }
  }

  // Create new files
  List_iterator_fast<char> it2(create_list);
  while ((file_name=it2++))
  {  
    DBUG_PRINT("info", ("Table %s need discovery", file_name));
    if (ha_create_table_from_engine(thd, db, file_name) == 0)
      files->push_back(thd->strdup(file_name)); 
  }

  pthread_mutex_unlock(&LOCK_open);      
  
  hash_free(&ok_tables);
  hash_free(&ndb_tables);
  } // extra bracket to avoid gcc 2.95.3 warning
  DBUG_RETURN(0);    
}


/*
  Initialise all global variables before creating
  a NDB Cluster table handler
 */

5160 5161 5162 5163 5164 5165 5166
/*
  Call back after cluster connect: refreshes the status variables
  from the global cluster connection. Always returns 0.
 */
static int connect_callback()
{
  update_status_variables(g_ndb_cluster_connection);
  return 0;
}

5167
bool ndbcluster_init()
5168
{
5169
  int res;
5170
  DBUG_ENTER("ndbcluster_init");
5171 5172 5173 5174

  if (have_ndbcluster != SHOW_OPTION_YES)
    goto ndbcluster_init_error;

5175
  // Set connectstring if specified
5176 5177
  if (opt_ndbcluster_connectstring != 0)
    DBUG_PRINT("connectstring", ("%s", opt_ndbcluster_connectstring));     
5178
  if ((g_ndb_cluster_connection=
5179
       new Ndb_cluster_connection(opt_ndbcluster_connectstring)) == 0)
5180
  {
5181
    DBUG_PRINT("error",("Ndb_cluster_connection(%s)",
5182
                        opt_ndbcluster_connectstring));
5183
    goto ndbcluster_init_error;
5184
  }
tomas@poseidon.ndb.mysql.com's avatar
ndb:  
tomas@poseidon.ndb.mysql.com committed
5185 5186 5187 5188 5189
  {
    char buf[128];
    my_snprintf(buf, sizeof(buf), "mysqld --server-id=%d", server_id);
    g_ndb_cluster_connection->set_name(buf);
  }
5190 5191 5192
  g_ndb_cluster_connection->set_optimized_node_selection
    (opt_ndb_optimized_node_selection);

5193
  // Create a Ndb object to open the connection  to NDB
5194 5195 5196 5197 5198
  if ( (g_ndb= new Ndb(g_ndb_cluster_connection, "sys")) == 0 )
  {
    DBUG_PRINT("error", ("failed to create global ndb object"));
    goto ndbcluster_init_error;
  }
5199
  g_ndb->getDictionary()->set_local_table_data_size(sizeof(Ndb_local_table_statistics));
5200 5201 5202
  if (g_ndb->init() != 0)
  {
    ERR_PRINT (g_ndb->getNdbError());
5203
    goto ndbcluster_init_error;
5204
  }
5205

5206
  if ((res= g_ndb_cluster_connection->connect(0,0,0)) == 0)
5207
  {
5208
    connect_callback();
5209
    DBUG_PRINT("info",("NDBCLUSTER storage engine at %s on port %d",
5210 5211
                       g_ndb_cluster_connection->get_connected_host(),
                       g_ndb_cluster_connection->get_connected_port()));
5212
    g_ndb_cluster_connection->wait_until_ready(10,3);
5213
  } 
5214
  else if (res == 1)
5215
  {
5216
    if (g_ndb_cluster_connection->start_connect_thread(connect_callback)) 
5217
    {
5218
      DBUG_PRINT("error", ("g_ndb_cluster_connection->start_connect_thread()"));
5219 5220
      goto ndbcluster_init_error;
    }
5221
#ifndef DBUG_OFF
5222 5223
    {
      char buf[1024];
5224
      DBUG_PRINT("info",
5225 5226 5227 5228
                 ("NDBCLUSTER storage engine not started, "
                  "will connect using %s",
                  g_ndb_cluster_connection->
                  get_connectstring(buf,sizeof(buf))));
5229
    }
5230
#endif
5231
  }
5232
  else
5233 5234 5235
  {
    DBUG_ASSERT(res == -1);
    DBUG_PRINT("error", ("permanent error"));
5236
    goto ndbcluster_init_error;
5237
  }
5238
  
5239 5240 5241
  (void) hash_init(&ndbcluster_open_tables,system_charset_info,32,0,0,
                   (hash_get_key) ndbcluster_get_key,0,0);
  pthread_mutex_init(&ndbcluster_mutex,MY_MUTEX_INIT_FAST);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5242 5243
  pthread_mutex_init(&LOCK_ndb_util_thread, MY_MUTEX_INIT_FAST);
  pthread_cond_init(&COND_ndb_util_thread, NULL);
5244

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5245 5246 5247 5248 5249 5250

  // Create utility thread
  pthread_t tmp;
  if (pthread_create(&tmp, &connection_attrib, ndb_util_thread_func, 0))
  {
    DBUG_PRINT("error", ("Could not create ndb utility thread"));
5251 5252 5253 5254
    hash_free(&ndbcluster_open_tables);
    pthread_mutex_destroy(&ndbcluster_mutex);
    pthread_mutex_destroy(&LOCK_ndb_util_thread);
    pthread_cond_destroy(&COND_ndb_util_thread);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5255 5256 5257
    goto ndbcluster_init_error;
  }
  
5258
  ndbcluster_inited= 1;
5259
  DBUG_RETURN(FALSE);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5260

5261
ndbcluster_init_error:
5262
  if (g_ndb)
5263 5264 5265 5266 5267
    delete g_ndb;
  g_ndb= NULL;
  if (g_ndb_cluster_connection)
    delete g_ndb_cluster_connection;
  g_ndb_cluster_connection= NULL;
5268 5269
  have_ndbcluster= SHOW_OPTION_DISABLED;	// If we couldn't use handler
  DBUG_RETURN(TRUE);
5270 5271 5272 5273 5274 5275
}


/*
  End use of the NDB Cluster table handler
  - free all global variables allocated by 
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5276
    ndbcluster_init()
5277 5278 5279 5280 5281
*/

bool ndbcluster_end()
{
  DBUG_ENTER("ndbcluster_end");
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5282

5283 5284 5285
  if (!ndbcluster_inited)
    DBUG_RETURN(0);

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5286 5287 5288 5289 5290 5291
  // Kill ndb utility thread
  (void) pthread_mutex_lock(&LOCK_ndb_util_thread);  
  DBUG_PRINT("exit",("killing ndb util thread: %lx", ndb_util_thread));
  (void) pthread_cond_signal(&COND_ndb_util_thread);
  (void) pthread_mutex_unlock(&LOCK_ndb_util_thread);

5292
  if (g_ndb)
5293 5294
  {
#ifndef DBUG_OFF
5295 5296
    Ndb::Free_list_usage tmp;
    tmp.m_name= 0;
5297 5298 5299 5300 5301 5302 5303 5304 5305 5306
    while (g_ndb->get_free_list_usage(&tmp))
    {
      uint leaked= (uint) tmp.m_created - tmp.m_free;
      if (leaked)
        fprintf(stderr, "NDB: Found %u %s%s that %s not been released\n",
                leaked, tmp.m_name,
                (leaked == 1)?"":"'s",
                (leaked == 1)?"has":"have");
    }
#endif
5307
    delete g_ndb;
5308
    g_ndb= NULL;
5309
  }
5310
  delete g_ndb_cluster_connection;
5311
  g_ndb_cluster_connection= NULL;
5312

5313 5314
  hash_free(&ndbcluster_open_tables);
  pthread_mutex_destroy(&ndbcluster_mutex);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5315 5316
  pthread_mutex_destroy(&LOCK_ndb_util_thread);
  pthread_cond_destroy(&COND_ndb_util_thread);
5317 5318 5319 5320
  ndbcluster_inited= 0;
  DBUG_RETURN(0);
}

5321 5322 5323 5324 5325
/*
  Static error print function called from
  static handler method ndbcluster_commit
  and ndbcluster_rollback
*/
5326 5327

void ndbcluster_print_error(int error, const NdbOperation *error_op)
5328
{
5329 5330
  DBUG_ENTER("ndbcluster_print_error");
  TABLE tab;
5331
  const char *tab_name= (error_op) ? error_op->getTableName() : "";
5332
  tab.alias= (char *) tab_name;
5333
  ha_ndbcluster error_handler(&tab);
5334
  tab.file= &error_handler;
5335
  error_handler.print_error(error, MYF(0));
ndbdev@ndbmaster.mysql.com's avatar
ndbdev@ndbmaster.mysql.com committed
5336
  DBUG_VOID_RETURN;
5337
}
5338

5339 5340 5341
/**
 * Set a given location from full pathname to database name
 *
5342
 */
5343
void ha_ndbcluster::set_dbname(const char *path_name, char *dbname)
5344 5345 5346 5347
{
  char *end, *ptr;
  
  /* Scan name from the end */
5348 5349 5350 5351 5352 5353
  ptr= strend(path_name)-1;
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  ptr--;
  end= ptr;
5354 5355 5356 5357
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
  uint name_len= end - ptr;
5358 5359
  memcpy(dbname, ptr + 1, name_len);
  dbname[name_len]= '\0';
5360 5361
#ifdef __WIN__
  /* Put to lower case */
5362 5363
  
  ptr= dbname;
5364 5365
  
  while (*ptr != '\0') {
5366
    *ptr= tolower(*ptr);
5367 5368 5369 5370 5371
    ptr++;
  }
#endif
}

5372 5373 5374 5375 5376 5377 5378 5379 5380
/*
  Set m_dbname from full pathname to table file

  Forwards to the two-argument overload with m_dbname as destination.
 */

void ha_ndbcluster::set_dbname(const char *path_name)
{
  set_dbname(path_name, m_dbname);
}

5381 5382 5383 5384 5385 5386 5387 5388 5389 5390
/**
 * Set a given location from full pathname to table file
 *
 */
void
ha_ndbcluster::set_tabname(const char *path_name, char * tabname)
{
  char *end, *ptr;
  
  /* Scan name from the end */
5391 5392
  end= strend(path_name)-1;
  ptr= end;
5393 5394 5395
  while (ptr >= path_name && *ptr != '\\' && *ptr != '/') {
    ptr--;
  }
5396
  uint name_len= end - ptr;
5397
  memcpy(tabname, ptr + 1, end - ptr);
5398
  tabname[name_len]= '\0';
5399 5400
#ifdef __WIN__
  /* Put to lower case */
5401
  ptr= tabname;
5402 5403 5404 5405 5406 5407 5408 5409 5410
  
  while (*ptr != '\0') {
    *ptr= tolower(*ptr);
    ptr++;
  }
#endif
}

/*
5411
  Set m_tabname from full pathname to table file 
5412 5413
 */

5414
void ha_ndbcluster::set_tabname(const char *path_name)
5415
{
5416
  set_tabname(path_name, m_tabname);
5417 5418 5419 5420
}


/*
  Estimate the number of rows in a key range of index inx.

  RETURN
    HA_POS_ERROR  unique/primary hash index given a key shorter than the
                  full key length
    1             non ordered index given a full length key
    10            otherwise (a guess)
*/
ha_rows
ha_ndbcluster::records_in_range(uint inx, key_range *min_key,
                                key_range *max_key)
{
  const uint full_length= table->key_info[inx].key_length;
  const NDB_INDEX_TYPE idx_type= get_index_type(inx);

  DBUG_ENTER("records_in_range");

  // Prevent partial read of hash indexes by returning HA_POS_ERROR
  const bool hash_only= (idx_type == UNIQUE_INDEX ||
                         idx_type == PRIMARY_KEY_INDEX);
  const bool partial_key= (min_key && min_key->length < full_length) ||
                          (max_key && max_key->length < full_length);
  if (hash_only && partial_key)
    DBUG_RETURN(HA_POS_ERROR);

  // Read from hash index with full key
  // This is a "const" table which returns only one record!
  const bool full_key= (min_key && min_key->length == full_length) ||
                       (max_key && max_key->length == full_length);
  if (idx_type != ORDERED_INDEX && full_key)
    DBUG_RETURN(1);

  DBUG_RETURN(10); /* Good guess when you don't know anything */
}

5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471
/* Return m_table_flags, with HA_NOT_EXACT_COUNT added when
   m_ha_not_exact_count is set */
ulong ha_ndbcluster::table_flags(void) const
{
  return m_ha_not_exact_count ? (m_table_flags | HA_NOT_EXACT_COUNT)
                              : m_table_flags;
}
/* Name of this table type: always "ndbcluster" */
const char * ha_ndbcluster::table_type() const
{
  return "ndbcluster";
}
/* Reports NDB_MAX_TUPLE_SIZE as the maximum supported record length */
uint ha_ndbcluster::max_supported_record_length() const
{
  return NDB_MAX_TUPLE_SIZE;
}
/* Reports MAX_KEY as the maximum supported number of keys */
uint ha_ndbcluster::max_supported_keys() const
{
  return MAX_KEY;
}
/* Reports NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY as the maximum number of key parts */
uint ha_ndbcluster::max_supported_key_parts() const
{
  return NDB_MAX_NO_OF_ATTRIBUTES_IN_KEY;
}
/* Reports NDB_MAX_KEY_SIZE as the maximum supported key length */
uint ha_ndbcluster::max_supported_key_length() const
{
  return NDB_MAX_KEY_SIZE;
}
pekka@mysql.com's avatar
pekka@mysql.com committed
5472 5473 5474 5475
/* Reports NDB_MAX_KEY_SIZE as the maximum supported length of one key part */
uint ha_ndbcluster::max_supported_key_part_length() const
{
  return NDB_MAX_KEY_SIZE;
}
5476 5477 5478 5479 5480 5481 5482 5483 5484 5485
/* TRUE unless the build defines WORDS_BIGENDIAN */
bool ha_ndbcluster::low_byte_first() const
{
#ifndef WORDS_BIGENDIAN
  return TRUE;
#else
  return FALSE;
#endif
}
bool ha_ndbcluster::has_transactions()
{
5486
  return TRUE;
5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500
}
/*
  Name of the index algorithm for key number key_number:
  "BTREE" for the three ordered index types, "HASH" for everything else.
*/
const char* ha_ndbcluster::index_type(uint key_number)
{
  const NDB_INDEX_TYPE type= get_index_type(key_number);

  if (type == ORDERED_INDEX ||
      type == UNIQUE_ORDERED_INDEX ||
      type == PRIMARY_KEY_ORDERED_INDEX)
    return "BTREE";

  /* UNIQUE_INDEX, PRIMARY_KEY_INDEX and any other value */
  return "HASH";
}
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5501

5502 5503
/* Query cache type of NDB tables: always HA_CACHE_TBL_ASKTRANSACT */
uint8 ha_ndbcluster::table_cache_type()
{
  DBUG_ENTER("ha_ndbcluster::table_cache_type=HA_CACHE_TBL_ASKTRANSACT");
  DBUG_RETURN(HA_CACHE_TBL_ASKTRANSACT);
}


/*
  Get the commit count of table dbname.tabname.

  The share of the table is looked up in ndbcluster_open_tables under the
  key "./<dbname>/<tabname>".  When ndb_cache_check_time > 0 and the share
  holds a non-zero commit_count, that value is returned.  Otherwise the
  table statistics are fetched through ndb_get_table_statistics() and,
  unless share->commit_count_lock changed in the meantime, stored in the
  share.

  SYNOPSIS
    ndb_get_commitcount
      thd           thread handle, used to find the Ndb object
      dbname        database name
      tabname       table name
      commit_count  out: the commit count; set to 0 when the fetched value
                    had to be discarded because commit_count_lock changed

  RETURN
    0  ok, *commit_count is set
    1  share not found, no Ndb object for thd, or statistics not available
*/
uint ndb_get_commitcount(THD *thd, char *dbname, char *tabname,
                         Uint64 *commit_count)
{
  DBUG_ENTER("ndb_get_commitcount");

  char name[FN_REFLEN];
  NDB_SHARE *share;
  /*
    NOTE(review): strxnmov() may store the terminating NUL at dst[len],
    so one byte of the buffer is kept in reserve - confirm against
    strings/strxnmov.c.
  */
  (void)strxnmov(name, sizeof(name) - 1, "./",dbname,"/",tabname,NullS);
  DBUG_PRINT("enter", ("name: %s", name));
  pthread_mutex_lock(&ndbcluster_mutex);
  if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                       (byte*) name,
                                       strlen(name))))
  {
    pthread_mutex_unlock(&ndbcluster_mutex);
    DBUG_PRINT("info", ("Table %s not found in ndbcluster_open_tables",
                        name));
    DBUG_RETURN(1);
  }
  /* Take a reference; every return path below must call free_share() */
  share->use_count++;
  pthread_mutex_unlock(&ndbcluster_mutex);

  pthread_mutex_lock(&share->mutex);
  if (ndb_cache_check_time > 0)
  {
    if (share->commit_count != 0)
    {
      *commit_count= share->commit_count;
      char buff[22];
      DBUG_PRINT("info", ("Getting commit_count: %s from share",
                          llstr(share->commit_count, buff)));
      pthread_mutex_unlock(&share->mutex);
      free_share(share);
      DBUG_RETURN(0);
    }
  }
  DBUG_PRINT("info", ("Get commit_count from NDB"));
  Ndb *ndb;
  if (!(ndb= check_ndb_in_thd(thd)))
  {
    /* Do not leave with share->mutex held or the reference still taken */
    pthread_mutex_unlock(&share->mutex);
    free_share(share);
    DBUG_RETURN(1);
  }
  ndb->setDatabaseName(dbname);
  /* Remember the lock counter so a change during the fetch is detected */
  uint lock= share->commit_count_lock;
  pthread_mutex_unlock(&share->mutex);

  struct Ndb_statistics stat;
  if (ndb_get_table_statistics(ndb, tabname, &stat))
  {
    free_share(share);
    DBUG_RETURN(1);
  }

  pthread_mutex_lock(&share->mutex);
  if (share->commit_count_lock == lock)
  {
    char buff[22];
    DBUG_PRINT("info", ("Setting commit_count to %s",
                        llstr(stat.commit_count, buff)));
    share->commit_count= stat.commit_count;
    *commit_count= stat.commit_count;
  }
  else
  {
    DBUG_PRINT("info", ("Discarding commit_count, comit_count_lock changed"));
    *commit_count= 0;
  }
  pthread_mutex_unlock(&share->mutex);
  free_share(share);
  DBUG_RETURN(0);
}


/*
  Check if a cached query can be used.
  This is done by comparing the supplied engine_data to commit_count of
  the table.
  The commit_count is normally retrieved from the share for the table, where
  it has been cached by the util thread. If the util thread is not started,
  NDB has to be contacted to retrieve the commit_count; this will introduce
  a small delay while waiting for NDB to answer.


  SYNOPSIS
  ndbcluster_cache_retrieval_allowed
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1

    engine_data    parameter retrieved when query was first inserted into
                   the cache. If the value of engine_data is changed,
                   all queries for this table should be invalidated.

  RETURN VALUE
    TRUE  Yes, use the query from cache
    FALSE No, don't use the cached query, and if engine_data
          has changed, all queries for this table should be invalidated

*/

static my_bool
ndbcluster_cache_retrieval_allowed(THD *thd,
                                   char *full_name, uint full_name_len,
                                   ulonglong *engine_data)
{
  Uint64 commit_count;
  bool is_autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
  /* full_name is "<dbname>\0<tabname>": the table name follows the first NUL */
  char *dbname= full_name;
  char *tabname= dbname+strlen(dbname)+1;
  char data_buff[22], count_buff[22];
  DBUG_ENTER("ndbcluster_cache_retrieval_allowed");
  DBUG_PRINT("enter", ("dbname: %s, tabname: %s, is_autocommit: %d",
                       dbname, tabname, is_autocommit));

  /* The cache is never used inside a transaction */
  if (!is_autocommit)
  {
    DBUG_PRINT("exit", ("No, don't use cache in transaction"));
    DBUG_RETURN(FALSE);
  }

  if (ndb_get_commitcount(thd, dbname, tabname, &commit_count))
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, could not retrieve commit_count"));
    DBUG_RETURN(FALSE);
  }
  DBUG_PRINT("info", ("*engine_data: %s, commit_count: %s",
                      llstr(*engine_data, data_buff),
                      llstr(commit_count, count_buff)));

  if (commit_count == 0)
  {
    *engine_data= 0; /* invalidate */
    DBUG_PRINT("exit", ("No, local commit has been performed"));
    DBUG_RETURN(FALSE);
  }

  if (*engine_data != commit_count)
  {
    /* Hand the new commit count back so the caller sees the change */
    *engine_data= commit_count; /* invalidate */
    DBUG_PRINT("exit", ("No, commit_count has changed"));
    DBUG_RETURN(FALSE);
  }

  DBUG_PRINT("exit", ("OK to use cache, engine_data: %s",
                      llstr(*engine_data, data_buff)));
  DBUG_RETURN(TRUE);
}


/**
   Register a table for use in the query cache. Fetch the commit_count
   for the table and return it in engine_data, this will later be used
   to check if the table has changed, before the cached query is reused.

   SYNOPSIS
   ha_ndbcluster::register_query_cache_table
    thd            thread handle
    full_name      concatenation of database name,
                   the null character '\0', and the table
                   name
    full_name_len  length of the full name,
                   i.e. len(dbname) + len(tablename) + 1
    qc_engine_callback  function to be called before using cache on this table
    engine_data    out, commit_count for this table

  RETURN VALUE
    TRUE  Yes, it's ok to cache this query
    FALSE No, don't cache the query

*/

my_bool
ha_ndbcluster::register_query_cache_table(THD *thd,
                                          char *full_name, uint full_name_len,
                                          qc_engine_callback *engine_callback,
                                          ulonglong *engine_data)
{
  Uint64 commit_count;
  char buff[22];
  bool is_autocommit= !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN));
  DBUG_ENTER("ha_ndbcluster::register_query_cache_table");
  DBUG_PRINT("enter",("dbname: %s, tabname: %s, is_autocommit: %d",
                      m_dbname, m_tabname, is_autocommit));

  /* Tables are not registered in the query cache inside a transaction */
  if (!is_autocommit)
  {
    DBUG_PRINT("exit", ("Can't register table during transaction"));
    DBUG_RETURN(FALSE);
  }

  if (ndb_get_commitcount(thd, m_dbname, m_tabname, &commit_count))
  {
    *engine_data= 0;
    DBUG_PRINT("exit", ("Error, could not get commitcount"));
    DBUG_RETURN(FALSE);
  }

  /*
    Hand out the commit count together with the callback that is to be
    asked before a cached result for this table is used.
  */
  *engine_data= commit_count;
  *engine_callback= ndbcluster_cache_retrieval_allowed;
  DBUG_PRINT("exit", ("commit_count: %s", llstr(commit_count, buff)));

  /* A commit count of 0 means the query is not cached */
  DBUG_RETURN(commit_count > 0);
}
5709

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5710

5711
/*
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
5712
  Handling the shared NDB_SHARE structure that is needed to
5713 5714 5715 5716 5717 5718 5719
  provide table locking.
  It's also used for sharing data with other NDB handlers
  in the same MySQL Server. There is currently not much
  data we want to or can share.
 */

/*
  Key extraction function for NDB_SHARE entries: the key is the table name
  of the share, its length is returned through *length.
*/
static byte* ndbcluster_get_key(NDB_SHARE *share,uint *length,
                                my_bool not_used __attribute__((unused)))
{
  byte *key= (byte*) share->table_name;
  *length= share->table_name_length;
  return key;
}

/*
  Find the NDB_SHARE for table_name in ndbcluster_open_tables, creating and
  inserting a new one when none exists, and increment its use count.

  A new share is allocated zero filled together with room for a copy of
  the table name, which is stored directly behind the structure.

  RETURN
    pointer to the share, or 0 when allocation or hash insert failed
*/
static NDB_SHARE* get_share(const char *table_name)
{
  NDB_SHARE *share;
  char buff[22];
  pthread_mutex_lock(&ndbcluster_mutex);
  uint length=(uint) strlen(table_name);
  if (!(share=(NDB_SHARE*) hash_search(&ndbcluster_open_tables,
                                       (byte*) table_name,
                                       length)))
  {
    if ((share=(NDB_SHARE *) my_malloc(sizeof(*share)+length+1,
                                       MYF(MY_WME | MY_ZEROFILL))))
    {
      share->table_name_length=length;
      /* The name lives in the same allocation, right after the struct */
      share->table_name=(char*) (share+1);
      strmov(share->table_name,table_name);
      if (my_hash_insert(&ndbcluster_open_tables, (byte*) share))
      {
        pthread_mutex_unlock(&ndbcluster_mutex);
        my_free((gptr) share,0);
        return 0;
      }
      thr_lock_init(&share->lock);
      pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
      share->commit_count= 0;
      share->commit_count_lock= 0;
    }
    else
    {
      DBUG_PRINT("error", ("Failed to alloc share"));
      pthread_mutex_unlock(&ndbcluster_mutex);
      return 0;
    }
  }
  share->use_count++;

  /* commit_count is 64 bit: format it with llstr() instead of %d */
  DBUG_PRINT("share",
             ("table_name: %s, length: %d, use_count: %d, commit_count: %s",
              share->table_name, share->table_name_length, share->use_count,
              llstr(share->commit_count, buff)));
  pthread_mutex_unlock(&ndbcluster_mutex);
  return share;
}


/*
  Drop one reference to share.  When the use count reaches zero the share
  is removed from ndbcluster_open_tables and freed.
*/
static void free_share(NDB_SHARE *share)
{
  pthread_mutex_lock(&ndbcluster_mutex);
  share->use_count--;
  if (share->use_count == 0)
  {
    /* Last user is gone: unlink the share and release its resources */
    hash_delete(&ndbcluster_open_tables, (byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }
  pthread_mutex_unlock(&ndbcluster_mutex);
}



/*
  Internal representation of the frm blob
   
*/

/*
  Layout of a packed frm blob: a fixed header followed by the data bytes.
  packfrm() fills the header fields with int4store() and unpackfrm() reads
  them back with uint4korr().
*/
struct frm_blob_struct 
{
  struct frm_blob_header 
  {
    uint ver;      // Version of header
    uint orglen;   // Original length of compressed data
    uint complen;  // Compressed length of data, 0=uncompressed
  } head;
  // First byte of the data; packfrm() allocates header size + data length
  char data[1];  
};



static int packfrm(const void *data, uint len, 
5804
                   const void **pack_data, uint *pack_len)
5805 5806 5807 5808 5809 5810 5811 5812 5813
{
  int error;
  ulong org_len, comp_len;
  uint blob_len;
  frm_blob_struct* blob;
  DBUG_ENTER("packfrm");
  DBUG_PRINT("enter", ("data: %x, len: %d", data, len));
  
  error= 1;
5814
  org_len= len;
5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833
  if (my_compress((byte*)data, &org_len, &comp_len))
    goto err;
  
  DBUG_PRINT("info", ("org_len: %d, comp_len: %d", org_len, comp_len));
  DBUG_DUMP("compressed", (char*)data, org_len);
  
  error= 2;
  blob_len= sizeof(frm_blob_struct::frm_blob_header)+org_len;
  if (!(blob= (frm_blob_struct*) my_malloc(blob_len,MYF(MY_WME))))
    goto err;
  
  // Store compressed blob in machine independent format
  int4store((char*)(&blob->head.ver), 1);
  int4store((char*)(&blob->head.orglen), comp_len);
  int4store((char*)(&blob->head.complen), org_len);
  
  // Copy frm data into blob, already in machine independent format
  memcpy(blob->data, data, org_len);  
  
5834 5835 5836
  *pack_data= blob;
  *pack_len= blob_len;
  error= 0;
5837 5838 5839 5840 5841 5842 5843 5844 5845
  
  DBUG_PRINT("exit", ("pack_data: %x, pack_len: %d", *pack_data, *pack_len));
err:
  DBUG_RETURN(error);
  
}


static int unpackfrm(const void **unpack_data, uint *unpack_len,
5846
                    const void *pack_data)
5847
{
5848
   const frm_blob_struct *blob= (frm_blob_struct*)pack_data;
5849 5850 5851 5852 5853
   byte *data;
   ulong complen, orglen, ver;
   DBUG_ENTER("unpackfrm");
   DBUG_PRINT("enter", ("pack_data: %x", pack_data));

5854 5855 5856
   complen=     uint4korr((char*)&blob->head.complen);
   orglen=      uint4korr((char*)&blob->head.orglen);
   ver=         uint4korr((char*)&blob->head.ver);
5857 5858
 
   DBUG_PRINT("blob",("ver: %d complen: %d orglen: %d",
5859
                     ver,complen,orglen));
5860 5861 5862 5863
   DBUG_DUMP("blob->data", (char*) blob->data, complen);
 
   if (ver != 1)
     DBUG_RETURN(1);
5864
   if (!(data= my_malloc(max(orglen, complen), MYF(MY_WME))))
5865 5866 5867 5868 5869 5870 5871 5872 5873
     DBUG_RETURN(2);
   memcpy(data, blob->data, complen);
 
   if (my_uncompress(data, &complen, &orglen))
   {
     my_free((char*)data, MYF(0));
     DBUG_RETURN(3);
   }

5874 5875
   *unpack_data= data;
   *unpack_len= complen;
5876 5877 5878 5879 5880

   DBUG_PRINT("exit", ("frmdata: %x, len: %d", *unpack_data, *unpack_len));

   DBUG_RETURN(0);
}
5881 5882 5883

static 
int
5884
ndb_get_table_statistics(Ndb* ndb, const char * table,
5885
                         struct Ndb_statistics * ndbstat)
5886
{
5887
  NdbTransaction* pTrans;
5888
  NdbError error;
5889 5890
  int retries= 10;
  int retry_sleep= 30 * 1000; /* 30 milliseconds */
5891 5892 5893
  char buff[22], buff2[22], buff3[22], buff4[22];
  DBUG_ENTER("ndb_get_table_statistics");
  DBUG_PRINT("enter", ("table: %s", table));
5894 5895

  do
5896
  {
5897 5898
    Uint64 rows, commits, mem;
    Uint32 size;
5899
    Uint32 count= 0;
5900 5901
    Uint64 sum_rows= 0;
    Uint64 sum_commits= 0;
5902 5903
    Uint64 sum_row_size= 0;
    Uint64 sum_mem= 0;
5904 5905 5906 5907 5908
    NdbScanOperation*pOp;
    NdbResultSet *rs;
    int check;

    if ((pTrans= ndb->startTransaction()) == NULL)
5909
    {
5910 5911 5912
      error= ndb->getNdbError();
      goto retry;
    }
5913
      
5914 5915 5916 5917
    if ((pOp= pTrans->getNdbScanOperation(table)) == NULL)
    {
      error= pTrans->getNdbError();
      goto retry;
5918
    }
5919
    
5920
    if (pOp->readTuples(NdbOperation::LM_CommittedRead))
5921 5922 5923 5924
    {
      error= pOp->getNdbError();
      goto retry;
    }
5925
    
5926 5927 5928 5929 5930
    if (pOp->interpret_exit_last_row() == -1)
    {
      error= pOp->getNdbError();
      goto retry;
    }
5931 5932 5933
    
    pOp->getValue(NdbDictionary::Column::ROW_COUNT, (char*)&rows);
    pOp->getValue(NdbDictionary::Column::COMMIT_COUNT, (char*)&commits);
5934 5935
    pOp->getValue(NdbDictionary::Column::ROW_SIZE, (char*)&size);
    pOp->getValue(NdbDictionary::Column::FRAGMENT_MEMORY, (char*)&mem);
5936
    
5937 5938 5939
    if (pTrans->execute(NdbTransaction::NoCommit,
                        NdbTransaction::AbortOnError,
                        TRUE) == -1)
5940
    {
5941 5942
      error= pTrans->getNdbError();
      goto retry;
5943
    }
5944
    
monty@mishka.local's avatar
monty@mishka.local committed
5945
    while ((check= pOp->nextResult(TRUE, TRUE)) == 0)
5946 5947 5948
    {
      sum_rows+= rows;
      sum_commits+= commits;
5949
      if (sum_row_size < size)
5950
        sum_row_size= size;
5951
      sum_mem+= mem;
5952
      count++;
5953 5954 5955
    }
    
    if (check == -1)
5956 5957 5958 5959
    {
      error= pOp->getNdbError();
      goto retry;
    }
5960

5961
    pOp->close(TRUE);
5962

5963
    ndb->closeTransaction(pTrans);
5964 5965 5966 5967 5968 5969

    ndbstat->row_count= sum_rows;
    ndbstat->commit_count= sum_commits;
    ndbstat->row_size= sum_row_size;
    ndbstat->fragment_memory= sum_mem;

5970 5971 5972 5973 5974 5975 5976
    DBUG_PRINT("exit", ("records: %s  commits: %s "
                        "row_size: %s  mem: %s count: %u",
			llstr(sum_rows, buff),
                        llstr(sum_commits, buff2),
                        llstr(sum_row_size, buff3),
                        llstr(sum_mem, buff4),
                        count));
5977

5978
    DBUG_RETURN(0);
5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990
retry:
    if (pTrans)
    {
      ndb->closeTransaction(pTrans);
      pTrans= NULL;
    }
    if (error.status == NdbError::TemporaryError && retries--)
    {
      my_sleep(retry_sleep);
      continue;
    }
    break;
5991
  } while(1);
5992 5993
  DBUG_PRINT("exit", ("failed, error %u(%s)", error.code, error.message));
  ERR_RETURN(error);
5994 5995
}

5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010
/*
  Create a .ndb file to serve as a placeholder indicating 
  that the table with this name is a ndb table
*/

int ha_ndbcluster::write_ndb_file()
{
  File file;
  bool error=1;
  char path[FN_REFLEN];
  
  DBUG_ENTER("write_ndb_file");
  DBUG_PRINT("enter", ("db: %s, name: %s", m_dbname, m_tabname));

  (void)strxnmov(path, FN_REFLEN, 
6011
                 mysql_data_home,"/",m_dbname,"/",m_tabname,ha_ndb_ext,NullS);
6012 6013 6014 6015 6016 6017 6018 6019 6020 6021

  if ((file=my_create(path, CREATE_MODE,O_RDWR | O_TRUNC,MYF(MY_WME))) >= 0)
  {
    // It's an empty file
    error=0;
    my_close(file,MYF(0));
  }
  DBUG_RETURN(error);
}

6022
void 
6023 6024
ha_ndbcluster::release_completed_operations(NdbTransaction *trans,
					    bool force_release)
6025 6026 6027 6028 6029 6030 6031 6032
{
  if (trans->hasBlobOperation())
  {
    /* We are reading/writing BLOB fields, 
       releasing operation records is unsafe
    */
    return;
  }
6033 6034 6035 6036 6037 6038 6039 6040 6041 6042
  if (!force_release)
  {
    if (get_thd_ndb(current_thd)->query_state & NDB_QUERY_MULTI_READ_RANGE)
    {
      /* We are batching reads and have not consumed all fetched
	 rows yet, releasing operation records is unsafe 
      */
      return;
    }
  }
6043
  trans->releaseCompletedOperations();
6044 6045
}

6046
int
6047
ha_ndbcluster::read_multi_range_first(KEY_MULTI_RANGE **found_range_p,
6048 6049 6050 6051
                                      KEY_MULTI_RANGE *ranges, 
                                      uint range_count,
                                      bool sorted, 
                                      HANDLER_BUFFER *buffer)
6052 6053
{
  DBUG_ENTER("ha_ndbcluster::read_multi_range_first");
6054
  
6055 6056
  int res;
  KEY* key_info= table->key_info + active_index;
6057
  NDB_INDEX_TYPE index_type= get_index_type(active_index);
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
6058
  ulong reclength= table->s->reclength;
6059
  NdbOperation* op;
6060
  Thd_ndb *thd_ndb= get_thd_ndb(current_thd);
6061

6062
  if (uses_blob_value(m_retrieve_all_fields))
6063 6064 6065 6066
  {
    /**
     * blobs can't be batched currently
     */
6067
    m_disable_multi_read= TRUE;
6068
    DBUG_RETURN(handler::read_multi_range_first(found_range_p, 
6069 6070 6071 6072
                                                ranges, 
                                                range_count,
                                                sorted, 
                                                buffer));
6073
  }
6074
  thd_ndb->query_state|= NDB_QUERY_MULTI_READ_RANGE;
6075
  m_disable_multi_read= FALSE;
6076 6077 6078 6079

  /**
   * Copy arguments into member variables
   */
6080 6081 6082
  m_multi_ranges= ranges;
  multi_range_curr= ranges;
  multi_range_end= ranges+range_count;
6083 6084 6085
  multi_range_sorted= sorted;
  multi_range_buffer= buffer;

6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096
  /**
   * read multi range will read ranges as follows (if not ordered)
   *
   * input    read order
   * ======   ==========
   * pk-op 1  pk-op 1
   * pk-op 2  pk-op 2
   * range 3  range (3,5) NOTE result rows will be intermixed
   * pk-op 4  pk-op 4
   * range 5
   * pk-op 6  pk-ok 6
6097 6098
   */   

mskold@mysql.com's avatar
mskold@mysql.com committed
6099
  /**
6100 6101
   * Variables for loop
   */
6102 6103
  byte *curr= (byte*)buffer->buffer;
  byte *end_of_buffer= (byte*)buffer->buffer_end;
6104 6105
  NdbOperation::LockMode lm= 
    (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type);
6106
  bool need_pk = (lm == NdbOperation::LM_Read);
6107 6108
  const NDBTAB *tab= (const NDBTAB *) m_table;
  const NDBINDEX *unique_idx= (NDBINDEX *) m_index[active_index].unique_index;
6109
  const NDBINDEX *idx= (NDBINDEX *) m_index[active_index].index; 
6110 6111
  const NdbOperation* lastOp= m_active_trans->getLastDefinedOperation();
  NdbIndexScanOperation* scanOp= 0;
6112 6113
  for (; multi_range_curr<multi_range_end && curr+reclength <= end_of_buffer; 
       multi_range_curr++)
6114
  {
6115 6116 6117 6118 6119 6120
    switch (index_type){
    case PRIMARY_KEY_ORDERED_INDEX:
      if (!(multi_range_curr->start_key.length == key_info->key_length &&
            multi_range_curr->start_key.flag == HA_READ_KEY_EXACT))
      goto range;
      /* fall through */
6121
    case PRIMARY_KEY_INDEX:
6122
    {
6123
      multi_range_curr->range_flag |= UNIQUE_RANGE;
6124
      if ((op= m_active_trans->getNdbOperation(tab)) && 
6125 6126 6127
          !op->readTuple(lm) && 
          !set_primary_key(op, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op) &&
6128
          (op->setAbortOption(AO_IgnoreError), TRUE))
6129
        curr += reclength;
6130
      else
6131
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
6132
      break;
6133 6134
    }
    break;
6135 6136 6137 6138 6139 6140 6141
    case UNIQUE_ORDERED_INDEX:
      if (!(multi_range_curr->start_key.length == key_info->key_length &&
            multi_range_curr->start_key.flag == HA_READ_KEY_EXACT &&
            !check_null_in_key(key_info, multi_range_curr->start_key.key,
                               multi_range_curr->start_key.length)))
      goto range;
      /* fall through */
6142
    case UNIQUE_INDEX:
6143
    {
6144
      multi_range_curr->range_flag |= UNIQUE_RANGE;
6145
      if ((op= m_active_trans->getNdbIndexOperation(unique_idx, tab)) && 
6146 6147 6148
          !op->readTuple(lm) && 
          !set_index_key(op, key_info, multi_range_curr->start_key.key) &&
          !define_read_attrs(curr, op) &&
6149
          (op->setAbortOption(AO_IgnoreError), TRUE))
6150
        curr += reclength;
6151
      else
6152
        ERR_RETURN(op ? op->getNdbError() : m_active_trans->getNdbError());
6153 6154
      break;
    }
6155 6156
    case ORDERED_INDEX:
    {
6157
  range:
6158
      multi_range_curr->range_flag &= ~(uint)UNIQUE_RANGE;
6159 6160
      if (scanOp == 0)
      {
6161 6162 6163 6164 6165 6166
        if (m_multi_cursor)
        {
          scanOp= m_multi_cursor;
          DBUG_ASSERT(scanOp->getSorted() == sorted);
          DBUG_ASSERT(scanOp->getLockMode() == 
                      (NdbOperation::LockMode)get_ndb_lock_type(m_lock.type));
6167
          if (scanOp->reset_bounds(m_force_send))
6168 6169 6170 6171 6172
            DBUG_RETURN(ndb_err(m_active_trans));
          
          end_of_buffer -= reclength;
        }
        else if ((scanOp= m_active_trans->getNdbIndexScanOperation(idx, tab)) 
6173 6174
                 &&!scanOp->readTuples(lm, 0, parallelism, sorted, 
				       FALSE, TRUE, need_pk)
6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185
                 &&!generate_scan_filter(m_cond_stack, scanOp)
                 &&!define_read_attrs(end_of_buffer-reclength, scanOp))
        {
          m_multi_cursor= scanOp;
          m_multi_range_cursor_result_ptr= end_of_buffer-reclength;
        }
        else
        {
          ERR_RETURN(scanOp ? scanOp->getNdbError() : 
                     m_active_trans->getNdbError());
        }
6186
      }
6187

6188
      const key_range *keys[2]= { &multi_range_curr->start_key, 
6189
                                  &multi_range_curr->end_key };
6190
      if ((res= set_bounds(scanOp, keys, multi_range_curr-ranges)))
6191
        DBUG_RETURN(res);
6192
      break;
6193
    }
6194
    case UNDEFINED_INDEX:
mskold@mysql.com's avatar
mskold@mysql.com committed
6195 6196 6197 6198
      DBUG_ASSERT(FALSE);
      DBUG_RETURN(1);
      break;
    }
6199 6200
  }
  
6201
  if (multi_range_curr != multi_range_end)
6202
  {
6203 6204 6205 6206 6207 6208
    /**
     * Mark that we're using entire buffer (even if might not) as
     *   we haven't read all ranges for some reason
     * This as we don't want mysqld to reuse the buffer when we read
     *   the remaining ranges
     */
6209
    buffer->end_of_used_area= (byte*)buffer->buffer_end;
6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220
  }
  else
  {
    buffer->end_of_used_area= curr;
  }
  
  /**
   * Set first operation in multi range
   */
  m_current_multi_operation= 
    lastOp ? lastOp->next() : m_active_trans->getFirstDefinedOperation();
6221
  if (!(res= execute_no_commit_ie(this, m_active_trans, true)))
6222
  {
6223 6224
    m_multi_range_defined= multi_range_curr;
    multi_range_curr= ranges;
6225 6226
    m_multi_range_result_ptr= (byte*)buffer->buffer;
    DBUG_RETURN(read_multi_range_next(found_range_p));
6227 6228 6229 6230
  }
  ERR_RETURN(m_active_trans->getNdbError());
}

6231 6232 6233 6234 6235 6236
/*
  Trace hook used in read_multi_range_next().  With the "#if 0" below
  changed to "#if 1" it prints the case number it is given; as it stands
  it expands to nothing.
*/
#if 0
#define DBUG_MULTI_RANGE(x) printf("read_multi_range_next: case %d\n", x);
#else
#define DBUG_MULTI_RANGE(x)
#endif

6237
int
6238
ha_ndbcluster::read_multi_range_next(KEY_MULTI_RANGE ** multi_range_found_p)
6239 6240
{
  DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
6241
  if (m_disable_multi_read)
6242
  {
6243
    DBUG_RETURN(handler::read_multi_range_next(multi_range_found_p));
6244
  }
6245
  
6246
  int res;
6247
  int range_no;
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
6248
  ulong reclength= table->s->reclength;
6249
  const NdbOperation* op= m_current_multi_operation;
6250
  for (;multi_range_curr < m_multi_range_defined; multi_range_curr++)
6251
  {
6252
    if (multi_range_curr->range_flag & UNIQUE_RANGE)
6253
    {
6254
      if (op->getNdbError().code == 0)
6255
        goto found_next;
6256 6257 6258
      
      op= m_active_trans->getNextCompletedOperation(op);
      m_multi_range_result_ptr += reclength;
6259
      continue;
6260
    } 
6261
    else if (m_multi_cursor && !multi_range_sorted)
6262
    {
6263 6264
      DBUG_MULTI_RANGE(1);
      if ((res= fetch_next(m_multi_cursor)) == 0)
6265
      {
6266 6267 6268
        DBUG_MULTI_RANGE(2);
        range_no= m_multi_cursor->get_range_no();
        goto found;
6269 6270 6271
      } 
      else
      {
6272
        goto close_scan;
6273 6274
      }
    }
6275
    else if (m_multi_cursor && multi_range_sorted)
6276
    {
6277 6278
      if (m_active_cursor && (res= fetch_next(m_multi_cursor)))
      {
6279 6280
        DBUG_MULTI_RANGE(3);
        goto close_scan;
6281
      }
6282
      
6283
      range_no= m_multi_cursor->get_range_no();
6284
      uint current_range_no= multi_range_curr - m_multi_ranges;
mskold@mysql.com's avatar
mskold@mysql.com committed
6285
      if ((uint) range_no == current_range_no)
6286
      {
6287
        DBUG_MULTI_RANGE(4);
6288
        // return current row
6289
        goto found;
6290
      }
6291
      else if (range_no > (int)current_range_no)
6292
      {
6293 6294 6295 6296
        DBUG_MULTI_RANGE(5);
        // wait with current row
        m_active_cursor= 0;
        continue;
6297 6298 6299
      }
      else 
      {
6300 6301 6302
        DBUG_MULTI_RANGE(6);
        // First fetch from cursor
        DBUG_ASSERT(range_no == -1);
6303
        if ((res= m_multi_cursor->nextResult(true)))
6304 6305 6306 6307 6308
        {
          goto close_scan;
        }
        multi_range_curr--; // Will be increased in for-loop
        continue;
6309
      }
6310
    }
6311
    else /** m_multi_cursor == 0 */
6312
    {
6313
      DBUG_MULTI_RANGE(7);
6314 6315 6316 6317
      /**
       * Corresponds to range 5 in example in read_multi_range_first
       */
      (void)1;
6318
      continue;
6319
    }
6320
    
6321
    DBUG_ASSERT(FALSE); // Should only get here via goto's
6322 6323 6324
close_scan:
    if (res == 1)
    {
6325
      m_multi_cursor->close(FALSE, TRUE);
6326
      m_active_cursor= m_multi_cursor= 0;
6327
      DBUG_MULTI_RANGE(8);
6328 6329 6330 6331 6332 6333 6334
      continue;
    } 
    else 
    {
      DBUG_RETURN(ndb_err(m_active_trans));
    }
  }
6335
  
6336
  if (multi_range_curr == multi_range_end)
6337
    DBUG_RETURN(HA_ERR_END_OF_FILE);
6338
  
6339 6340 6341 6342
  /**
   * Read remaining ranges
   */
  DBUG_RETURN(read_multi_range_first(multi_range_found_p, 
6343 6344 6345 6346
                                     multi_range_curr,
                                     multi_range_end - multi_range_curr, 
                                     multi_range_sorted,
                                     multi_range_buffer));
6347 6348
  
found:
6349 6350 6351
  /**
   * Found a record belonging to a scan
   */
6352
  m_active_cursor= m_multi_cursor;
6353
  * multi_range_found_p= m_multi_ranges + range_no;
6354 6355
  memcpy(table->record[0], m_multi_range_cursor_result_ptr, reclength);
  setup_recattr(m_active_cursor->getFirstRecAttr());
6356 6357 6358
  unpack_record(table->record[0]);
  table->status= 0;     
  DBUG_RETURN(0);
6359
  
6360
found_next:
6361 6362 6363 6364
  /**
   * Found a record belonging to a pk/index op,
   *   copy result and move to next to prepare for next call
   */
6365
  * multi_range_found_p= multi_range_curr;
6366
  memcpy(table->record[0], m_multi_range_result_ptr, reclength);
6367
  setup_recattr(op->getFirstRecAttr());
6368
  unpack_record(table->record[0]);
6369 6370
  table->status= 0;
  
6371
  multi_range_curr++;
6372
  m_current_multi_operation= m_active_trans->getNextCompletedOperation(op);
6373 6374
  m_multi_range_result_ptr += reclength;
  DBUG_RETURN(0);
6375 6376
}

6377 6378 6379 6380 6381 6382 6383 6384
int
ha_ndbcluster::setup_recattr(const NdbRecAttr* curr)
{
  DBUG_ENTER("setup_recattr");

  Field **field, **end;
  NdbValue *value= m_value;
  
joreland@mysql.com's avatar
merge  
joreland@mysql.com committed
6385
  end= table->field + table->s->fields;
6386 6387 6388 6389 6390 6391
  
  for (field= table->field; field < end; field++, value++)
  {
    if ((* value).ptr)
    {
      DBUG_ASSERT(curr != 0);
6392 6393 6394
      NdbValue* val= m_value + curr->getColumn()->getColumnNo();
      DBUG_ASSERT(val->ptr);
      val->rec= curr;
6395
      curr= curr->next();
6396 6397 6398
    }
  }
  
6399
  DBUG_RETURN(0);
6400 6401
}

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6402 6403
char*
ha_ndbcluster::update_table_comment(
6404 6405
                                /* out: table comment + additional */
        const char*     comment)/* in:  table comment defined by user */
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6406 6407
{
  uint length= strlen(comment);
6408
  if (length > 64000 - 3)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434
  {
    return((char*)comment); /* string too long */
  }

  Ndb* ndb;
  if (!(ndb= get_ndb()))
  {
    return((char*)comment);
  }

  ndb->setDatabaseName(m_dbname);
  NDBDICT* dict= ndb->getDictionary();
  const NDBTAB* tab;
  if (!(tab= dict->getTable(m_tabname)))
  {
    return((char*)comment);
  }

  char *str;
  const char *fmt="%s%snumber_of_replicas: %d";
  const unsigned fmt_len_plus_extra= length + strlen(fmt);
  if ((str= my_malloc(fmt_len_plus_extra, MYF(0))) == NULL)
  {
    return (char*)comment;
  }

6435 6436 6437
  my_snprintf(str,fmt_len_plus_extra,fmt,comment,
              length > 0 ? " ":"",
              tab->getReplicaCount());
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6438 6439 6440 6441 6442
  return str;
}


// Utility thread main loop
6443
pthread_handler_t ndb_util_thread_func(void *arg __attribute__((unused)))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6444 6445
{
  THD *thd; /* needs to be first for thread_stack */
6446
  Ndb* ndb;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6447 6448 6449 6450 6451 6452 6453 6454
  struct timespec abstime;

  my_thread_init();
  DBUG_ENTER("ndb_util_thread");
  DBUG_PRINT("enter", ("ndb_cache_check_time: %d", ndb_cache_check_time));

  thd= new THD; /* note that contructor of THD uses DBUG_ */
  THD_CHECK_SENTRY(thd);
6455
  ndb= new Ndb(g_ndb_cluster_connection, "");
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6456 6457 6458 6459 6460

  pthread_detach_this_thread();
  ndb_util_thread= pthread_self();

  thd->thread_stack= (char*)&thd; /* remember where our stack is */
6461
  if (thd->store_globals() || (ndb->init() != 0))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6462 6463 6464
  {
    thd->cleanup();
    delete thd;
6465
    delete ndb;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6466 6467 6468 6469
    DBUG_RETURN(NULL);
  }

  List<NDB_SHARE> util_open_tables;
6470
  set_timespec(abstime, 0);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6471 6472 6473 6474
  for (;;)
  {

    pthread_mutex_lock(&LOCK_ndb_util_thread);
monty@mysql.com's avatar
monty@mysql.com committed
6475 6476 6477
    pthread_cond_timedwait(&COND_ndb_util_thread,
                           &LOCK_ndb_util_thread,
                           &abstime);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6478 6479 6480 6481 6482 6483 6484 6485 6486 6487
    pthread_mutex_unlock(&LOCK_ndb_util_thread);

    DBUG_PRINT("ndb_util_thread", ("Started, ndb_cache_check_time: %d",
                                   ndb_cache_check_time));

    if (abort_loop)
      break; /* Shutting down server */

    if (ndb_cache_check_time == 0)
    {
6488 6489
      /* Wake up in 1 second to check if value has changed */
      set_timespec(abstime, 1);
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510
      continue;
    }

    /* Lock mutex and fill list with pointers to all open tables */
    NDB_SHARE *share;
    pthread_mutex_lock(&ndbcluster_mutex);
    for (uint i= 0; i < ndbcluster_open_tables.records; i++)
    {
      share= (NDB_SHARE *)hash_element(&ndbcluster_open_tables, i);
      share->use_count++; /* Make sure the table can't be closed */
      DBUG_PRINT("ndb_util_thread",
                 ("Found open table[%d]: %s, use_count: %d",
                  i, share->table_name, share->use_count));

      /* Store pointer to table */
      util_open_tables.push_back(share);
    }
    pthread_mutex_unlock(&ndbcluster_mutex);

    /* Iterate through the  open files list */
    List_iterator_fast<NDB_SHARE> it(util_open_tables);
6511
    while ((share= it++))
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6512 6513 6514 6515 6516 6517 6518 6519 6520 6521
    {
      /* Split tab- and dbname */
      char buf[FN_REFLEN];
      char *tabname, *db;
      uint length= dirname_length(share->table_name);
      tabname= share->table_name+length;
      memcpy(buf, share->table_name, length-1);
      buf[length-1]= 0;
      db= buf+dirname_length(buf);
      DBUG_PRINT("ndb_util_thread",
6522 6523
                 ("Fetching commit count for: %s",
                  share->table_name));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6524 6525

      /* Contact NDB to get commit count for table */
6526 6527 6528 6529 6530 6531 6532 6533
      ndb->setDatabaseName(db);
      struct Ndb_statistics stat;

      uint lock;
      pthread_mutex_lock(&share->mutex);
      lock= share->commit_count_lock;
      pthread_mutex_unlock(&share->mutex);

6534
      if (ndb_get_table_statistics(ndb, tabname, &stat) == 0)
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6535
      {
6536
        char buff[22], buff2[22];
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6537
        DBUG_PRINT("ndb_util_thread",
6538 6539 6540
                   ("Table: %s  commit_count: %s  rows: %s",
                    share->table_name,
                    llstr(stat.commit_count, buff),
monty@mysql.com's avatar
monty@mysql.com committed
6541
                    llstr(stat.row_count, buff2)));
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6542 6543 6544 6545 6546 6547
      }
      else
      {
        DBUG_PRINT("ndb_util_thread",
                   ("Error: Could not get commit count for table %s",
                    share->table_name));
6548
        stat.commit_count= 0;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6549
      }
6550 6551 6552 6553 6554 6555

      pthread_mutex_lock(&share->mutex);
      if (share->commit_count_lock == lock)
        share->commit_count= stat.commit_count;
      pthread_mutex_unlock(&share->mutex);

mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6556 6557 6558 6559 6560 6561 6562
      /* Decrease the use count and possibly free share */
      free_share(share);
    }

    /* Clear the list of open tables */
    util_open_tables.empty();

6563 6564 6565 6566 6567 6568 6569 6570 6571
    /* Calculate new time to wake up */
    int secs= 0;
    int msecs= ndb_cache_check_time;

    struct timeval tick_time;
    gettimeofday(&tick_time, 0);
    abstime.tv_sec=  tick_time.tv_sec;
    abstime.tv_nsec= tick_time.tv_usec * 1000;

6572
    if (msecs >= 1000){
6573 6574 6575 6576 6577 6578 6579 6580 6581 6582
      secs=  msecs / 1000;
      msecs= msecs % 1000;
    }

    abstime.tv_sec+=  secs;
    abstime.tv_nsec+= msecs * 1000000;
    if (abstime.tv_nsec >= 1000000000) {
      abstime.tv_sec+=  1;
      abstime.tv_nsec-= 1000000000;
    }
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6583 6584 6585 6586
  }

  thd->cleanup();
  delete thd;
6587
  delete ndb;
mskold@mysql.com's avatar
Merge  
mskold@mysql.com committed
6588 6589 6590 6591 6592 6593
  DBUG_PRINT("exit", ("ndb_util_thread"));
  my_thread_end();
  pthread_exit(0);
  DBUG_RETURN(NULL);
}

6594 6595 6596
/*
  Condition pushdown
*/
6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613
/*
  Push a condition to ndbcluster storage engine for evaluation 
  during table   and index scans. The conditions will be stored on a stack
  for possibly storing several conditions. The stack can be popped
  by calling cond_pop, handler::extra(HA_EXTRA_RESET) (handler::reset())
  will clear the stack.
  The current implementation supports arbitrary AND/OR nested conditions
  with comparisons between columns and constants (including constant
  expressions and function calls) and the following comparison operators:
  =, !=, >, >=, <, <=, "is null", and "is not null".
  
  RETURN
    NULL The condition was supported and will be evaluated for each 
    row found during the scan
    cond The condition was not supported and all rows will be returned from
         the scan for evaluation (and thus not saved on stack)
*/
6614 6615 6616 6617 6618
const 
COND* 
ha_ndbcluster::cond_push(const COND *cond) 
{ 
  DBUG_ENTER("cond_push");
6619 6620 6621
  Ndb_cond_stack *ndb_cond = new Ndb_cond_stack();
  DBUG_EXECUTE("where",print_where((COND *)cond, m_tabname););
  if (m_cond_stack)
mskold@mysql.com's avatar
mskold@mysql.com committed
6622
    ndb_cond->next= m_cond_stack;
6623 6624 6625 6626 6627 6628
  else
    ndb_cond->next= NULL;
  m_cond_stack= ndb_cond;
  
  if (serialize_cond(cond, ndb_cond))
  {
mskold@mysql.com's avatar
mskold@mysql.com committed
6629
    DBUG_RETURN(NULL);
6630 6631 6632 6633
  }
  else
  {
    cond_pop();
mskold@mysql.com's avatar
mskold@mysql.com committed
6634
  }
6635 6636 6637
  DBUG_RETURN(cond); 
}

6638 6639 6640
/*
  Pop the top condition from the condition stack of the handler instance.
*/
6641 6642 6643 6644 6645 6646 6647 6648 6649
void 
ha_ndbcluster::cond_pop() 
{ 
  Ndb_cond_stack *ndb_cond_stack= m_cond_stack;  
  if (ndb_cond_stack)
  {
    m_cond_stack= ndb_cond_stack->next;
    delete ndb_cond_stack;
  }
mskold@mysql.com's avatar
mskold@mysql.com committed
6650
}
6651

6652 6653 6654
/*
  Clear the condition stack
*/
6655 6656 6657 6658 6659 6660 6661 6662 6663 6664
void
ha_ndbcluster::cond_clear()
{
  DBUG_ENTER("cond_clear");

  /*
    Every cond_pop() call unlinks and deletes the current top entry,
    so keep popping until no entry is left.
  */
  for (;;)
  {
    if (!m_cond_stack)
      break;
    cond_pop();
  }

  DBUG_VOID_RETURN;
}

6665 6666 6667 6668 6669 6670
/*
  Serialize the item tree into a linked list represented by Ndb_cond
  for fast generation of NdbScanFilter. Adds information such as
  position of fields that is not directly available in the Item tree.
  Also checks if condition is supported.
*/
6671 6672 6673 6674 6675
void ndb_serialize_cond(const Item *item, void *arg)
{
  Ndb_cond_traverse_context *context= (Ndb_cond_traverse_context *) arg;
  DBUG_ENTER("ndb_serialize_cond");  

mskold@mysql.com's avatar
mskold@mysql.com committed
6676 6677 6678 6679 6680
  // Check if we are skipping arguments to a function to be evaluated
  if (context->skip)
  {
    DBUG_PRINT("info", ("Skiping argument %d", context->skip));
    context->skip--;
6681 6682 6683
    switch (item->type()) {
    case Item::FUNC_ITEM:
    {
mskold@mysql.com's avatar
mskold@mysql.com committed
6684 6685 6686 6687
      Item_func *func_item= (Item_func *) item;
      context->skip+= func_item->argument_count();
      break;
    }
6688 6689 6690 6691 6692
    case Item::INT_ITEM:
    case Item::REAL_ITEM:
    case Item::STRING_ITEM:
    case Item::VARBIN_ITEM:
    case Item::DECIMAL_ITEM:
mskold@mysql.com's avatar
mskold@mysql.com committed
6693 6694
      break;
    default:
6695
      context->supported= FALSE;
mskold@mysql.com's avatar
mskold@mysql.com committed
6696 6697
      break;
    }
6698
    
mskold@mysql.com's avatar
mskold@mysql.com committed
6699 6700 6701
    DBUG_VOID_RETURN;
  }
  
6702
  if (context->supported)
6703
  {
6704 6705 6706 6707 6708 6709
    Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
    const Item_func *func_item;
    // Check if we are rewriting some unsupported function call
    if (rewrite_context &&
        (func_item= rewrite_context->func_item) &&
        rewrite_context->count++ == 0)
mskold@mysql.com's avatar
mskold@mysql.com committed
6710
    {
6711 6712
      switch (func_item->functype()) {
      case Item_func::BETWEEN:
6713
        /*
6714 6715 6716 6717 6718 6719 6720
          Rewrite 
          <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2>
          to <field>|<const> > <const1>|<field1> AND 
          <field>|<const> < <const2>|<field2>
          or actually in prefix format
          BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), 
          LT(<field>|<const>, <const2>|<field2>), END()
6721
        */
6722 6723
      case Item_func::IN_FUNC:
      {
6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739
        /*
          Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..)
          to <field>|<const> = <const1>|<field1> OR 
          <field> = <const2>|<field2> ...
          or actually in prefix format
          BEGIN(OR) EQ(<field>|<const>, <const1><field1>), 
          EQ(<field>|<const>, <const2>|<field2>), ... END()
          Each part of the disjunction is added for each call
          to ndb_serialize_cond and end of rewrite statement 
          is wrapped in end of ndb_serialize_cond
        */
        if (context->expecting(item->type()))
        {
          // This is the <field>|<const> item, save it in the rewrite context
          rewrite_context->left_hand_item= item;
          if (item->type() == Item::FUNC_ITEM)
6740
          {
6741 6742 6743
            Item_func *func_item= (Item_func *) item;
            if (func_item->functype() == Item_func::UNKNOWN_FUNC &&
                func_item->const_item())
6744
            {
6745 6746 6747
              // Skip any arguments since we will evaluate function instead
              DBUG_PRINT("info", ("Skip until end of arguments marker"));
              context->skip= func_item->argument_count();
6748 6749 6750
            }
            else
            {
6751 6752 6753 6754
              DBUG_PRINT("info", ("Found unsupported functional expression in BETWEEN|IN"));
              context->supported= FALSE;
              DBUG_VOID_RETURN;
              
6755 6756 6757
            }
          }
        }
6758 6759
        else
        {
6760 6761 6762
          // Non-supported BETWEEN|IN expression
          DBUG_PRINT("info", ("Found unexpected item of type %u in BETWEEN|IN",
                              item->type()));
6763
          context->supported= FALSE;
6764
          DBUG_VOID_RETURN;
6765
        }
6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787
        break;
      }
      default:
        context->supported= FALSE;
        break;
      }
      DBUG_VOID_RETURN;
    }
    else
    {
      Ndb_cond_stack *ndb_stack= context->stack_ptr;
      Ndb_cond *prev_cond= context->cond_ptr;
      Ndb_cond *curr_cond= context->cond_ptr= new Ndb_cond();
      if (!ndb_stack->ndb_cond)
        ndb_stack->ndb_cond= curr_cond;
      curr_cond->prev= prev_cond;
      if (prev_cond) prev_cond->next= curr_cond;
    // Check if we are rewriting some unsupported function call
      if (context->rewrite_stack)
      {
        Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
        const Item_func *func_item= rewrite_context->func_item;
6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799
        switch (func_item->functype()) {
        case Item_func::BETWEEN:
        {
          /*
            Rewrite 
            <field>|<const> BETWEEN <const1>|<field1> AND <const2>|<field2>
            to <field>|<const> > <const1>|<field1> AND 
            <field>|<const> < <const2>|<field2>
            or actually in prefix format
            BEGIN(AND) GT(<field>|<const>, <const1>|<field1>), 
            LT(<field>|<const>, <const2>|<field2>), END()
          */
6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818
          if (rewrite_context->count == 2)
          {
            // Lower limit of BETWEEN
            DBUG_PRINT("info", ("GE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(Item_func::GE_FUNC, 2);
          }
          else if (rewrite_context->count == 3)
          {
            // Upper limit of BETWEEN
            DBUG_PRINT("info", ("LE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(Item_func::LE_FUNC, 2);
          }
          else
          {
            // Illegal BETWEEN expression
            DBUG_PRINT("info", ("Illegal BETWEEN expression"));
            context->supported= FALSE;
            DBUG_VOID_RETURN;
          }
6819 6820
          break;
        }
6821 6822
        case Item_func::IN_FUNC:
        {
6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835
          /*
            Rewrite <field>|<const> IN(<const1>|<field1>, <const2>|<field2>,..)
            to <field>|<const> = <const1>|<field1> OR 
            <field> = <const2>|<field2> ...
            or actually in prefix format
            BEGIN(OR) EQ(<field>|<const>, <const1><field1>), 
            EQ(<field>|<const>, <const2>|<field2>), ... END()
            Each part of the disjunction is added for each call
            to ndb_serialize_cond and end of rewrite statement 
            is wrapped in end of ndb_serialize_cond
          */
          DBUG_PRINT("info", ("EQ_FUNC"));      
          curr_cond->ndb_item= new Ndb_item(Item_func::EQ_FUNC, 2);
6836 6837
          break;
        }
6838 6839
        default:
          context->supported= FALSE;
6840
        }
6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870
        // Handle left hand <field>|<const>
        context->rewrite_stack= NULL; // Disable rewrite mode
        context->expect_only(Item::FIELD_ITEM);
        context->expect_field_result(STRING_RESULT);
        context->expect_field_result(REAL_RESULT);
        context->expect_field_result(INT_RESULT);
        context->expect_field_result(DECIMAL_RESULT);
        context->expect(Item::INT_ITEM);
        context->expect(Item::STRING_ITEM);
        context->expect(Item::VARBIN_ITEM);
        context->expect(Item::FUNC_ITEM);
        ndb_serialize_cond(rewrite_context->left_hand_item, arg);
        context->skip= 0; // Any FUNC_ITEM expression has already been parsed
        context->rewrite_stack= rewrite_context; // Enable rewrite mode
        if (!context->supported)
          DBUG_VOID_RETURN;

        prev_cond= context->cond_ptr;
        curr_cond= context->cond_ptr= new Ndb_cond();
        prev_cond->next= curr_cond;
      }
      
      // Check for end of AND/OR expression
      if (!item)
      {
        // End marker for condition group
        DBUG_PRINT("info", ("End of condition group"));
        curr_cond->ndb_item= new Ndb_item(NDB_END_COND);
      }
      else
6871 6872 6873 6874
      {
        switch (item->type()) {
        case Item::FIELD_ITEM:
        {
6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893
          Item_field *field_item= (Item_field *) item;
          Field *field= field_item->field;
          enum_field_types type= field->type();
          /*
            Check that the field is part of the table of the handler
            instance and that we expect a field with of this result type.
          */
          if (context->table == field->table)
          {       
            const NDBTAB *tab= (const NDBTAB *) context->ndb_table;
            DBUG_PRINT("info", ("FIELD_ITEM"));
            DBUG_PRINT("info", ("table %s", tab->getName()));
            DBUG_PRINT("info", ("column %s", field->field_name));
            DBUG_PRINT("info", ("result type %d", field->result_type()));
            
            // Check that we are expecting a field and with the correct
            // result type
            if (context->expecting(Item::FIELD_ITEM) &&
                (context->expecting_field_result(field->result_type()) ||
mskold@mysql.com's avatar
mskold@mysql.com committed
6894
                 // Date and year can be written as string or int
6895 6896 6897 6898
                 ((type == MYSQL_TYPE_TIME ||
                   type == MYSQL_TYPE_DATE || 
                   type == MYSQL_TYPE_YEAR ||
                   type == MYSQL_TYPE_DATETIME)
mskold@mysql.com's avatar
mskold@mysql.com committed
6899 6900 6901
                  ? (context->expecting_field_result(STRING_RESULT) ||
                     context->expecting_field_result(INT_RESULT))
                  : true)) &&
6902
                // Bit fields no yet supported in scan filter
6903 6904 6905
                type != MYSQL_TYPE_BIT &&
                // No BLOB support in scan filter
                type != MYSQL_TYPE_TINY_BLOB &&
6906 6907
                type != MYSQL_TYPE_MEDIUM_BLOB &&
                type != MYSQL_TYPE_LONG_BLOB &&
6908
                type != MYSQL_TYPE_BLOB)
6909 6910 6911 6912 6913 6914 6915
            {
              const NDBCOL *col= tab->getColumn(field->field_name);
              DBUG_ASSERT(col);
              curr_cond->ndb_item= new Ndb_item(field, col->getColumnNo());
              context->dont_expect(Item::FIELD_ITEM);
              context->expect_no_field_result();
              if (context->expect_mask)
6916
              {
6917 6918 6919 6920 6921 6922 6923 6924 6925 6926
                // We have not seen second argument yet
                if (type == MYSQL_TYPE_TIME ||
                    type == MYSQL_TYPE_DATE || 
                    type == MYSQL_TYPE_YEAR ||
                    type == MYSQL_TYPE_DATETIME)
                {
                  context->expect_only(Item::STRING_ITEM);
                  context->expect(Item::INT_ITEM);
                }
                else
6927 6928
                  switch (field->result_type()) {
                  case STRING_RESULT:
6929 6930 6931 6932 6933
                    // Expect char string or binary string
                    context->expect_only(Item::STRING_ITEM);
                    context->expect(Item::VARBIN_ITEM);
                    context->expect_collation(field_item->collation.collation);
                    break;
6934
                  case REAL_RESULT:
6935 6936
                    context->expect_only(Item::REAL_ITEM);
                    context->expect(Item::DECIMAL_ITEM);
6937
                    context->expect(Item::INT_ITEM);
6938
                    break;
6939
                  case INT_RESULT:
6940 6941 6942
                    context->expect_only(Item::INT_ITEM);
                    context->expect(Item::VARBIN_ITEM);
                    break;
6943
                  case DECIMAL_RESULT:
6944 6945
                    context->expect_only(Item::DECIMAL_ITEM);
                    context->expect(Item::REAL_ITEM);
6946
                    context->expect(Item::INT_ITEM);
6947 6948 6949 6950
                    break;
                  default:
                    break;
                  }    
6951 6952
              }
              else
6953 6954 6955 6956
              {
                // Expect another logical expression
                context->expect_only(Item::FUNC_ITEM);
                context->expect(Item::COND_ITEM);
6957 6958 6959 6960 6961 6962 6963
                // Check that field and string constant collations are the same
                if ((field->result_type() == STRING_RESULT) &&
                    !context->expecting_collation(item->collation.collation)
                    && type != MYSQL_TYPE_TIME
                    && type != MYSQL_TYPE_DATE
                    && type != MYSQL_TYPE_YEAR
                    && type != MYSQL_TYPE_DATETIME)
6964
                {
mskold@mysql.com's avatar
mskold@mysql.com committed
6965
                  DBUG_PRINT("info", ("Found non-matching collation %s",  
6966 6967
                                      item->collation.collation->name)); 
                  context->supported= FALSE;                
6968 6969
                }
              }
6970 6971
              break;
            }
6972 6973
            else
            {
mskold@mysql.com's avatar
mskold@mysql.com committed
6974 6975
              DBUG_PRINT("info", ("Was not expecting field of type %u(%u)",
                                  field->result_type(), type));
6976
              context->supported= FALSE;
6977
            }
6978
          }
6979
          else
6980 6981 6982 6983
          {
            DBUG_PRINT("info", ("Was not expecting field from table %s(%s)",
                                context->table->s->table_name, 
                                field->table->s->table_name));
6984
            context->supported= FALSE;
6985
          }
6986 6987
          break;
        }
6988 6989
        case Item::FUNC_ITEM:
        {
6990 6991 6992 6993 6994 6995
          Item_func *func_item= (Item_func *) item;
          // Check that we expect a function or functional expression here
          if (context->expecting(Item::FUNC_ITEM) || 
              func_item->functype() == Item_func::UNKNOWN_FUNC)
            context->expect_nothing();
          else
6996
          {
6997 6998 6999
            // Did not expect function here
            context->supported= FALSE;
            break;
7000
          }
7001
          
7002 7003 7004
          switch (func_item->functype()) {
          case Item_func::EQ_FUNC:
          {
7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018
            DBUG_PRINT("info", ("EQ_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(), 
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
7019
          }
7020 7021
          case Item_func::NE_FUNC:
          {
7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035
            DBUG_PRINT("info", ("NE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
7036
          }
7037 7038
          case Item_func::LT_FUNC:
          {
7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053
            DBUG_PRINT("info", ("LT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
7054 7055
          case Item_func::LE_FUNC:
          {
7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070
            DBUG_PRINT("info", ("LE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
7071 7072
          case Item_func::GE_FUNC:
          {
7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087
            DBUG_PRINT("info", ("GE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
7088 7089
          case Item_func::GT_FUNC:
          {
7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104
            DBUG_PRINT("info", ("GT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::REAL_ITEM);
            context->expect(Item::DECIMAL_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
7105 7106
          case Item_func::LIKE_FUNC:
          {
7107 7108 7109 7110 7111 7112 7113 7114 7115
            DBUG_PRINT("info", ("LIKE_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::STRING_ITEM);
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect(Item::FUNC_ITEM);
            break;
          }
7116 7117
          case Item_func::ISNULL_FUNC:
          {
7118 7119 7120 7121 7122 7123 7124 7125 7126 7127
            DBUG_PRINT("info", ("ISNULL_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);      
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
7128 7129
          case Item_func::ISNOTNULL_FUNC:
          {
7130 7131 7132 7133 7134 7135 7136 7137 7138 7139
            DBUG_PRINT("info", ("ISNOTNULL_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);     
            context->expect(Item::FIELD_ITEM);
            context->expect_field_result(STRING_RESULT);
            context->expect_field_result(REAL_RESULT);
            context->expect_field_result(INT_RESULT);
            context->expect_field_result(DECIMAL_RESULT);
            break;
          }
7140 7141
          case Item_func::NOT_FUNC:
          {
7142 7143 7144 7145
            DBUG_PRINT("info", ("NOT_FUNC"));      
            curr_cond->ndb_item= new Ndb_item(func_item->functype(),
                                              func_item);     
            context->expect(Item::FUNC_ITEM);
7146
            context->expect(Item::COND_ITEM);
7147
            break;
7148
          }
7149 7150
          case Item_func::BETWEEN:
          {
7151
            DBUG_PRINT("info", ("BETWEEN, rewriting using AND"));
7152
            Item_func_between *between_func= (Item_func_between *) func_item;
7153 7154 7155 7156
            Ndb_rewrite_context *rewrite_context= 
              new Ndb_rewrite_context(func_item);
            rewrite_context->next= context->rewrite_stack;
            context->rewrite_stack= rewrite_context;
7157 7158 7159 7160 7161 7162 7163 7164 7165
            if (between_func->negated)
            {
              DBUG_PRINT("info", ("NOT_FUNC"));
              curr_cond->ndb_item= new Ndb_item(Item_func::NOT_FUNC, 1);
              prev_cond= curr_cond;
              curr_cond= context->cond_ptr= new Ndb_cond();
              curr_cond->prev= prev_cond;
              prev_cond->next= curr_cond;
            }
7166
            DBUG_PRINT("info", ("COND_AND_FUNC"));
7167 7168 7169
            curr_cond->ndb_item= 
              new Ndb_item(Item_func::COND_AND_FUNC, 
                           func_item->argument_count() - 1);
7170
            context->expect_only(Item::FIELD_ITEM);
7171 7172 7173 7174 7175
            context->expect(Item::INT_ITEM);
            context->expect(Item::STRING_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FUNC_ITEM);
            break;
7176
          }
7177 7178
          case Item_func::IN_FUNC:
          {
7179
            DBUG_PRINT("info", ("IN_FUNC, rewriting using OR"));
7180
            Item_func_in *in_func= (Item_func_in *) func_item;
7181 7182 7183 7184
            Ndb_rewrite_context *rewrite_context= 
              new Ndb_rewrite_context(func_item);
            rewrite_context->next= context->rewrite_stack;
            context->rewrite_stack= rewrite_context;
7185 7186 7187 7188 7189 7190 7191 7192 7193
            if (in_func->negated)
            {
              DBUG_PRINT("info", ("NOT_FUNC"));
              curr_cond->ndb_item= new Ndb_item(Item_func::NOT_FUNC, 1);
              prev_cond= curr_cond;
              curr_cond= context->cond_ptr= new Ndb_cond();
              curr_cond->prev= prev_cond;
              prev_cond->next= curr_cond;
            }
7194 7195 7196 7197 7198 7199 7200 7201 7202
            DBUG_PRINT("info", ("COND_OR_FUNC"));
            curr_cond->ndb_item= new Ndb_item(Item_func::COND_OR_FUNC, 
                                              func_item->argument_count() - 1);
            context->expect_only(Item::FIELD_ITEM);
            context->expect(Item::INT_ITEM);
            context->expect(Item::STRING_ITEM);
            context->expect(Item::VARBIN_ITEM);
            context->expect(Item::FUNC_ITEM);
            break;
7203
          }
7204 7205
          case Item_func::UNKNOWN_FUNC:
          {
7206 7207 7208 7209
            DBUG_PRINT("info", ("UNKNOWN_FUNC %s", 
                                func_item->const_item()?"const":""));  
            DBUG_PRINT("info", ("result type %d", func_item->result_type()));
            if (func_item->const_item())
7210 7211 7212 7213
            {
              switch (func_item->result_type()) {
              case STRING_RESULT:
              {
7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::STRING_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item); 
                if (context->expect_field_result_mask)
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(STRING_RESULT);
                  context->expect_collation(func_item->collation.collation);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                  // Check that string result have correct collation
                  if (!context->expecting_collation(item->collation.collation))
                  {
                    DBUG_PRINT("info", ("Found non-matching collation %s",  
                                        item->collation.collation->name));
                    context->supported= FALSE;
                  }
                }
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
7242 7243
              case REAL_RESULT:
              {
7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::REAL_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
                if (context->expect_field_result_mask) 
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(REAL_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
7265 7266
              case INT_RESULT:
              {
7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::INT_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
                if (context->expect_field_result_mask) 
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(INT_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
7288 7289
              case DECIMAL_RESULT:
              {
7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312
                NDB_ITEM_QUALIFICATION q;
                q.value_type= Item::DECIMAL_ITEM;
                curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
                if (context->expect_field_result_mask) 
                {
                  // We have not seen the field argument yet
                  context->expect_only(Item::FIELD_ITEM);
                  context->expect_only_field_result(DECIMAL_RESULT);
                }
                else
                {
                  // Expect another logical expression
                  context->expect_only(Item::FUNC_ITEM);
                  context->expect(Item::COND_ITEM);
                }
                // Skip any arguments since we will evaluate function instead
                DBUG_PRINT("info", ("Skip until end of arguments marker"));
                context->skip= func_item->argument_count();
                break;
              }
              default:
                break;
              }
7313
            }
7314 7315 7316 7317 7318
            else
              // Function does not return constant expression
              context->supported= FALSE;
            break;
          }
7319 7320
          default:
          {
7321 7322 7323
            DBUG_PRINT("info", ("Found func_item of type %d", 
                                func_item->functype()));
            context->supported= FALSE;
7324
          }
7325 7326
          }
          break;
7327
        }
7328
        case Item::STRING_ITEM:
7329 7330 7331
          DBUG_PRINT("info", ("STRING_ITEM")); 
          if (context->expecting(Item::STRING_ITEM)) 
          {
7332
#ifndef DBUG_OFF
7333 7334 7335 7336 7337 7338
            char buff[256];
            String str(buff,(uint32) sizeof(buff), system_charset_info);
            str.length(0);
            Item_string *string_item= (Item_string *) item;
            DBUG_PRINT("info", ("value \"%s\"", 
                                string_item->val_str(&str)->ptr()));
7339
#endif
7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::STRING_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);      
            if (context->expect_field_result_mask)
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(STRING_RESULT);
              context->expect_collation(item->collation.collation);
            }
            else 
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
              // Check that we are comparing with a field with same collation
              if (!context->expecting_collation(item->collation.collation))
              {
                DBUG_PRINT("info", ("Found non-matching collation %s",  
                                    item->collation.collation->name));
                context->supported= FALSE;
              }
            }
          }
          else
            context->supported= FALSE;
          break;
7367
        case Item::INT_ITEM:
7368 7369
          DBUG_PRINT("info", ("INT_ITEM"));
          if (context->expecting(Item::INT_ITEM)) 
7370
          {
7371 7372 7373 7374 7375 7376 7377 7378 7379 7380
            Item_int *int_item= (Item_int *) item;      
            DBUG_PRINT("info", ("value %d", int_item->value));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::INT_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
            if (context->expect_field_result_mask) 
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(INT_RESULT);
7381 7382
              context->expect_field_result(REAL_RESULT);
              context->expect_field_result(DECIMAL_RESULT);
7383 7384 7385 7386 7387 7388 7389
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
7390 7391
          }
          else
7392 7393
            context->supported= FALSE;
          break;
7394
        case Item::REAL_ITEM:
7395 7396
          DBUG_PRINT("info", ("REAL_ITEM %s"));
          if (context->expecting(Item::REAL_ITEM)) 
7397
          {
7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414
            Item_float *float_item= (Item_float *) item;      
            DBUG_PRINT("info", ("value %f", float_item->value));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::REAL_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
            if (context->expect_field_result_mask) 
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(REAL_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
7415
          }
7416 7417 7418
          else
            context->supported= FALSE;
          break;
7419
        case Item::VARBIN_ITEM:
7420 7421
          DBUG_PRINT("info", ("VARBIN_ITEM"));
          if (context->expecting(Item::VARBIN_ITEM)) 
7422
          {
7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::VARBIN_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);      
            if (context->expect_field_result_mask)
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(STRING_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
7438 7439
          }
          else
7440 7441
            context->supported= FALSE;
          break;
7442
        case Item::DECIMAL_ITEM:
7443 7444
          DBUG_PRINT("info", ("DECIMAL_ITEM %s"));
          if (context->expecting(Item::DECIMAL_ITEM)) 
7445
          {
7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463
            Item_decimal *decimal_item= (Item_decimal *) item;      
            DBUG_PRINT("info", ("value %f", decimal_item->val_real()));
            NDB_ITEM_QUALIFICATION q;
            q.value_type= Item::DECIMAL_ITEM;
            curr_cond->ndb_item= new Ndb_item(NDB_VALUE, q, item);
            if (context->expect_field_result_mask) 
            {
              // We have not seen the field argument yet
              context->expect_only(Item::FIELD_ITEM);
              context->expect_only_field_result(REAL_RESULT);
              context->expect_field_result(DECIMAL_RESULT);
            }
            else
            {
              // Expect another logical expression
              context->expect_only(Item::FUNC_ITEM);
              context->expect(Item::COND_ITEM);
            }
7464
          }
7465 7466 7467
          else
            context->supported= FALSE;
          break;
7468 7469
        case Item::COND_ITEM:
        {
7470 7471 7472
          Item_cond *cond_item= (Item_cond *) item;
          
          if (context->expecting(Item::COND_ITEM))
7473 7474 7475
          {
            switch (cond_item->functype()) {
            case Item_func::COND_AND_FUNC:
7476 7477 7478 7479
              DBUG_PRINT("info", ("COND_AND_FUNC"));
              curr_cond->ndb_item= new Ndb_item(cond_item->functype(),
                                                cond_item);      
              break;
7480
            case Item_func::COND_OR_FUNC:
7481 7482 7483 7484 7485 7486 7487 7488 7489
              DBUG_PRINT("info", ("COND_OR_FUNC"));
              curr_cond->ndb_item= new Ndb_item(cond_item->functype(),
                                                cond_item);      
              break;
            default:
              DBUG_PRINT("info", ("COND_ITEM %d", cond_item->functype()));
              context->supported= FALSE;
              break;
            }
7490
          }
7491
          else
7492 7493
          {
            /* Did not expect condition */
7494
            context->supported= FALSE;          
7495
          }
7496
          break;
7497
        }
7498 7499
        default:
        {
7500
          DBUG_PRINT("info", ("Found item of type %d", item->type()));
7501
          context->supported= FALSE;
7502 7503
        }
        }
7504
      }
7505 7506 7507 7508 7509 7510 7511 7512 7513 7514
      if (context->supported && context->rewrite_stack)
      {
        Ndb_rewrite_context *rewrite_context= context->rewrite_stack;
        if (rewrite_context->count == 
            rewrite_context->func_item->argument_count())
        {
          // Rewrite is done, wrap an END() at the en
          DBUG_PRINT("info", ("End of condition group"));
          prev_cond= curr_cond;
          curr_cond= context->cond_ptr= new Ndb_cond();
7515
          curr_cond->prev= prev_cond;
7516 7517 7518
          prev_cond->next= curr_cond;
          curr_cond->ndb_item= new Ndb_item(NDB_END_COND);
          // Pop rewrite stack
7519 7520 7521
          context->rewrite_stack=  rewrite_context->next;
          rewrite_context->next= NULL;
          delete(rewrite_context);
7522
        }
7523
      }
7524
    }
7525
  }
7526
 
7527 7528 7529 7530 7531 7532 7533 7534
  DBUG_VOID_RETURN;
}

bool
ha_ndbcluster::serialize_cond(const COND *cond, Ndb_cond_stack *ndb_cond)
{
  DBUG_ENTER("serialize_cond");
  Item *item= (Item *) cond;
7535
  Ndb_cond_traverse_context context(table, (void *)m_table, ndb_cond);
7536 7537 7538
  // Expect a logical expression
  context.expect(Item::FUNC_ITEM);
  context.expect(Item::COND_ITEM);
7539
  item->traverse_cond(&ndb_serialize_cond, (void *) &context, Item::PREFIX);
7540
  DBUG_PRINT("info", ("The pushed condition is %ssupported", (context.supported)?"":"not "));
7541

7542
  DBUG_RETURN(context.supported);
7543 7544
}

7545 7546
int
ha_ndbcluster::build_scan_filter_predicate(Ndb_cond * &cond, 
7547 7548
                                           NdbScanFilter *filter,
                                           bool negated)
7549 7550
{
  DBUG_ENTER("build_scan_filter_predicate");  
7551 7552 7553
  switch (cond->ndb_item->type) {
  case NDB_FUNCTION:
  {
7554 7555 7556
    if (!cond->next)
      break;
    Ndb_item *a= cond->next->ndb_item;
7557
    Ndb_item *b, *field, *value= NULL;
7558 7559
    LINT_INIT(field);

7560 7561
    switch (cond->ndb_item->argument_count()) {
    case 1:
7562 7563 7564
      field= 
        (a->type == NDB_FIELD)? a : NULL;
      break;
7565
    case 2:
7566
      if (!cond->next->next)
7567
        break;
7568 7569
      b= cond->next->next->ndb_item;
      value= 
7570 7571 7572
        (a->type == NDB_VALUE)? a
        : (b->type == NDB_VALUE)? b
        : NULL;
7573
      field= 
7574 7575 7576
        (a->type == NDB_FIELD)? a
        : (b->type == NDB_FIELD)? b
        : NULL;
7577
      break;
7578
    default:
7579 7580
      break;
    }
7581 7582 7583
    switch ((negated) ? 
            Ndb_item::negate(cond->ndb_item->qualification.function_type)
            : cond->ndb_item->qualification.function_type) {
7584
    case NDB_EQ_FUNC:
7585
    {
7586
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7587 7588
      // Save value in right format for the field type
      value->save_in_field(field);
7589
      DBUG_PRINT("info", ("Generating EQ filter"));
7590
      if (filter->cmp(NdbScanFilter::COND_EQ, 
7591 7592 7593 7594
                      field->get_field_no(),
                      field->get_val(),
                      field->pack_length()) == -1)
        DBUG_RETURN(1);
7595 7596
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7597
    }
7598
    case NDB_NE_FUNC:
7599
    {
7600
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7601 7602
      // Save value in right format for the field type
      value->save_in_field(field);
7603
      DBUG_PRINT("info", ("Generating NE filter"));
7604
      if (filter->cmp(NdbScanFilter::COND_NE, 
7605 7606 7607 7608
                      field->get_field_no(),
                      field->get_val(),
                      field->pack_length()) == -1)
        DBUG_RETURN(1);
7609 7610
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7611
    }
7612
    case NDB_LT_FUNC:
7613
    {
7614
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7615 7616
      // Save value in right format for the field type
      value->save_in_field(field);
7617
      if (a == field)
7618
      {
7619 7620 7621 7622 7623 7624
        DBUG_PRINT("info", ("Generating LT filter")); 
        if (filter->cmp(NdbScanFilter::COND_LT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7625
      }
7626
      else
7627
      {
7628 7629 7630 7631 7632 7633
        DBUG_PRINT("info", ("Generating GT filter")); 
        if (filter->cmp(NdbScanFilter::COND_GT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7634
      }
7635 7636
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7637
    }
7638
    case NDB_LE_FUNC:
7639
    {
7640
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7641 7642
      // Save value in right format for the field type
      value->save_in_field(field);
7643
      if (a == field)
7644
      {
7645 7646 7647 7648 7649 7650
        DBUG_PRINT("info", ("Generating LE filter")); 
        if (filter->cmp(NdbScanFilter::COND_LE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);       
7651
      }
7652
      else
7653
      {
7654 7655 7656 7657 7658 7659
        DBUG_PRINT("info", ("Generating GE filter")); 
        if (filter->cmp(NdbScanFilter::COND_GE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7660
      }
7661 7662
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7663
    }
7664
    case NDB_GE_FUNC:
7665
    {
7666
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7667 7668
      // Save value in right format for the field type
      value->save_in_field(field);
7669
      if (a == field)
7670
      {
7671 7672 7673 7674 7675 7676
        DBUG_PRINT("info", ("Generating GE filter")); 
        if (filter->cmp(NdbScanFilter::COND_GE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7677
      }
7678
      else
7679
      {
7680 7681 7682 7683 7684 7685
        DBUG_PRINT("info", ("Generating LE filter")); 
        if (filter->cmp(NdbScanFilter::COND_LE, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7686
      }
7687 7688
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7689
    }
7690
    case NDB_GT_FUNC:
7691
    {
7692
      if (!value || !field) break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7693 7694
      // Save value in right format for the field type
      value->save_in_field(field);
7695
      if (a == field)
7696
      {
7697 7698 7699 7700 7701 7702
        DBUG_PRINT("info", ("Generating GT filter"));
        if (filter->cmp(NdbScanFilter::COND_GT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7703
      }
7704
      else
7705
      {
7706 7707 7708 7709 7710 7711
        DBUG_PRINT("info", ("Generating LT filter"));
        if (filter->cmp(NdbScanFilter::COND_LT, 
                        field->get_field_no(),
                        field->get_val(),
                        field->pack_length()) == -1)
          DBUG_RETURN(1);
7712
      }
7713 7714
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7715
    }
7716
    case NDB_LIKE_FUNC:
7717
    {
7718
      if (!value || !field) break;
7719 7720 7721
      if ((value->qualification.value_type != Item::STRING_ITEM) &&
          (value->qualification.value_type != Item::VARBIN_ITEM))
          break;
mskold@mysql.com's avatar
mskold@mysql.com committed
7722 7723 7724
      // Save value in right format for the field type
      value->save_in_field(field);
      DBUG_PRINT("info", ("Generating LIKE filter: like(%d,%s,%d)", 
7725 7726 7727 7728
                          field->get_field_no(), value->get_val(), 
                          value->pack_length()));
      if (filter->cmp(NdbScanFilter::COND_LIKE, 
                      field->get_field_no(),
7729 7730
                      value->get_val(),
                      value->pack_length()) == -1)
7731
        DBUG_RETURN(1);
7732 7733
      cond= cond->next->next->next;
      DBUG_RETURN(0);
7734
    }
7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754
    case NDB_NOTLIKE_FUNC:
    {
      if (!value || !field) break;
      if ((value->qualification.value_type != Item::STRING_ITEM) &&
          (value->qualification.value_type != Item::VARBIN_ITEM))
          break;
      // Save value in right format for the field type
      value->save_in_field(field);
      DBUG_PRINT("info", ("Generating NOTLIKE filter: notlike(%d,%s,%d)", 
                          field->get_field_no(), value->get_val(), 
                          value->pack_length()));
      if (filter->cmp(NdbScanFilter::COND_NOT_LIKE, 
                      field->get_field_no(),
                      value->get_val(),
                      value->pack_length()) == -1)
        DBUG_RETURN(1);
      cond= cond->next->next->next;
      DBUG_RETURN(0);
    }
    case NDB_ISNULL_FUNC:
7755 7756 7757 7758 7759
      if (!field)
        break;
      DBUG_PRINT("info", ("Generating ISNULL filter"));
      if (filter->isnull(field->get_field_no()) == -1)
        DBUG_RETURN(1);
7760 7761
      cond= cond->next->next;
      DBUG_RETURN(0);
7762
    case NDB_ISNOTNULL_FUNC:
7763
    {
7764 7765 7766 7767 7768
      if (!field)
        break;
      DBUG_PRINT("info", ("Generating ISNOTNULL filter"));
      if (filter->isnotnull(field->get_field_no()) == -1)
        DBUG_RETURN(1);         
7769 7770
      cond= cond->next->next;
      DBUG_RETURN(0);
7771 7772 7773 7774 7775 7776 7777 7778 7779 7780
    }
    default:
      break;
    }
    break;
  }
  default:
    break;
  }
  DBUG_PRINT("info", ("Found illegal condition"));
7781
  DBUG_RETURN(1);
7782 7783
}

7784
int
7785
ha_ndbcluster::build_scan_filter_group(Ndb_cond* &cond, NdbScanFilter *filter)
7786
{
7787
  uint level=0;
7788
  bool negated= FALSE;
7789
  DBUG_ENTER("build_scan_filter_group");
7790

7791 7792
  do
  {
7793 7794 7795 7796 7797 7798
    if (!cond)
      DBUG_RETURN(1);
    switch (cond->ndb_item->type) {
    case NDB_FUNCTION:
    {
      switch (cond->ndb_item->qualification.function_type) {
7799
      case NDB_COND_AND_FUNC:
7800
      {
7801 7802 7803 7804 7805
        level++;
        DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NAND":"AND",
                            level));
        if ((negated) ? filter->begin(NdbScanFilter::NAND)
            : filter->begin(NdbScanFilter::AND) == -1)
7806
          DBUG_RETURN(1);
7807
        negated= FALSE;
7808 7809 7810
        cond= cond->next;
        break;
      }
7811
      case NDB_COND_OR_FUNC:
7812
      {
7813 7814 7815 7816 7817 7818
        level++;
        DBUG_PRINT("info", ("Generating %s group %u", (negated)?"NOR":"OR",
                            level));
        if ((negated) ? filter->begin(NdbScanFilter::NOR)
            : filter->begin(NdbScanFilter::OR) == -1)
          DBUG_RETURN(1);
7819
        negated= FALSE;
7820 7821 7822
        cond= cond->next;
        break;
      }
7823
      case NDB_NOT_FUNC:
7824
      {
7825
        DBUG_PRINT("info", ("Generating negated query"));
7826
        cond= cond->next;
7827
        negated= TRUE;
7828 7829 7830 7831
        break;
      }
      default:
        if (build_scan_filter_predicate(cond, filter, negated))
7832
          DBUG_RETURN(1);
7833
        negated= FALSE;
7834 7835 7836
        break;
      }
      break;
7837 7838
    }
    case NDB_END_COND:
7839 7840
      DBUG_PRINT("info", ("End of group %u", level));
      level--;
7841 7842
      if (cond) cond= cond->next;
      if (filter->end() == -1)
7843
        DBUG_RETURN(1);
7844 7845 7846
      if (!negated)
        break;
      // else fall through (NOT END is an illegal condition)
7847 7848
    default:
    {
7849
      DBUG_PRINT("info", ("Illegal scan filter"));
7850
    }
7851
    }
7852
  }  while (level > 0 || negated);
7853
  
7854
  DBUG_RETURN(0);
7855 7856
}

7857 7858
/*
  Generate the scan filter for a single pushed condition

  SYNOPSIS
    build_scan_filter()
    cond    in/out  First item of the serialized condition; passed on to
                    build_scan_filter_group(), which advances it.
    filter  in/out  Scan filter that receives the generated condition.

  DESCRIPTION
    A condition whose first item is an AND or OR function is passed
    straight to build_scan_filter_group(). Every other condition is
    enclosed in a filter->begin() / filter->end() pair (begin() is
    called without arguments, i.e. with its default group type).

  RETURN
    0  Filter generated
    1  build_scan_filter_group() failed, or begin()/end() returned -1
*/
int
ha_ndbcluster::build_scan_filter(Ndb_cond * &cond, NdbScanFilter *filter)
{
  bool needs_wrapper= TRUE;
  DBUG_ENTER("build_scan_filter");

  // Only a leading AND/OR function makes the wrapping group unnecessary
  if (cond->ndb_item->type == NDB_FUNCTION &&
      (cond->ndb_item->qualification.function_type == NDB_COND_AND_FUNC ||
       cond->ndb_item->qualification.function_type == NDB_COND_OR_FUNC))
    needs_wrapper= FALSE;

  if (needs_wrapper)
  {
    if (filter->begin() == -1)
      DBUG_RETURN(1);
  }

  if (build_scan_filter_group(cond, filter))
    DBUG_RETURN(1);

  if (needs_wrapper)
  {
    if (filter->end() == -1)
      DBUG_RETURN(1);
  }

  DBUG_RETURN(0);
}

7887
int
7888
ha_ndbcluster::generate_scan_filter(Ndb_cond_stack *ndb_cond_stack,
7889
                                    NdbScanOperation *op)
7890 7891 7892 7893
{
  DBUG_ENTER("generate_scan_filter");
  if (ndb_cond_stack)
  {
7894
    DBUG_PRINT("info", ("Generating scan filter"));
7895 7896 7897 7898 7899
    NdbScanFilter filter(op);
    bool multiple_cond= FALSE;
    // Wrap an AND group around multiple conditions
    if (ndb_cond_stack->next) {
      multiple_cond= TRUE;
7900
      if (filter.begin() == -1)
7901
        DBUG_RETURN(1); 
7902 7903
    }
    for (Ndb_cond_stack *stack= ndb_cond_stack; 
7904 7905
         (stack); 
         stack= stack->next)
7906
      {
7907
        Ndb_cond *cond= stack->ndb_cond;
7908

7909 7910 7911 7912 7913
        if (build_scan_filter(cond, &filter))
        {
          DBUG_PRINT("info", ("build_scan_filter failed"));
          DBUG_RETURN(1);
        }
7914
      }
7915 7916
    if (multiple_cond && filter.end() == -1)
      DBUG_RETURN(1);
7917 7918 7919 7920 7921 7922
  }
  else
  {  
    DBUG_PRINT("info", ("Empty stack"));
  }

7923
  DBUG_RETURN(0);
7924 7925
}

7926 7927 7928 7929 7930 7931 7932 7933 7934 7935
/*
  Implementation of SHOW NDBCLUSTER STATUS

  SYNOPSIS
    ndbcluster_show_status()
    thd   in  Thread whose protocol object receives the result set

  DESCRIPTION
    Sends a result set with the columns free_list, created, free and
    sizeof. If the thread has an Ndb object, one row is written for
    every entry returned by Ndb::get_free_list_usage(); otherwise the
    result set is empty.

  RETURN
    FALSE  Result set sent
    TRUE   NDB Cluster is not enabled (an error message has been
           raised), or sending the metadata or a row failed
*/
int
ndbcluster_show_status(THD* thd)
{
  Protocol *protocol= thd->protocol;

  DBUG_ENTER("ndbcluster_show_status");

  if (have_ndbcluster != SHOW_OPTION_YES)
  {
    my_message(ER_NOT_SUPPORTED_YET,
               "Cannot call SHOW NDBCLUSTER STATUS because skip-ndbcluster is "
               "defined",
               MYF(0));
    DBUG_RETURN(TRUE);
  }

  // Result set metadata
  List<Item> columns;
  columns.push_back(new Item_empty_string("free_list", 255));
  columns.push_back(new Item_return_int("created", 10,MYSQL_TYPE_LONG));
  columns.push_back(new Item_return_int("free", 10,MYSQL_TYPE_LONG));
  columns.push_back(new Item_return_int("sizeof", 10,MYSQL_TYPE_LONG));

  if (protocol->send_fields(&columns,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    DBUG_RETURN(TRUE);

  // Without an Ndb object for this thread there are no rows to report
  if (!get_thd_ndb(thd) || !get_thd_ndb(thd)->ndb)
  {
    send_eof(thd);
    DBUG_RETURN(FALSE);
  }

  Ndb* ndb= (get_thd_ndb(thd))->ndb;
  Ndb::Free_list_usage usage;
  // NOTE(review): a zero m_name presumably makes get_free_list_usage()
  // start with the first free list - confirm against the NDB API
  usage.m_name= 0;
  for (;;)
  {
    if (!ndb->get_free_list_usage(&usage))
      break;

    protocol->prepare_for_resend();

    protocol->store(usage.m_name, &my_charset_bin);
    protocol->store((uint)usage.m_created);
    protocol->store((uint)usage.m_free);
    protocol->store((uint)usage.m_sizeof);
    if (protocol->write())
      DBUG_RETURN(TRUE);
  }
  send_eof(thd);

  DBUG_RETURN(FALSE);
}

7974
#endif /* HAVE_NDBCLUSTER_DB */