ma_loghandler.c 250 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (C) 2007 MySQL AB & Sanja Belkin

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

16
#include "maria_def.h"
17
#include "trnman.h"
18 19
#include "ma_blockrec.h" /* for some constants and in-write hooks */
#include "ma_key_recover.h" /* For some in-write hooks */
20

21 22 23 24 25
/**
   @file
   @brief Module which writes and reads to a transaction log
*/

26
/* 0xFF can never be a valid first byte of a chunk, so it is used as filler */
#define TRANSLOG_FILLER 0xFF

/* number of opened log files in the pagecache (should be at least 2) */
#define OPENED_FILES_NUM 3
#define CACHED_FILES_NUM 5
#define CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT 7
/*
  The hash headers are needed only when CACHED_FILES_NUM exceeds the
  direct search limit (not the case with the values above).
*/
#if CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT
#include <hash.h>
#include <m_ctype.h>
#endif

/* transaction log file descriptor */
typedef struct st_translog_file
{
  /* NOTE(review): presumably the number of the log file -- confirm */
  uint32 number;
  /* page cache file object for this log file */
  PAGECACHE_FILE handler;
  /* NOTE(review): semantics of the two flags are set elsewhere -- confirm */
  my_bool was_recovered;
  my_bool is_sync;
} TRANSLOG_FILE;
46

unknown's avatar
unknown committed
47
/* records buffer size (should be TRANSLOG_PAGE_SIZE * n) */
#define TRANSLOG_WRITE_BUFFER (1024*1024)
/* min chunk length */
#define TRANSLOG_MIN_CHUNK 3
/*
  Number of buffers used by loghandler

  Should be at least 4, because one thread can block up to 2 buffers in
  normal circumstances (less than half of one and the full other, or the
  just switched one and the other). But if we meet the end of the file in
  the middle and have to switch buffer it will be 3.  + 1 buffer for
  flushing/writing.
  We have a bigger number here for higher concurrency.
*/
#define TRANSLOG_BUFFERS_NO 5
/* number of bytes (+ header) which can be unused on first page in sequence */
#define TRANSLOG_MINCHUNK_CONTENT 1
/* version of log file */
#define TRANSLOG_VERSION_ID 10000               /* 1.00.00 */

#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */

/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
#define MAX_NUMBER_OF_LSNS_PER_RECORD 2
unknown's avatar
unknown committed
71

unknown's avatar
unknown committed
72

73 74 75 76 77 78
/* log write buffer descriptor */
struct st_translog_buffer
{
  LSN last_lsn;
  /* This buffer offset in the file */
  TRANSLOG_ADDRESS offset;
79 80 81 82 83
  /*
    Next buffer offset in the file (it is not always offset + size,
    in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
  */
  TRANSLOG_ADDRESS next_buffer_offset;
84
  /*
85
     How much is written (or will be written when copy_to_buffer_in_progress
86 87
     become 0) to this buffer
  */
unknown's avatar
unknown committed
88 89
  translog_size_t size;
  /* File handler for this buffer */
unknown's avatar
unknown committed
90
  TRANSLOG_FILE *file;
91
  /* Threads which are waiting for buffer filling/freeing */
unknown's avatar
unknown committed
92
  pthread_cond_t waiting_filling_buffer;
93
  /* Number of records which are in copy progress */
unknown's avatar
unknown committed
94
  uint copy_to_buffer_in_progress;
95 96
  /* list of waiting buffer ready threads */
  struct st_my_thread_var *waiting_flush;
97 98
  /*
    Pointer on the buffer which overlap with this one (due to flush of
unknown's avatar
unknown committed
99 100 101
    loghandler, the last page of that buffer is the same as the first page
    of this buffer) and have to be written first (because contain old
    content of page which present in both buffers)
102
  */
103 104
  struct st_translog_buffer *overlay;
#ifndef DBUG_OFF
unknown's avatar
unknown committed
105
  uint buffer_no;
106
#endif
107 108
  /*
    Lock for the buffer.
unknown's avatar
unknown committed
109

110
    Current buffer also lock the whole handler (if one want lock the handler
unknown's avatar
unknown committed
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
    one should lock the current buffer).

    Buffers are locked only in one direction (with overflow and beginning
    from the first buffer). If we keep lock on buffer N we can lock only
    buffer N+1 (never N-1).

    One thread do not lock more then 2 buffer in a time, so to make dead
    lock it should be N thread (where N equal number of buffers) takes one
    buffer and try to lock next. But it is impossible because there is only
    2 cases when thread take 2 buffers: 1) one thread finishes current
    buffer (where horizon is) and start next (to which horizon moves).  2)
    flush start from buffer after current (oldest) and go till the current
    crabbing by buffer sequence. And there is  only one flush in a moment
    (they are serialised).

   Because of above and number of buffers equal 5 we can't get dead lock (it is
   impossible to get all 5 buffers locked simultaneously).
128
  */
unknown's avatar
unknown committed
129
  pthread_mutex_t mutex;
130
  /* Cache for current log. */
unknown's avatar
unknown committed
131
  uchar buffer[TRANSLOG_WRITE_BUFFER];
132 133 134 135 136
};


struct st_buffer_cursor
{
137
  /* pointer into the buffer */
unknown's avatar
unknown committed
138
  uchar *ptr;
unknown's avatar
unknown committed
139 140
  /* current buffer */
  struct st_translog_buffer *buffer;
141
  /* How many bytes we wrote on the current page */
unknown's avatar
unknown committed
142
  uint16 current_page_fill;
143 144 145 146
  /*
    How many times we write the page on the disk during flushing process
    (for sector protection).
  */
147 148 149
  uint16 write_counter;
  /* previous write offset */
  uint16 previous_offset;
unknown's avatar
unknown committed
150
  /* Number of current buffer */
151
  uint8 buffer_no;
152 153 154 155 156 157 158 159 160 161
  /*
    True if it is just filling buffer after advancing the pointer to
    the horizon.
  */
  my_bool chaser;
  /*
    Is current page of the cursor already finished (sector protection
    should be applied if it is needed)
  */
  my_bool protected;
162 163 164 165 166 167 168
};


struct st_translog_descriptor
{
  /* *** Parameters of the log handler *** */

unknown's avatar
unknown committed
169 170 171
  /* Page cache for the log reads */
  PAGECACHE *pagecache;
  uint flags;
172 173
  /* File open flags */
  uint open_flags;
174 175 176
  /* max size of one log size (for new logs creation) */
  uint32 log_file_max_size;
  uint32 server_version;
177
  /* server ID (used for replication) */
178 179 180
  uint32 server_id;
  /* Loghandler's buffer capacity in case of chunk 2 filling */
  uint32 buffer_capacity_chunk_2;
181 182 183 184 185 186
  /*
    Half of the buffer capacity in case of chunk 2 filling,
    used to decide will we write a record in one group or many.
    It is written to the variable just to avoid devision every
    time we need it.
  */
187
  uint32 half_buffer_capacity_chunk_2;
188
  /* Page overhead calculated by flags (whether CRC is enabled, etc) */
unknown's avatar
unknown committed
189
  uint16 page_overhead;
190 191 192 193
  /*
    Page capacity ("useful load") calculated by flags
    (TRANSLOG_PAGE_SIZE - page_overhead-1)
  */
unknown's avatar
unknown committed
194
  uint16 page_capacity_chunk_2;
195
  /* Path to the directory where we store log store files */
unknown's avatar
unknown committed
196
  char directory[FN_REFLEN];
197 198

  /* *** Current state of the log handler *** */
unknown's avatar
unknown committed
199 200 201 202 203 204 205
  /* list of opened files */
  DYNAMIC_ARRAY open_files;
  /* min/max number of file in the array */
  uint32 max_file, min_file;
  /* the opened files list guard */
  rw_lock_t open_files_lock;

206 207 208 209
  /*
    File descriptor of the directory where we store log files for syncing
    it.
  */
unknown's avatar
unknown committed
210
  File directory_fd;
211 212 213 214 215 216 217 218 219
  /* buffers for log writing */
  struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
  /*
     horizon - visible end of the log (here is absolute end of the log:
     position where next chunk can start
  */
  TRANSLOG_ADDRESS horizon;
  /* horizon buffer cursor */
  struct st_buffer_cursor bc;
220 221
  /* maximum LSN of the current (not finished) file */
  LSN max_lsn;
222

223 224 225 226 227 228
  /*
    Last flushed LSN (protected by log_flush_lock).
    Pointers in the log ordered like this:
    last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
    max_lsn <= horizon
  */
229
  LSN flushed;
230
  /* Last LSN sent to the disk (but maybe not written yet) */
231
  LSN sent_to_disk;
232
  TRANSLOG_ADDRESS previous_flush_horizon;
233
  /* All what is after this address is not sent to disk yet */
234
  TRANSLOG_ADDRESS in_buffers_only;
unknown's avatar
unknown committed
235
  /* protection of sent_to_file and in_buffers_only */
236
  pthread_mutex_t sent_to_disk_lock;
unknown's avatar
unknown committed
237 238 239 240
  /*
    Protect flushed (see above) and for flush serialization (will
    be removed in v1.5
  */
241
  pthread_mutex_t log_flush_lock;
242 243 244 245 246 247 248 249 250 251

  /* Protects changing of headers of finished files (max_lsn) */
  pthread_mutex_t file_header_lock;

  /*
    Sorted array (with protection) of files where we started writing process
    and so we can't give last LSN yet
  */
  pthread_mutex_t unfinished_files_lock;
  DYNAMIC_ARRAY unfinished_files;
252

253 254 255 256 257
  /*
    minimum number of still need file calculeted during last
    translog_purge call
  */
  uint32 min_need_file;
258 259 260 261 262 263
  /* Purger data: minimum file in the log (or 0 if unknown) */
  uint32 min_file_number;
  /* Protect purger from many calls and it's data */
  pthread_mutex_t purger_lock;
  /* last low water mark checked */
  LSN last_lsn_checked;
264 265 266 267
};

static struct st_translog_descriptor log_descriptor;

268 269
ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
ulong log_file_size= TRANSLOG_FILE_SIZE;
270 271
ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;

unknown's avatar
unknown committed
272
/* Marker for end of log */
unknown's avatar
unknown committed
273
static uchar end_of_log= 0;
unknown's avatar
unknown committed
274
#define END_OF_LOG &end_of_log
275

276
enum enum_translog_status translog_status= TRANSLOG_UNINITED;
277

278
/* chunk types */
unknown's avatar
unknown committed
279 280 281 282 283
#define TRANSLOG_CHUNK_LSN   0x00      /* 0 chunk refer as LSN (head or tail */
#define TRANSLOG_CHUNK_FIXED (1 << 6)  /* 1 (pseudo)fixed record (also LSN) */
#define TRANSLOG_CHUNK_NOHDR (2 << 6)  /* 2 no head chunk (till page end) */
#define TRANSLOG_CHUNK_LNGTH (3 << 6)  /* 3 chunk with chunk length */
#define TRANSLOG_CHUNK_TYPE  (3 << 6)  /* Mask to get chunk type */
284 285 286
#define TRANSLOG_REC_TYPE    0x3F               /* Mask to get record type */

/* compressed (relative) LSN constants */
unknown's avatar
unknown committed
287
#define TRANSLOG_CLSN_LEN_BITS 0xC0    /* Mask to get compressed LSN length */
288 289


unknown's avatar
unknown committed
290

291 292 293 294 295 296
#include <my_atomic.h>
/* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */
static MARIA_SHARE **id_to_share= NULL;
/* lock for id_to_share */
static my_atomic_rwlock_t LOCK_id_to_share;

unknown's avatar
unknown committed
297 298 299 300 301 302
static my_bool translog_dummy_callback(uchar *page,
                                       pgcache_page_no_t page_no,
                                       uchar* data_ptr);
static my_bool translog_page_validator(uchar *page,
                                       pgcache_page_no_t page_no,
                                       uchar* data_ptr);
303

304
static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
unknown's avatar
unknown committed
305 306
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);

307

308 309 310 311
/*
  Initialize log_record_type_descriptors
*/

unknown's avatar
unknown committed
312
LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
313

314 315

#ifndef DBUG_OFF
unknown's avatar
unknown committed
316 317

#define translog_buffer_lock_assert_owner(B) \
318
  safe_mutex_assert_owner(&(B)->mutex);
unknown's avatar
unknown committed
319 320 321 322 323
void translog_lock_assert_owner()
{
  translog_buffer_lock_assert_owner(log_descriptor.bc.buffer);
}

324 325 326 327 328 329 330 331 332 333 334 335 336 337
/**
  @brief check the description table validity

  @param num             how many records should be filled
*/

static void check_translog_description_table(int num)
{
  int i;
  DBUG_ENTER("check_translog_description_table");
  DBUG_PRINT("enter", ("last record: %d", num));
  DBUG_ASSERT(num > 0);
  /* last is reserved for extending the table */
  DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
unknown's avatar
unknown committed
338
  DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
339 340

  for (i= 0; i <= num; i++)
341 342 343 344
  {
    DBUG_PRINT("info",
               ("record type: %d  class: %d  fixed: %u  header: %u  LSNs: %u  "
                "name: %s",
unknown's avatar
unknown committed
345
                i, log_record_type_descriptor[i].rclass,
346 347 348 349
                (uint)log_record_type_descriptor[i].fixed_length,
                (uint)log_record_type_descriptor[i].read_header_len,
                (uint)log_record_type_descriptor[i].compressed_LSN,
                log_record_type_descriptor[i].name));
unknown's avatar
unknown committed
350
    switch (log_record_type_descriptor[i].rclass) {
351
    case LOGRECTYPE_NOT_ALLOWED:
352
      DBUG_ASSERT(i == 0);
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
      break;
    case LOGRECTYPE_VARIABLE_LENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
      DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
                  ((log_record_type_descriptor[i].compressed_LSN == 1) &&
                   (log_record_type_descriptor[i].read_header_len >=
                    LSN_STORE_SIZE)) ||
                  ((log_record_type_descriptor[i].compressed_LSN == 2) &&
                   (log_record_type_descriptor[i].read_header_len >=
                    LSN_STORE_SIZE * 2)));
      break;
    case LOGRECTYPE_PSEUDOFIXEDLENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
                  log_record_type_descriptor[i].read_header_len);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
      break;
    case LOGRECTYPE_FIXEDLENGTH:
      DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
                  log_record_type_descriptor[i].read_header_len);
      DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
      break;
    default:
      DBUG_ASSERT(0);
    }
  }
  for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
  {
unknown's avatar
unknown committed
381 382
    DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
                LOGRECTYPE_NOT_ALLOWED);
383 384 385
  }
  DBUG_VOID_RETURN;
}
unknown's avatar
unknown committed
386 387 388
#else
#define translog_buffer_lock_assert_owner(B)
#define translog_lock_assert_owner()
389 390
#endif

391
static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
unknown's avatar
unknown committed
392
{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
393
 "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };
394 395 396

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
397
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
unknown's avatar
unknown committed
398
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
399
 "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
400 401

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
402 403
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
unknown's avatar
unknown committed
404
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
405
 "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
406

407 408
/* Use this entry next time we need to add a new entry */
static LOG_DESC INIT_LOGREC_REDO_NOT_USED=
unknown's avatar
unknown committed
409
{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
410
 "redo_insert_row_blob", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
411 412

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
unknown's avatar
unknown committed
413 414
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
415
 "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
416 417 418 419 420

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
421
 NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
422
 "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
423 424 425 426 427

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
428
 NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
429
 "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
430

431
static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
432 433
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
unknown's avatar
unknown committed
434
 NULL, write_hook_for_redo, NULL, 0,
435 436 437 438 439 440 441 442
 "redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
443

unknown's avatar
unknown committed
444
/* not yet used; for when we have versioning */
445
static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
unknown's avatar
unknown committed
446
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
447
 "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
448

unknown's avatar
unknown committed
449
/** @todo RECOVERY BUG unused, remove? */
450
static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
unknown's avatar
unknown committed
451
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
452
 "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
453 454

static LOG_DESC INIT_LOGREC_REDO_INDEX=
unknown's avatar
unknown committed
455
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
456
 "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
457

458 459 460 461 462 463 464 465 466 467 468 469
static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

470
static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
unknown's avatar
unknown committed
471
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
unknown's avatar
unknown committed
472
 "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
473 474

static LOG_DESC INIT_LOGREC_CLR_END=
unknown's avatar
unknown committed
475 476
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
 CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
unknown's avatar
unknown committed
477
 "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
478 479

static LOG_DESC INIT_LOGREC_PURGE_END=
unknown's avatar
unknown committed
480
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
481
 "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
482 483

static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
unknown's avatar
unknown committed
484
{LOGRECTYPE_VARIABLE_LENGTH, 0,
485
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
486
 NULL, write_hook_for_undo_row_insert, NULL, 1,
unknown's avatar
unknown committed
487
 "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
488 489 490 491

static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
492
 NULL, write_hook_for_undo_row_delete, NULL, 1,
unknown's avatar
unknown committed
493
 "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
494 495 496 497

static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
unknown's avatar
unknown committed
498
 NULL, write_hook_for_undo_row_update, NULL, 1,
unknown's avatar
unknown committed
499
 "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};
500 501

static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
502 503
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
504
 NULL, write_hook_for_undo_key_insert, NULL, 1,
unknown's avatar
unknown committed
505
 "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
506

507 508 509 510 511 512 513
/* This will never be in the log, only in the clr */
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
 "undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

514
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
515 516 517
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
unknown's avatar
unknown committed
518
 "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
519

520 521 522 523 524 525
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
 "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

526
static LOG_DESC INIT_LOGREC_PREPARE=
unknown's avatar
unknown committed
527
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
528
 "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
529 530

static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
531
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
532
 "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
533 534

static LOG_DESC INIT_LOGREC_COMMIT=
unknown's avatar
unknown committed
535 536 537
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
 NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
 NULL};
538 539

static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
unknown's avatar
unknown committed
540
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
unknown's avatar
unknown committed
541
 "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
542

543
static LOG_DESC INIT_LOGREC_CHECKPOINT=
unknown's avatar
unknown committed
544
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
545
 "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
546 547

static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
unknown's avatar
unknown committed
548
{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
549
"redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
550 551

static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
unknown's avatar
unknown committed
552
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
553
 "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
554 555

static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
556
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
557
 "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
558

559 560
static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
unknown's avatar
unknown committed
561
 NULL, write_hook_for_redo_delete_all, NULL, 0,
unknown's avatar
unknown committed
562
 "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
563 564

static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
unknown's avatar
unknown committed
565
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 8 + 8, FILEID_STORE_SIZE + 8 + 8,
unknown's avatar
unknown committed
566
 NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
567
 "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
568 569

static LOG_DESC INIT_LOGREC_FILE_ID=
unknown's avatar
unknown committed
570
{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
unknown's avatar
unknown committed
571
 "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
572 573

static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
unknown's avatar
unknown committed
574
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
unknown's avatar
unknown committed
575
 "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
576

unknown's avatar
unknown committed
577 578 579 580 581
static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, NULL, NULL, 0,
 "incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

unknown's avatar
unknown committed
582 583 584 585 586
static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
{LOGRECTYPE_FIXEDLENGTH, 0, 0,
 NULL, NULL, NULL, 0,
 "incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

587
const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
588

589
void translog_table_init()
590
{
591
  int i;
592 593 594 595 596 597
  log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
    INIT_LOGREC_RESERVED_FOR_CHUNKS23;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
    INIT_LOGREC_REDO_INSERT_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
    INIT_LOGREC_REDO_INSERT_ROW_TAIL;
598 599
  log_record_type_descriptor[LOGREC_REDO_NOT_USED]=
    INIT_LOGREC_REDO_NOT_USED;
600 601 602 603 604 605
  log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
    INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
  log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
    INIT_LOGREC_REDO_PURGE_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
    INIT_LOGREC_REDO_PURGE_ROW_TAIL;
606 607 608 609
  log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
    INIT_LOGREC_REDO_FREE_BLOCKS;
  log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
    INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
610 611 612 613 614 615
  log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
    INIT_LOGREC_REDO_DELETE_ROW;
  log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
    INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
  log_record_type_descriptor[LOGREC_REDO_INDEX]=
    INIT_LOGREC_REDO_INDEX;
616 617 618 619
  log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
    INIT_LOGREC_REDO_INDEX_NEW_PAGE;
  log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
    INIT_LOGREC_REDO_INDEX_FREE_PAGE;
620 621 622 623 624 625 626 627 628 629 630 631 632 633
  log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
    INIT_LOGREC_REDO_UNDELETE_ROW;
  log_record_type_descriptor[LOGREC_CLR_END]=
    INIT_LOGREC_CLR_END;
  log_record_type_descriptor[LOGREC_PURGE_END]=
    INIT_LOGREC_PURGE_END;
  log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
    INIT_LOGREC_UNDO_ROW_INSERT;
  log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
    INIT_LOGREC_UNDO_ROW_DELETE;
  log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
    INIT_LOGREC_UNDO_ROW_UPDATE;
  log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
    INIT_LOGREC_UNDO_KEY_INSERT;
634 635
  log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
    INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
636 637
  log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
    INIT_LOGREC_UNDO_KEY_DELETE;
638 639
  log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
    INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
640 641 642 643 644 645 646 647
  log_record_type_descriptor[LOGREC_PREPARE]=
    INIT_LOGREC_PREPARE;
  log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
    INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
  log_record_type_descriptor[LOGREC_COMMIT]=
    INIT_LOGREC_COMMIT;
  log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
    INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
648 649
  log_record_type_descriptor[LOGREC_CHECKPOINT]=
    INIT_LOGREC_CHECKPOINT;
650 651 652 653 654 655
  log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
    INIT_LOGREC_REDO_CREATE_TABLE;
  log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
    INIT_LOGREC_REDO_RENAME_TABLE;
  log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
    INIT_LOGREC_REDO_DROP_TABLE;
656 657 658 659
  log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
    INIT_LOGREC_REDO_DELETE_ALL;
  log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
    INIT_LOGREC_REDO_REPAIR_TABLE;
660 661 662 663
  log_record_type_descriptor[LOGREC_FILE_ID]=
    INIT_LOGREC_FILE_ID;
  log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
    INIT_LOGREC_LONG_TRANSACTION_ID;
unknown's avatar
unknown committed
664 665
  log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
    INIT_LOGREC_INCOMPLETE_LOG;
unknown's avatar
unknown committed
666 667 668
  log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
    INIT_LOGREC_INCOMPLETE_GROUP;
  for (i= LOGREC_INCOMPLETE_GROUP + 1;
669 670
       i < LOGREC_NUMBER_OF_TYPES;
       i++)
unknown's avatar
unknown committed
671
    log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
unknown's avatar
unknown committed
672 673 674
#ifndef DBUG_OFF
  check_translog_description_table(LOGREC_INCOMPLETE_GROUP);
#endif
675 676
};

677

unknown's avatar
unknown committed
678 679
/* all possible flags page overheads */
static uint page_overhead[TRANSLOG_FLAGS_NUM];
680 681 682 683 684 685 686 687 688 689 690

/*
  NOTE(review): presumably the data passed to translog_page_validator()
  through its data_ptr argument -- confirm against the callers.
*/
typedef struct st_translog_validator_data
{
  /* pointer to a log address */
  TRANSLOG_ADDRESS *addr;
  /* NOTE(review): set/read elsewhere; same name as TRANSLOG_FILE's flag */
  my_bool was_recovered;
} TRANSLOG_VALIDATOR_DATA;


const char *maria_data_root;


unknown's avatar
unknown committed
691 692 693 694 695 696 697 698
/*
  Check cursor/buffer consistence

  SYNOPSIS
    translog_check_cursor
    cursor               cursor which will be checked
*/

699 700
static void translog_check_cursor(struct st_buffer_cursor *cursor
                                 __attribute__((unused)))
unknown's avatar
unknown committed
701 702 703 704 705 706 707 708 709 710
{
  DBUG_ASSERT(cursor->chaser ||
              ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
               cursor->buffer->size));
  DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
  DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
              cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
  DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
}

711 712 713 714 715 716 717

/**
  @brief switch the loghandler in read only mode in case of write error
*/

void translog_stop_writing()
{
unknown's avatar
unknown committed
718 719 720
  translog_status= (translog_status == TRANSLOG_SHUTDOWN ?
                    TRANSLOG_UNINITED :
                    TRANSLOG_READONLY);
721 722 723 724
  log_descriptor.open_flags= O_BINARY | O_RDONLY;
}


725
/*
726
  @brief Get file name of the log by log number
727

728 729
  @param file_no         Number of the log we want to open
  @param path            Pointer to buffer where file name will be
unknown's avatar
unknown committed
730
                         stored (must be FN_REFLEN bytes at least)
731 732

  @return pointer to path
733 734
*/

735
char *translog_filename_by_fileno(uint32 file_no, char *path)
736
{
unknown's avatar
unknown committed
737 738
  char buff[11], *end;
  uint length;
739
  DBUG_ENTER("translog_filename_by_fileno");
unknown's avatar
unknown committed
740
  DBUG_ASSERT(file_no <= 0xfffffff);
unknown's avatar
unknown committed
741 742 743 744

  /* log_descriptor.directory is already formated */
  end= strxmov(path, log_descriptor.directory, "maria_log.0000000", NullS);
  length= (uint) (int10_to_str(file_no, buff, 10) - buff);
745
  strmov(end - length +1, buff);
unknown's avatar
unknown committed
746

unknown's avatar
unknown committed
747
  DBUG_PRINT("info", ("Path: '%s'  path: 0x%lx", path, (ulong) path));
unknown's avatar
unknown committed
748
  DBUG_RETURN(path);
749 750 751
}


752 753
/**
  @brief Create log file with given number without cache

  @param file_no         Number of the log we want to open

  @note Nothing is created unless translog_status is TRANSLOG_OK.
  On creation or directory sync failure the loghandler is switched to
  read only mode (translog_stop_writing()).

  retval -1  error
  retval # file descriptor number
*/

static File create_logfile_by_number_no_cache(uint32 file_no)
{
  File file;
  char path[FN_REFLEN];
  DBUG_ENTER("create_logfile_by_number_no_cache");

  if (translog_status != TRANSLOG_OK)
    DBUG_RETURN(-1);

  /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
  if ((file= my_create(translog_filename_by_fileno(file_no, path),
                       0, O_BINARY | O_RDWR, MYF(MY_WME))) < 0)
  {
    DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path));
    translog_stop_writing();
    DBUG_RETURN(-1);
  }
  if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
      my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
  {
    DBUG_PRINT("error", ("Error %d during syncing directory '%s'",
                         errno, log_descriptor.directory));
    /*
      The caller gets only -1, so nobody else can close the file we have
      just created: close it here to not leak the descriptor.
    */
    my_close(file, MYF(0));
    translog_stop_writing();
    DBUG_RETURN(-1);
  }
  DBUG_PRINT("info", ("File: '%s'  handler: %d", path, file));
  DBUG_RETURN(file);
}

/**
  @brief Open (not create) log file with given number without cache

  @param file_no         Number of the log we want to open

  retval -1  error
  retval # file descriptor number
797 798
*/

799
static File open_logfile_by_number_no_cache(uint32 file_no)
800 801 802 803 804
{
  File file;
  char path[FN_REFLEN];
  DBUG_ENTER("open_logfile_by_number_no_cache");

805
  /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
806
  /* TODO: use my_create() */
unknown's avatar
unknown committed
807
  if ((file= my_open(translog_filename_by_fileno(file_no, path),
808
                     log_descriptor.open_flags,
809 810
                     MYF(MY_WME))) < 0)
  {
811
    DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path));
unknown's avatar
unknown committed
812
    DBUG_RETURN(-1);
813
  }
unknown's avatar
unknown committed
814
  DBUG_PRINT("info", ("File: '%s'  handler: %d", path, file));
815 816 817 818
  DBUG_RETURN(file);
}


unknown's avatar
unknown committed
819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865
/**
  @brief Gets the descriptor of an already opened log file by its number

  The open_files array is indexed from the newest file: element 0 is
  log_descriptor.max_file, so the wanted file is at max_file - file_no.

  @param file_no         Number of the log we want to open

  @return pointer to the file descriptor stored in open_files
*/

static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
{
  TRANSLOG_FILE **slot;
  TRANSLOG_FILE *file;
  DBUG_ENTER("get_logfile_by_number");

  rw_rdlock(&log_descriptor.open_files_lock);
  DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
              log_descriptor.open_files.elements);
  /* the number must be within [min_file, max_file] */
  DBUG_ASSERT(log_descriptor.max_file >= file_no);
  DBUG_ASSERT(log_descriptor.min_file <= file_no);
  DBUG_ASSERT(log_descriptor.max_file - file_no <
              log_descriptor.open_files.elements);
  slot= dynamic_element(&log_descriptor.open_files,
                        log_descriptor.max_file - file_no, TRANSLOG_FILE **);
  file= *slot;
  rw_unlock(&log_descriptor.open_files_lock);

  DBUG_PRINT("info", ("File 0x%lx File no: %lu, File handler: %d",
                      (ulong)file, (ulong)file_no,
                      (file ? file->handler.file : -1)));
  DBUG_ASSERT(!file || file->number == file_no);
  DBUG_RETURN(file);
}


/**
  @brief Gets the descriptor of the current log file

  @return element 0 of log_descriptor.open_files (read under
  open_files_lock)
*/

static TRANSLOG_FILE *get_current_logfile()
{
  TRANSLOG_FILE **first;
  TRANSLOG_FILE *file;

  rw_rdlock(&log_descriptor.open_files_lock);
  DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
              log_descriptor.open_files.elements);
  first= dynamic_element(&log_descriptor.open_files, 0, TRANSLOG_FILE **);
  file= *first;
  rw_unlock(&log_descriptor.open_files_lock);
  return file;
}

866 867 868 869 870 871 872 873
/*
  Tag copied to the very beginning of every log file by
  translog_write_file_header(): bytes 254, 254, 11, 1 and the text
  "MARIALOG".
*/
uchar	NEAR maria_trans_file_magic[]=
{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
 'L', 'O', 'G' };
/*
  Size of the meaningful part of the log file header: the tag above followed
  by timestamp (8), maria version (4), mysql version (4), server id (4),
  page size in sectors (2), file number (3) and max LSN (LSN_STORE_SIZE),
  in the order translog_write_file_header() stores them.
*/
#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
                              8 + 4 + 4 + 4 + 2 + 3 + \
                              LSN_STORE_SIZE)


874 875 876 877 878 879
/*
  Write log file page header in the just opened new log file

  SYNOPSIS
    translog_write_file_header();

unknown's avatar
unknown committed
880 881 882
   NOTES
    First page is just a marker page; We don't store any real log data in it.

883 884 885 886 887
  RETURN
    0 OK
    1 ERROR
*/

888
static my_bool translog_write_file_header()
889
{
unknown's avatar
unknown committed
890
  TRANSLOG_FILE *file;
891
  ulonglong timestamp;
unknown's avatar
unknown committed
892
  uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
unknown's avatar
unknown committed
893
  my_bool rc;
894 895 896
  DBUG_ENTER("translog_write_file_header");

  /* file tag */
unknown's avatar
unknown committed
897 898
  memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
  page+= sizeof(maria_trans_file_magic);
899 900
  /* timestamp */
  timestamp= my_getsystime();
unknown's avatar
unknown committed
901 902
  int8store(page, timestamp);
  page+= 8;
903
  /* maria version */
unknown's avatar
unknown committed
904 905
  int4store(page, TRANSLOG_VERSION_ID);
  page+= 4;
906
  /* mysql version (MYSQL_VERSION_ID) */
unknown's avatar
unknown committed
907 908
  int4store(page, log_descriptor.server_version);
  page+= 4;
909
  /* server ID */
unknown's avatar
unknown committed
910 911 912 913 914
  int4store(page, log_descriptor.server_id);
  page+= 4;
  /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */
  int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE);
  page+= 2;
915
  /* file number */
unknown's avatar
unknown committed
916 917
  int3store(page, LSN_FILE_NO(log_descriptor.horizon));
  page+= 3;
unknown's avatar
unknown committed
918
  lsn_store(page, LSN_IMPOSSIBLE);
919
  page+= LSN_STORE_SIZE;
unknown's avatar
unknown committed
920
  memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
921

unknown's avatar
unknown committed
922 923 924 925 926 927 928 929 930 931
  file= get_current_logfile();
  rc= my_pwrite(file->handler.file, page_buff, sizeof(page_buff), 0,
                log_write_flags) != 0;
  /*
    Dropping the flag in such way can make false alarm: signalling than the
    file in not sync when it is sync, but the situation is quite rare and
    protections with mutexes give much more overhead to the whole engine
  */
  file->is_sync= 0;
  DBUG_RETURN(rc);
932 933
}

934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
/**
  @brief Writes the new max LSN to the header of the given file and syncs
  the file

  The LSN is stored in the last LSN_STORE_SIZE bytes of the header data
  (offset LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE).

  @param file            The file descriptor
  @param lsn             That LSN which should be written

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_max_lsn_to_header(File file, LSN lsn)
{
  uchar lsn_buff[LSN_STORE_SIZE];
  DBUG_ENTER("translog_max_lsn_to_header");
  DBUG_PRINT("enter", ("File descriptor: %ld  "
                       "lsn: (%lu,0x%lx)",
                       (long) file,
                       LSN_IN_PARTS(lsn)));

  lsn_store(lsn_buff, lsn);

  /* the sync is attempted only if the write succeeded */
  if (my_pwrite(file, lsn_buff, LSN_STORE_SIZE,
                (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
                log_write_flags) != 0)
    DBUG_RETURN(1);
  if (my_sync(file, MYF(MY_WME)) != 0)
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}

962

963 964 965 966 967 968
/*
  Information from transaction log file header
*/

typedef struct st_loghandler_file_info
{
969
  /*
970 971 972
    LSN_IMPOSSIBLE for current file (not finished file).
    Maximum LSN of the record which parts stored in the
    file.
973 974
  */
  LSN max_lsn;
975 976
  ulonglong timestamp;   /* Time stamp */
  ulong maria_version;   /* Version of maria loghandler */
977
  ulong mysql_version;   /* Version of mysql server */
978 979 980 981 982 983
  ulong server_id;       /* Server ID */
  uint page_size;        /* Loghandler page size */
  uint file_number;      /* Number of the file (from the file header) */
} LOGHANDLER_FILE_INFO;

/*
984
  @brief Read hander file information from loghandler file
985 986

  @param desc header information descriptor to be filled with information
987
  @param file file descriptor to read
988 989 990 991 992

  @retval 0 OK
  @retval 1 Error
*/

993
my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
994
{
995
  uchar page_buff[LOG_HEADER_DATA_SIZE], *ptr;
996 997
  DBUG_ENTER("translog_read_file_header");

998
  if (my_pread(file, page_buff,
999 1000 1001 1002 1003 1004 1005 1006 1007 1008
               sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
  {
    DBUG_PRINT("info", ("log read fail error: %d", my_errno));
    DBUG_RETURN(1);
  }
  ptr= page_buff + sizeof(maria_trans_file_magic);
  desc->timestamp= uint8korr(ptr);
  ptr+= 8;
  desc->maria_version= uint4korr(ptr);
  ptr+= 4;
1009
  desc->mysql_version= uint4korr(ptr);
1010 1011
  ptr+= 4;
  desc->server_id= uint4korr(ptr);
1012
  ptr+= 4;
1013 1014 1015
  desc->page_size= uint2korr(ptr);
  ptr+= 2;
  desc->file_number= uint3korr(ptr);
1016 1017
  ptr+=3;
  desc->max_lsn= lsn_korr(ptr);
unknown's avatar
unknown committed
1018 1019 1020 1021 1022 1023 1024 1025 1026
  DBUG_PRINT("info", ("timestamp: %llu  maria ver: %lu mysql ver: %lu  "
                      "server id %lu page size %u file number %lu  "
                      "max lsn: (%lu,0x%lx)",
                      (ulonglong) desc->timestamp,
                      (ulong) desc->maria_version,
                      (ulong) desc->mysql_version,
                      (ulong) desc->server_id,
                      desc->page_size, (ulong) desc->file_number,
                      LSN_IN_PARTS(desc->max_lsn)));
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043
  DBUG_RETURN(0);
}


/*
  @brief set the lsn to the files from_file - to_file if it is greater
  then written in the file

  @param from_file       first file number (min)
  @param to_file         last file number (max)
  @param lsn             the lsn for writing
  @param is_locked       true if current thread locked the log handler

  @retval 0 OK
  @retval 1 Error
*/

1044
static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
1045 1046
                                          LSN lsn, my_bool is_locked)
{
1047
  uint32 file;
1048 1049
  DBUG_ENTER("translog_set_lsn_for_files");
  DBUG_PRINT("enter", ("From: %lu  to: %lu  lsn: (%lu,0x%lx)  locked: %d",
1050
                       (ulong) from_file, (ulong) to_file,
unknown's avatar
unknown committed
1051
                       LSN_IN_PARTS(lsn),
1052 1053 1054 1055 1056 1057 1058
                       is_locked));
  DBUG_ASSERT(from_file <= to_file);
  DBUG_ASSERT(from_file > 0); /* we have not file 0 */

  /* Checks the current file (not finished yet file) */
  if (!is_locked)
    translog_lock();
1059
  if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
1060 1061 1062 1063 1064 1065 1066 1067 1068
  {
    if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
      log_descriptor.max_lsn= lsn;
    to_file--;
  }
  if (!is_locked)
    translog_unlock();

  /* Checks finished files if they are */
1069
  pthread_mutex_lock(&log_descriptor.file_header_lock);
1070 1071 1072 1073 1074 1075 1076 1077
  for (file= from_file; file <= to_file; file++)
  {
    LOGHANDLER_FILE_INFO info;
    File fd= open_logfile_by_number_no_cache(file);
    if (fd < 0 ||
        translog_read_file_header(&info, fd) ||
        (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
         translog_max_lsn_to_header(fd, lsn)))
1078 1079
    {
      translog_stop_writing();
1080
      DBUG_RETURN(1);
1081
    }
1082
  }
1083
  pthread_mutex_unlock(&log_descriptor.file_header_lock);
1084

1085 1086 1087 1088
  DBUG_RETURN(0);
}


1089 1090 1091
/* descriptor of file in unfinished_files */
struct st_file_counter
{
1092 1093
  uint32 file;            /* file number */
  uint32 counter;         /* counter for started writes */
1094 1095 1096 1097 1098 1099 1100 1101 1102
};


/*
  @brief mark file "in progress" (for multi-group records)

  @param file            log file number
*/

1103
static void translog_mark_file_unfinished(uint32 file)
1104 1105 1106 1107 1108 1109
{
  int place, i;
  struct st_file_counter fc, *fc_ptr;
  fc.file= file; fc.counter= 1;

  DBUG_ENTER("translog_mark_file_unfinished");
1110
  DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1111

1112
  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1113 1114 1115 1116 1117 1118 1119 1120

  if (log_descriptor.unfinished_files.elements == 0)
  {
    insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
    DBUG_PRINT("info", ("The first element inserted"));
    goto end;
  }

1121
  for (place= log_descriptor.unfinished_files.elements - 1;
1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162
       place >= 0;
       place--)
  {
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            place, struct st_file_counter *);
    if (fc_ptr->file <= file)
      break;
  }

  if (place >= 0 && fc_ptr->file == file)
  {
     fc_ptr->counter++;
     DBUG_PRINT("info", ("counter increased"));
     goto end;
  }

  if (place == (int)log_descriptor.unfinished_files.elements)
  {
    insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
    DBUG_PRINT("info", ("The last element inserted"));
    goto end;
  }
  /* shift and assign new element */
  insert_dynamic(&log_descriptor.unfinished_files,
                 (uchar*)
                 dynamic_element(&log_descriptor.unfinished_files,
                                 log_descriptor.unfinished_files.elements- 1,
                                 struct st_file_counter *));
  for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
  {
    /* we do not use set_dynamic() to avoid unneeded checks */
    memcpy(dynamic_element(&log_descriptor.unfinished_files,
                           i, struct st_file_counter *),
           dynamic_element(&log_descriptor.unfinished_files,
                           i + 1, struct st_file_counter *),
           sizeof(struct st_file_counter));
  }
  memcpy(dynamic_element(&log_descriptor.unfinished_files,
                         place + 1, struct st_file_counter *),
         &fc, sizeof(struct st_file_counter));
end:
1163
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174
  DBUG_VOID_RETURN;
}



/*
  @brief remove file mark "in progress" (for multi-group records)

  @param file            log file number
*/

1175
static void translog_mark_file_finished(uint32 file)
1176 1177 1178 1179
{
  int i;
  struct st_file_counter *fc_ptr;
  DBUG_ENTER("translog_mark_file_finished");
1180
  DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1181

unknown's avatar
unknown committed
1182 1183
  LINT_INIT(fc_ptr);

1184
  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201

  DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
  for (i= 0;
       i < (int) log_descriptor.unfinished_files.elements;
       i++)
  {
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            i, struct st_file_counter *);
    if (fc_ptr->file == file)
    {
      break;
    }
  }
  DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);

  if (! --fc_ptr->counter)
    delete_dynamic_element(&log_descriptor.unfinished_files, i);
1202
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217
  DBUG_VOID_RETURN;
}


/*
  @brief get max LSN of the record which parts stored in this file

  @param file            file number

  @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
    @retval LSN_IMPOSSIBLE File is still not finished
    @retval LSN_ERROR Error opening file
    @retval # LSN of the record which parts stored in this file
*/

1218
LSN translog_get_file_max_lsn_stored(uint32 file)
1219
{
1220
  uint32 limit= FILENO_IMPOSSIBLE;
1221
  DBUG_ENTER("translog_get_file_max_lsn_stored");
1222
  DBUG_PRINT("enter", ("file: %lu", (ulong)file));
1223 1224
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
1225

1226
  pthread_mutex_lock(&log_descriptor.unfinished_files_lock);
1227 1228 1229 1230 1231 1232 1233 1234 1235

  /* find file with minimum file number "in progress" */
  if (log_descriptor.unfinished_files.elements > 0)
  {
    struct st_file_counter *fc_ptr;
    fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
                            0, struct st_file_counter *);
    limit= fc_ptr->file; /* minimal file number "in progress" */
  }
1236
  pthread_mutex_unlock(&log_descriptor.unfinished_files_lock);
1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262

  /*
    if there is no "in progress file" then unfinished file is in progress
    for sure
  */
  if (limit == FILENO_IMPOSSIBLE)
  {
    TRANSLOG_ADDRESS horizon= translog_get_horizon();
    limit= LSN_FILE_NO(horizon);
  }

  if (file >= limit)
  {
    DBUG_PRINT("info", ("The file in in progress"));
    DBUG_RETURN(LSN_IMPOSSIBLE);
  }

  {
    LOGHANDLER_FILE_INFO info;
    File fd= open_logfile_by_number_no_cache(file);
    if (fd < 0 ||
        translog_read_file_header(&info, fd))
    {
      DBUG_PRINT("error", ("Can't read file header"));
      DBUG_RETURN(LSN_ERROR);
    }
unknown's avatar
unknown committed
1263
    DBUG_PRINT("info", ("Max lsn: (%lu,0x%lx)",
unknown's avatar
unknown committed
1264
                         LSN_IN_PARTS(info.max_lsn)));
1265 1266 1267 1268
    DBUG_RETURN(info.max_lsn);
  }
}

1269 1270 1271 1272 1273 1274 1275 1276
/*
  Initialize transaction log file buffer

  SYNOPSIS
    translog_buffer_init()
    buffer               The buffer to initialize

  RETURN
unknown's avatar
unknown committed
1277 1278
    0  OK
    1  Error
1279 1280
*/

1281
static my_bool translog_buffer_init(struct st_translog_buffer *buffer)
1282 1283
{
  DBUG_ENTER("translog_buffer_init");
unknown's avatar
unknown committed
1284
  buffer->last_lsn= LSN_IMPOSSIBLE;
1285
  /* This Buffer File */
unknown's avatar
unknown committed
1286
  buffer->file= NULL;
1287
  buffer->overlay= 0;
1288
  /* cache for current log */
unknown's avatar
unknown committed
1289
  memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
1290 1291 1292
  /* Buffer size */
  buffer->size= 0;
  /* cond of thread which is waiting for buffer filling */
unknown's avatar
unknown committed
1293 1294
  if (pthread_cond_init(&buffer->waiting_filling_buffer, 0))
    DBUG_RETURN(1);
1295
  /* Number of records which are in copy progress */
1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306
  buffer->copy_to_buffer_in_progress= 0;
  /* list of waiting buffer ready threads */
  buffer->waiting_flush= 0;
  /* lock for the buffer. Current buffer also lock the handler */
  if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST))
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


/*
unknown's avatar
unknown committed
1307
  @brief close transaction log file by descriptor
1308

unknown's avatar
unknown committed
1309
  @param file            pagegecache file descriptor reference
1310

unknown's avatar
unknown committed
1311 1312 1313
  @return Operation status
    @retval 0  OK
    @retval 1  Error
1314 1315
*/

unknown's avatar
unknown committed
1316
static my_bool translog_close_log_file(TRANSLOG_FILE *file)
1317
{
unknown's avatar
unknown committed
1318 1319 1320
  int rc= 0;
  flush_pagecache_blocks(log_descriptor.pagecache, &file->handler,
                         FLUSH_RELEASE);
unknown's avatar
unknown committed
1321 1322 1323 1324
  /*
    Sync file when we close it
    TODO: sync only we have changed the log
  */
unknown's avatar
unknown committed
1325 1326 1327 1328
  if (!file->is_sync)
    rc= my_sync(file->handler.file, MYF(MY_WME));
  rc|= my_close(file->handler.file, MYF(MY_WME));
  my_free(file, MYF(0));
unknown's avatar
unknown committed
1329
  return test(rc);
1330 1331 1332
}


unknown's avatar
unknown committed
1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343
/**
  @brief Write failure callback which does nothing (the log does not use
  pagecache writing)

  @param data            ignored
*/

void translog_dummy_write_failure(uchar *data __attribute__((unused)))
{
}


unknown's avatar
unknown committed
1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355
/**
  @brief Initializes TRANSLOG_FILE structure

  Sets the pagecache callbacks of the file (page validator, dummy callback
  and dummy write failure handler, with the file itself as their data) and
  the plain fields of the descriptor.

  @param file            reference on the file to initialize
  @param number          file number
  @param is_sync         is file synced on disk
*/

static void translog_file_init(TRANSLOG_FILE *file, uint32 number,
                               my_bool is_sync)
{
  pagecache_file_init(file->handler,
                      &translog_page_validator,
                      &translog_dummy_callback,
                      &translog_dummy_write_failure,
                      file);
  file->number= number;
  file->is_sync= is_sync;
  /* a just initialized file has not been recovered */
  file->was_recovered= 0;
}


1364 1365
/**
  @brief Create and fill header of new file.
1366

1367 1368 1369
  @note the caller must call it right after it has increased
   log_descriptor.horizon to the new file
   (log_descriptor.horizon+= LSN_ONE_FILE)
1370

1371 1372 1373

  @retval 0 OK
  @retval 1 Error
1374 1375
*/

1376
static my_bool translog_create_new_file()
1377
{
unknown's avatar
unknown committed
1378 1379 1380 1381
  TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
                                                 MYF(0));

  TRANSLOG_FILE *old= get_current_logfile();
1382
  uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
1383 1384
  DBUG_ENTER("translog_create_new_file");

unknown's avatar
unknown committed
1385
  if (file == NULL)
1386
    goto error;
1387 1388

  /*
1389 1390 1391 1392
    Writes max_lsn to the file header before finishing it (there is no need
    to lock file header buffer because it is still unfinished file, so only
    one thread can finish the file and nobody interested of LSN of current
    (unfinished) file, because no one can purge it).
1393
  */
unknown's avatar
unknown committed
1394
  if (translog_max_lsn_to_header(old->handler.file, log_descriptor.max_lsn))
1395
    goto error;
1396

unknown's avatar
unknown committed
1397 1398 1399 1400 1401 1402
  rw_wrlock(&log_descriptor.open_files_lock);
  DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
              log_descriptor.open_files.elements);
  DBUG_ASSERT(file_no == log_descriptor.max_file + 1);
  if (allocate_dynamic(&log_descriptor.open_files,
                       log_descriptor.max_file - log_descriptor.min_file + 2))
1403
    goto error_lock;
unknown's avatar
unknown committed
1404 1405
  if ((file->handler.file=
       create_logfile_by_number_no_cache(file_no)) == -1)
1406
    goto error_lock;
unknown's avatar
unknown committed
1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427
  translog_file_init(file, file_no, 0);

  /* this call just expand the array */
  insert_dynamic(&log_descriptor.open_files, (uchar*)&file);
  log_descriptor.max_file++;
  {
    char *start= (char*) dynamic_element(&log_descriptor.open_files, 0,
                                         TRANSLOG_FILE**);
    memmove(start + sizeof(TRANSLOG_FILE*), start,
            sizeof(TRANSLOG_FILE*) *
            (log_descriptor.max_file - log_descriptor.min_file + 1 - 1));
  }
  /* can't fail we because we expanded array */
  set_dynamic(&log_descriptor.open_files, (uchar*)&file, 0);
  DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
              log_descriptor.open_files.elements);
  rw_unlock(&log_descriptor.open_files_lock);

  DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no));

  if (translog_write_file_header())
1428 1429
    DBUG_RETURN(1);

unknown's avatar
unknown committed
1430
  if (ma_control_file_write_and_force(LSN_IMPOSSIBLE, file_no,
1431
                                      CONTROL_FILE_UPDATE_ONLY_LOGNO))
1432 1433
  {
    translog_stop_writing();
1434
    DBUG_RETURN(1);
1435
  }
1436 1437

  DBUG_RETURN(0);
unknown's avatar
unknown committed
1438

1439
error_lock:
unknown's avatar
unknown committed
1440
  rw_unlock(&log_descriptor.open_files_lock);
1441 1442
error:
  translog_stop_writing();
unknown's avatar
unknown committed
1443
  DBUG_RETURN(1);
1444 1445 1446
}


unknown's avatar
unknown committed
1447 1448
/**
  @brief Locks the loghandler buffer.
1449

unknown's avatar
unknown committed
1450
  @param buffer          This buffer which should be locked
1451

unknown's avatar
unknown committed
1452 1453 1454 1455
  @note See comment before buffer 'mutex' variable.

  @retval 0 OK
  @retval 1 Error
1456 1457 1458 1459
*/

static my_bool translog_buffer_lock(struct st_translog_buffer *buffer)
{
1460
  my_bool res;
1461
  DBUG_ENTER("translog_buffer_lock");
unknown's avatar
unknown committed
1462
  DBUG_PRINT("enter",
1463 1464
             ("Lock buffer #%u: (0x%lx)", (uint) buffer->buffer_no,
              (ulong) buffer));
1465
  res= (pthread_mutex_lock(&buffer->mutex) != 0);
1466
  DBUG_RETURN(res);
1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477
}


/*
  Unlock the loghandler buffer

  SYNOPSIS
    translog_buffer_unlock()
    buffer               This buffer which should be unlocked

  RETURN
unknown's avatar
unknown committed
1478 1479
    0  OK
    1  Error
1480 1481 1482 1483
*/

static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer)
{
1484
  my_bool res;
1485
  DBUG_ENTER("translog_buffer_unlock");
1486 1487
  DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx)",
                       (uint) buffer->buffer_no, (ulong) buffer));
1488

1489
  res= (pthread_mutex_unlock(&buffer->mutex) != 0);
1490 1491 1492 1493 1494
  DBUG_RETURN(res);
}


/**
  @brief Writes a header of a new page at the cursor position

  The header is: page number (3 bytes), file number (3 bytes), flags
  (1 byte), then CRC_SIZE bytes if TRANSLOG_PAGE_CRC is set and
  TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE bytes of sector protection
  table if TRANSLOG_SECTOR_PROTECTION is set. The horizon, the cursor and
  (for a non-chaser cursor) the buffer size are advanced by the header
  length.

  @param horizon         Where to write the page
  @param cursor          Where to write the page

  @note space for page header should be checked before
*/

static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
                                     struct st_buffer_cursor *cursor)
{
  uchar *page_start, *pos;
  uint header_len;

  DBUG_ENTER("translog_new_page_header");
  DBUG_ASSERT(cursor->ptr);

  cursor->protected= 0;

  page_start= cursor->ptr;
  pos= page_start;

  /* Page number */
  int3store(pos, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
  pos+= 3;
  /* File number */
  int3store(pos, LSN_FILE_NO(*horizon));
  pos+= 3;
  /* Flags */
  DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (pos - page_start));
  page_start[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
  pos++;

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
#ifndef DBUG_OFF
    DBUG_PRINT("info", ("write  0x11223344 CRC to (%lu,0x%lx)",
                        LSN_IN_PARTS(*horizon)));
    /* This will be overwritten by real CRC; This is just for debugging */
    int4store(pos, 0x11223344);
#endif
    /* CRC will be put when page is finished */
    pos+= CRC_SIZE;
  }

  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    /*
      The time() works like "random" values producer because it is enough to
      have such "random" for this purpose and it will not interfere with
      higher level pseudo random value generator
    */
    uint16 tmp_time= time(NULL);
    /* only the first byte of the table is set here */
    pos[0]= tmp_time & 0xFF;
    pos+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
  }

  header_len= (uint) (pos - page_start);
  (*horizon)+= header_len; /* increasing the offset part of the address */
  cursor->current_page_fill= header_len;
  if (!cursor->chaser)
    cursor->buffer->size+= header_len;
  cursor->ptr= pos;

  DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
  translog_check_cursor(cursor);
  DBUG_VOID_RETURN;
}

/*
  Put sector protection on the page image

  SYNOPSIS
    translog_put_sector_protection()
    page                 reference on the page content
    cursor               cursor of the buffer
unknown's avatar
unknown committed
1572 1573 1574 1575

  NOTES
    We put a sector protection on all following sectors on the page,
    except the first sector that is protected by page header.
1576 1577
*/

unknown's avatar
unknown committed
1578
static void translog_put_sector_protection(uchar *page,
1579 1580
                                           struct st_buffer_cursor *cursor)
{
unknown's avatar
unknown committed
1581
  uchar *table= page + log_descriptor.page_overhead -
unknown's avatar
unknown committed
1582 1583
    TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
  uint i, offset;
unknown's avatar
unknown committed
1584 1585 1586
  uint16 last_protected_sector= ((cursor->previous_offset - 1) /
                                 DISK_DRIVE_SECTOR_SIZE);
  uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
unknown's avatar
unknown committed
1587
  uint8 value= table[0] + cursor->write_counter;
1588
  DBUG_ENTER("translog_put_sector_protection");
unknown's avatar
unknown committed
1589

1590
  if (start_sector == 0)
unknown's avatar
unknown committed
1591 1592 1593 1594
  {
    /* First sector is protected by file & page numbers in the page header. */
    start_sector= 1;
  }
1595

unknown's avatar
unknown committed
1596 1597
  DBUG_PRINT("enter", ("Write counter:%u  value:%u  offset:%u, "
                       "last protected:%u  start sector:%u",
1598 1599 1600 1601 1602 1603
                       (uint) cursor->write_counter,
                       (uint) value,
                       (uint) cursor->previous_offset,
                       (uint) last_protected_sector, (uint) start_sector));
  if (last_protected_sector == start_sector)
  {
unknown's avatar
unknown committed
1604
    i= last_protected_sector;
unknown's avatar
unknown committed
1605
    offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
1606 1607 1608 1609
    /* restore data, because we modified sector which was protected */
    if (offset < cursor->previous_offset)
      page[offset]= table[i];
  }
unknown's avatar
unknown committed
1610 1611 1612
  for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
       i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
       i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
1613
  {
unknown's avatar
unknown committed
1614 1615
    DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x",
                        i, offset, (uint) page[offset]));
1616
    table[i]= page[offset];
unknown's avatar
unknown committed
1617 1618 1619
    page[offset]= value;
    DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x",
                        i, offset, (uint) page[offset]));
1620 1621 1622 1623 1624 1625
  }
  DBUG_VOID_RETURN;
}


/*
unknown's avatar
unknown committed
1626
  Calculate CRC32 of given area
1627 1628

  SYNOPSIS
unknown's avatar
unknown committed
1629
    translog_crc()
1630 1631 1632 1633
    area                 Pointer of the area beginning
    length               The Area length

  RETURN
unknown's avatar
unknown committed
1634
    CRC32
1635 1636
*/

unknown's avatar
unknown committed
1637
static uint32 translog_crc(uchar *area, uint length)
1638
{
1639 1640
  DBUG_ENTER("translog_crc");
  DBUG_RETURN(crc32(0L, (unsigned char*) area, length));
1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655
}


/*
  Finish current page: pad the unused tail with TRANSLOG_FILLER and put
  sector protection and/or CRC on it if they are enabled

  SYNOPSIS
    translog_finish_page()
    horizon              \ horizon & buffer pointers
    cursor               /

  NOTES
    Does nothing if the cursor is already marked as protected; otherwise
    marks it as protected.
*/

static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
                                 struct st_buffer_cursor *cursor)
{
  /* beginning of the page the cursor currently points into */
  uchar *page= cursor->ptr - cursor->current_page_fill;
  /* number of bytes which are not yet used on this page */
  uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
  DBUG_ENTER("translog_finish_page");
  DBUG_PRINT("enter", ("Buffer: #%u 0x%lx  "
                       "Buffer addr: (%lu,0x%lx)  "
                       "Page addr: (%lu,0x%lx)  "
                       "size:%lu (%lu)  Pg:%u  left:%u",
                       (uint) cursor->buffer_no, (ulong) cursor->buffer,
                       LSN_IN_PARTS(cursor->buffer->offset),
                       (ulong) LSN_FILE_NO(*horizon),
                       (ulong) (LSN_OFFSET(*horizon) -
                                cursor->current_page_fill),
                       (ulong) cursor->buffer->size,
                       (ulong) (cursor->ptr -cursor->buffer->buffer),
                       (uint) cursor->current_page_fill, (uint) left));
  DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset));
  translog_check_cursor(cursor);

  if (cursor->protected)
  {
    DBUG_PRINT("info", ("Already protected and finished"));
    DBUG_VOID_RETURN;
  }
  cursor->protected= 1;

  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left)
  {
    DBUG_PRINT("info", ("left: %u", (uint) left));
    /* fill the tail of the page and move all positions past it */
    memset(cursor->ptr, TRANSLOG_FILLER, left);
    cursor->ptr+= left;
    (*horizon)+= left; /* offset increasing */
    if (!cursor->chaser)
      cursor->buffer->size+= left;
    /* We are finishing the page so reset the counter */
    cursor->current_page_fill= 0;
    DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx  "
                        "chaser: %d  Size: %lu (%lu)",
                        (uint) cursor->buffer->buffer_no,
                        (ulong) cursor->buffer, cursor->chaser,
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
    translog_check_cursor(cursor);
  }

  /*
    When we are finishing the page other thread might not finish the page
    header yet (in case if we started from the middle of the page) so we
    have to read log_descriptor.flags but not the flags from the page.
  */
  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    translog_put_sector_protection(page, cursor);
    DBUG_PRINT("info", ("drop write_counter"));
    cursor->previous_offset= 0;
    cursor->write_counter= 0;
  }

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
    /* CRC covers everything on the page after the page overhead */
    uint32 crc= translog_crc(page + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
    DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
    /* We have page number, file number and flag before crc */
    int4store(page + 3 + 3 + 1, crc);
  }
  DBUG_VOID_RETURN;
}


/*
1725
  @brief Wait until all threads have finished filling this buffer.
1726

1727
  @param buffer          This buffer should be check
1728
*/
1729

1730 1731 1732
static void translog_wait_for_writers(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_wait_for_writers");
unknown's avatar
unknown committed
1733
  DBUG_PRINT("enter", ("Buffer #%u 0x%lx  copies in progress: %u",
1734 1735
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (int) buffer->copy_to_buffer_in_progress));
1736
  translog_buffer_lock_assert_owner(buffer);
1737

unknown's avatar
unknown committed
1738
  while (buffer->copy_to_buffer_in_progress)
1739
  {
1740 1741
    DBUG_PRINT("info", ("wait for writers... buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1742
    DBUG_ASSERT(buffer->file != NULL);
unknown's avatar
unknown committed
1743
    pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1744 1745
    DBUG_PRINT("info", ("wait for writers done buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1746
  }
1747 1748 1749 1750 1751 1752 1753

  DBUG_VOID_RETURN;
}


/*

unknown's avatar
unknown committed
1754
  Wait for buffer to become free
1755 1756 1757

  SYNOPSIS
    translog_wait_for_buffer_free()
unknown's avatar
unknown committed
1758
    buffer               The buffer we are waiting for
1759 1760 1761 1762 1763 1764 1765 1766

  NOTE
    - this buffer should be locked
*/

static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_wait_for_buffer_free");
unknown's avatar
unknown committed
1767
  DBUG_PRINT("enter", ("Buffer: #%u 0x%lx  copies in progress: %u  "
unknown's avatar
unknown committed
1768
                       "File: %d  size: %lu",
1769 1770
                       (uint) buffer->buffer_no, (ulong) buffer,
                       (int) buffer->copy_to_buffer_in_progress,
unknown's avatar
unknown committed
1771 1772
                       (buffer->file ? buffer->file->handler.file : -1),
                       (ulong) buffer->size));
1773 1774 1775

  translog_wait_for_writers(buffer);

unknown's avatar
unknown committed
1776
  while (buffer->file != NULL)
1777
  {
1778 1779
    DBUG_PRINT("info", ("wait for writers... buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1780
    pthread_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
1781 1782
    DBUG_PRINT("info", ("wait for writers done. buffer: #%u  0x%lx",
                        (uint) buffer->buffer_no, (ulong) buffer));
unknown's avatar
unknown committed
1783 1784
  }
  DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
1785 1786 1787 1788 1789
  DBUG_VOID_RETURN;
}


/*
  Initialize the cursor for a buffer

  SYNOPSIS
    translog_cursor_init()
    cursor               The cursor to initialize
    buffer               The buffer the cursor will point to
    buffer_no            Number of the buffer

  NOTE
    Every cursor except log_descriptor.bc is marked as a chaser.
*/

static void translog_cursor_init(struct st_buffer_cursor *cursor,
                                 struct st_translog_buffer *buffer,
                                 uint8 buffer_no)
{
  DBUG_ENTER("translog_cursor_init");
  /* bind the cursor to the beginning of the buffer */
  cursor->buffer= buffer;
  cursor->buffer_no= buffer_no;
  cursor->ptr= buffer->buffer;
  cursor->chaser= (cursor != &log_descriptor.bc);
  /* nothing is written or protected on the current page yet */
  cursor->current_page_fill= 0;
  cursor->previous_offset= 0;
  cursor->write_counter= 0;
  cursor->protected= 0;
  DBUG_VOID_RETURN;
}


/*
1817
  @brief Initialize buffer for the current file, and a cursor for this buffer.
1818

1819 1820 1821
  @param buffer          The buffer
  @param cursor          It's cursor
  @param buffer_no       Number of buffer
1822
*/
1823

1824 1825
static void translog_start_buffer(struct st_translog_buffer *buffer,
                                  struct st_buffer_cursor *cursor,
unknown's avatar
unknown committed
1826
                                  uint buffer_no)
1827 1828 1829
{
  DBUG_ENTER("translog_start_buffer");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
1830
             ("Assign buffer: #%u (0x%lx) offset: 0x%lx(%lu)",
1831
              (uint) buffer->buffer_no, (ulong) buffer,
1832 1833
              (ulong) LSN_OFFSET(log_descriptor.horizon),
              (ulong) LSN_OFFSET(log_descriptor.horizon)));
1834
  DBUG_ASSERT(buffer_no == buffer->buffer_no);
unknown's avatar
unknown committed
1835
  buffer->last_lsn= LSN_IMPOSSIBLE;
1836
  buffer->offset= log_descriptor.horizon;
1837
  buffer->next_buffer_offset= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
1838
  buffer->file= get_current_logfile();
1839 1840 1841
  buffer->overlay= 0;
  buffer->size= 0;
  translog_cursor_init(cursor, buffer, buffer_no);
unknown's avatar
unknown committed
1842 1843 1844 1845
  DBUG_PRINT("info", ("file: #%ld (%d)  init cursor #%u: 0x%lx  "
                      "chaser: %d  Size: %lu (%lu)",
                      (long) (buffer->file ? buffer->file->number : 0),
                      (buffer->file ? buffer->file->handler.file : -1),
1846 1847
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
1848
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
unknown's avatar
unknown committed
1849
  translog_check_cursor(cursor);
1850 1851 1852 1853 1854
  DBUG_VOID_RETURN;
}


/**
  @brief Switch to the next buffer in a chain.

  Finishes the current page, moves the cursor to the buffer which follows
  the current one (cyclically, modulo TRANSLOG_BUFFERS_NO) and starts a new
  page there.

  @param horizon         \ Pointers on current position in file and buffer
  @param cursor          /
  @param new_file        Also start new file

  @note
   - loghandler should be locked
   - after return new and old buffer still are locked
   - a chaser cursor only re-targets itself to the next buffer; a
     non-chaser cursor locks the next buffer, waits until it is free and
     starts it (creating the new file if requested)

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
                                    struct st_buffer_cursor *cursor,
                                    my_bool new_file)
{
  my_bool chasing= cursor->chaser;
  uint old_buffer_no= cursor->buffer_no;
  uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *old_buffer= log_descriptor.buffers + old_buffer_no;
  struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
  DBUG_ENTER("translog_buffer_next");

  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)  chasing: %d",
                      LSN_IN_PARTS(log_descriptor.horizon), chasing));

  DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);

  translog_finish_page(horizon, cursor);

  if (chasing)
  {
    DBUG_ASSERT(new_buffer->file != NULL);
  }
  else
  {
    translog_buffer_lock(new_buffer);
    translog_wait_for_buffer_free(new_buffer);
  }

  if (new_file)
  {
    /* move the horizon to the next file and its header page */
    (*horizon)+= LSN_ONE_FILE;
    (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
    if (!chasing && translog_create_new_file())
    {
      DBUG_RETURN(1);
    }
  }

  /* prepare next page */
  if (!chasing)
  {
    translog_lock_assert_owner();
    translog_start_buffer(new_buffer, cursor, new_buffer_no);
  }
  else
    translog_cursor_init(cursor, new_buffer, new_buffer_no);

  /* link the old buffer to the place where the new one begins */
  old_buffer->next_buffer_offset= new_buffer->offset;
  translog_new_page_header(horizon, cursor);
  DBUG_RETURN(0);
}


/*
1921
  Sets max LSN sent to file, and address from which data is only in the buffer
1922 1923

  SYNOPSIS
1924
    translog_set_sent_to_disk()
1925
    lsn                  LSN to assign
1926 1927 1928
    in_buffers           to assign to in_buffers_only

  TODO: use atomic operations if possible (64bit architectures?)
1929 1930
*/

1931
static void translog_set_sent_to_disk(LSN lsn, TRANSLOG_ADDRESS in_buffers)
1932
{
1933
  DBUG_ENTER("translog_set_sent_to_disk");
1934
  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
1935 1936
  DBUG_PRINT("enter", ("lsn: (%lu,0x%lx) in_buffers: (%lu,0x%lx)  "
                       "in_buffers_only: (%lu,0x%lx)",
unknown's avatar
unknown committed
1937 1938 1939
                       LSN_IN_PARTS(lsn),
                       LSN_IN_PARTS(in_buffers),
                       LSN_IN_PARTS(log_descriptor.in_buffers_only)));
1940 1941
  DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0);
  log_descriptor.sent_to_disk= lsn;
1942 1943 1944 1945 1946 1947
  /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
  if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
  {
    log_descriptor.in_buffers_only= in_buffers;
    DBUG_PRINT("info", ("set new in_buffers_only"));
  }
1948
  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
1949 1950 1951 1952 1953
  DBUG_VOID_RETURN;
}


/*
  Sets address from which data is only in the buffer

  SYNOPSIS
    translog_set_only_in_buffers()
    in_buffers           to assign to in_buffers_only

  NOTES
    The value is assigned only if it is bigger than the current
    in_buffers_only and the loghandler status is TRANSLOG_OK.
    log_descriptor.sent_to_disk_lock is held for the duration of the check
    and the assignment and is released on every path out of the function.
*/

static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
{
  DBUG_ENTER("translog_set_only_in_buffers");
  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
  DBUG_PRINT("enter", ("in_buffers: (%lu,0x%lx)  "
                       "in_buffers_only: (%lu,0x%lx)",
                       LSN_IN_PARTS(in_buffers),
                       LSN_IN_PARTS(log_descriptor.in_buffers_only)));
  /*
    LSN_IMPOSSIBLE == 0 => it will work for very first time.
    If the loghandler is not in TRANSLOG_OK state the value is left
    untouched, but we must still fall through to the unlock below instead
    of returning with the mutex held.
  */
  if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0 &&
      translog_status == TRANSLOG_OK)
  {
    log_descriptor.in_buffers_only= in_buffers;
    DBUG_PRINT("info", ("set new in_buffers_only"));
  }
  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
  DBUG_VOID_RETURN;
}


/*
  Gets address from which data is only in the buffer

  SYNOPSIS
    translog_only_in_buffers()

  RETURN
    address from which data is only in the buffer
*/

static TRANSLOG_ADDRESS translog_only_in_buffers()
{
  register TRANSLOG_ADDRESS addr;
  DBUG_ENTER("translog_only_in_buffers");
1997
  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
1998
  addr= log_descriptor.in_buffers_only;
1999
  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2000 2001 2002 2003 2004 2005
  DBUG_RETURN(addr);
}


/*
  Get max LSN sent to file
2006 2007

  SYNOPSIS
2008
    translog_get_sent_to_disk()
2009 2010 2011

  RETURN
    max LSN send to file
2012 2013
*/

2014
static LSN translog_get_sent_to_disk()
2015
{
2016
  register LSN lsn;
2017
  DBUG_ENTER("translog_get_sent_to_disk");
2018
  pthread_mutex_lock(&log_descriptor.sent_to_disk_lock);
2019
  lsn= log_descriptor.sent_to_disk;
2020
  pthread_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2021
  DBUG_RETURN(lsn);
2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035
}


/*
  Get first chunk address on the given page

  SYNOPSIS
    translog_get_first_chunk_offset()
    page                 The page where to find first chunk

  RETURN
    first chunk offset
*/

unknown's avatar
unknown committed
2036
static my_bool translog_get_first_chunk_offset(uchar *page)
2037 2038
{
  DBUG_ENTER("translog_get_first_chunk_offset");
2039 2040
  DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
  DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054
}


/*
  Write coded length of record

  SYNOPSIS
    translog_write_variable_record_1group_code_len
    dst                  Destination buffer pointer
    length               Length which should be coded
    header_len           Calculated total header length
*/

static void
unknown's avatar
unknown committed
2055
translog_write_variable_record_1group_code_len(uchar *dst,
2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069
                                               translog_size_t length,
                                               uint16 header_len)
{
  switch (header_len) {
  case 6:                                      /* (5 + 1) */
    DBUG_ASSERT(length <= 250);
    *dst= (uint8) length;
    return;
  case 8:                                      /* (5 + 3) */
    DBUG_ASSERT(length <= 0xFFFF);
    *dst= 251;
    int2store(dst + 1, length);
    return;
  case 9:                                      /* (5 + 4) */
unknown's avatar
unknown committed
2070
    DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095
    *dst= 252;
    int3store(dst + 1, length);
    return;
  case 10:                                     /* (5 + 5) */
    *dst= 253;
    int4store(dst + 1, length);
    return;
  default:
    DBUG_ASSERT(0);
  }
  return;
}


/*
  Decode record data length and advance given pointer to the next field

  SYNOPSIS
    translog_variable_record_1group_decode_len()
    src                  The pointer to the pointer to the length beginning

  RETURN
    decoded length
*/

unknown's avatar
unknown committed
2096
static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
2097 2098 2099 2100
{
  uint8 first= (uint8) (**src);
  switch (first) {
  case 251:
unknown's avatar
unknown committed
2101
    (*src)+= 3;
2102 2103
    return (uint2korr((*src) - 2));
  case 252:
unknown's avatar
unknown committed
2104
    (*src)+= 4;
2105 2106
    return (uint3korr((*src) - 3));
  case 253:
unknown's avatar
unknown committed
2107
    (*src)+= 5;
2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131
    return (uint4korr((*src) - 4));
  case 254:
  case 255:
    DBUG_ASSERT(0);                             /* reserved for future use */
    return (0);
  default:
    (*src)++;
    return (first);
  }
}


/*
  Get total length of this chunk (not only body)

  SYNOPSIS
    translog_get_total_chunk_length()
    page                 The page where chunk placed
    offset               Offset of the chunk on this place

  RETURN
    total length of the chunk
*/

unknown's avatar
unknown committed
2132
static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
2133 2134 2135
{
  DBUG_ENTER("translog_get_total_chunk_length");
  switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
unknown's avatar
unknown committed
2136
  case TRANSLOG_CHUNK_LSN:
2137
  {
unknown's avatar
unknown committed
2138
    /* 0 chunk referred as LSN (head or tail) */
2139
    translog_size_t rec_len;
unknown's avatar
unknown committed
2140
    uchar *start= page + offset;
2141
    uchar *ptr= start + 1 + 2; /* chunk type and short trid */
2142 2143 2144 2145
    uint16 chunk_len, header_len, page_rest;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
    rec_len= translog_variable_record_1group_decode_len(&ptr);
    chunk_len= uint2korr(ptr);
unknown's avatar
unknown committed
2146 2147
    header_len= (ptr -start) + 2;
    DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161
                        (ulong) rec_len, (uint) chunk_len, (uint) header_len));
    if (chunk_len)
    {
      DBUG_PRINT("info", ("chunk len: %u + %u = %u",
                          (uint) header_len, (uint) chunk_len,
                          (uint) (chunk_len + header_len)));
      DBUG_RETURN(chunk_len + header_len);
    }
    page_rest= TRANSLOG_PAGE_SIZE - offset;
    DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
    if (rec_len + header_len < page_rest)
      DBUG_RETURN(rec_len + header_len);
    DBUG_RETURN(page_rest);
  }
unknown's avatar
unknown committed
2162
  case TRANSLOG_CHUNK_FIXED:
2163
  {
unknown's avatar
unknown committed
2164
    uchar *ptr;
2165
    uint type= page[offset] & TRANSLOG_REC_TYPE;
unknown's avatar
unknown committed
2166 2167 2168 2169
    uint length;
    int i;
    /* 1 (pseudo)fixed record (also LSN) */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
unknown's avatar
unknown committed
2170
    DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
2171
                LOGRECTYPE_FIXEDLENGTH ||
unknown's avatar
unknown committed
2172
                log_record_type_descriptor[type].rclass ==
2173
                LOGRECTYPE_PSEUDOFIXEDLENGTH);
unknown's avatar
unknown committed
2174
    if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
2175 2176 2177 2178 2179 2180
    {
      DBUG_PRINT("info",
                 ("Fixed length: %u",
                  (uint) (log_record_type_descriptor[type].fixed_length + 3)));
      DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
    }
unknown's avatar
unknown committed
2181 2182 2183 2184

    ptr= page + offset + 3;            /* first compressed LSN */
    length= log_record_type_descriptor[type].fixed_length + 3;
    for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
2185
    {
unknown's avatar
unknown committed
2186
      /* first 2 bits is length - 2 */
2187
      uint len= (((uint8) (*ptr)) >> 6) + 2;
2188 2189
      if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
        len+= LSN_STORE_SIZE; /* case of full LSN storing */
unknown's avatar
unknown committed
2190
      ptr+= len;
2191
      /* subtract saved bytes */
2192
      length-= (LSN_STORE_SIZE - len);
2193
    }
unknown's avatar
unknown committed
2194 2195
    DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
    DBUG_RETURN(length);
2196
  }
unknown's avatar
unknown committed
2197 2198 2199
  case TRANSLOG_CHUNK_NOHDR:
    /* 2 no header chunk (till page end) */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR  length: %u",
2200 2201 2202 2203 2204
                        (uint) (TRANSLOG_PAGE_SIZE - offset)));
    DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
  case TRANSLOG_CHUNK_LNGTH:                   /* 3 chunk with chunk length */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
    DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
unknown's avatar
unknown committed
2205
    DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
2206 2207 2208
    DBUG_RETURN(uint2korr(page + offset + 1) + 3);
  default:
    DBUG_ASSERT(0);
unknown's avatar
unknown committed
2209
    DBUG_RETURN(0);
2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221
  }
}


/*
  Flush given buffer

  SYNOPSIS
    translog_buffer_flush()
    buffer               This buffer should be flushed

  RETURN
unknown's avatar
unknown committed
2222 2223
    0  OK
    1  Error
2224 2225 2226 2227
*/

static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
{
2228
  uint32 i, pg;
unknown's avatar
unknown committed
2229
  TRANSLOG_FILE *file;
2230
  DBUG_ENTER("translog_buffer_flush");
unknown's avatar
unknown committed
2231
  DBUG_ASSERT(buffer->file != NULL);
2232
  DBUG_PRINT("enter",
2233
             ("Buffer: #%u 0x%lx file: %d  offset: (%lu,0x%lx)  size: %lu",
2234
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
2235
              buffer->file->handler.file,
unknown's avatar
unknown committed
2236
              LSN_IN_PARTS(buffer->offset),
2237
              (ulong) buffer->size));
2238
  translog_buffer_lock_assert_owner(buffer);
2239 2240 2241


  translog_wait_for_writers(buffer);
2242 2243 2244 2245

  if (buffer->overlay && buffer->overlay->file == buffer->file &&
      cmp_translog_addr(buffer->overlay->offset + buffer->overlay->size,
                        buffer->offset) > 0)
2246
  {
2247 2248 2249 2250
    /*
      This can't happen for normal translog_flush,
      only during destroying the loghandler
    */
2251
    struct st_translog_buffer *overlay= buffer->overlay;
2252
    TRANSLOG_ADDRESS buffer_offset= buffer->offset;
unknown's avatar
unknown committed
2253
    TRANSLOG_FILE *fl= buffer->file;
2254 2255
    translog_buffer_unlock(buffer);
    translog_buffer_lock(overlay);
2256
    /* rechecks under mutex protection that overlay is still our overlay */
unknown's avatar
unknown committed
2257
    if (buffer->overlay->file == fl &&
2258 2259 2260 2261 2262
        cmp_translog_addr(buffer->overlay->offset + buffer->overlay->size,
                          buffer_offset) > 0)
    {
      translog_wait_for_buffer_free(overlay);
    }
2263 2264
    translog_buffer_unlock(overlay);
    translog_buffer_lock(buffer);
unknown's avatar
unknown committed
2265
    if (buffer->file != NULL && buffer_offset == buffer->offset)
2266 2267 2268 2269 2270 2271 2272 2273 2274
    {
      /*
        This means that somebody else flushed the buffer while we was
        waiting for overlay then for locking buffer again.
        It is possible for single request for flush and destroying the
        loghandler.
      */
      DBUG_RETURN(0);
    }
2275 2276
  }

unknown's avatar
unknown committed
2277 2278 2279 2280
  /*
    Send page by page in the pagecache what we are going to write on the
    disk
  */
unknown's avatar
unknown committed
2281
  file= buffer->file;
unknown's avatar
unknown committed
2282
  for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
2283 2284
       i < buffer->size;
       i+= TRANSLOG_PAGE_SIZE, pg++)
2285
  {
2286 2287 2288
    TRANSLOG_ADDRESS addr= (buffer->offset + i);
    TRANSLOG_VALIDATOR_DATA data;
    data.addr= &addr;
unknown's avatar
unknown committed
2289
    DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
2290
    DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
unknown's avatar
unknown committed
2291
    if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN)
2292
      DBUG_RETURN(1);
2293
    if (pagecache_inject(log_descriptor.pagecache,
unknown's avatar
unknown committed
2294
                        &file->handler, pg, 3,
2295 2296 2297
                        buffer->buffer + i,
                        PAGECACHE_PLAIN_PAGE,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
2298
                        PAGECACHE_PIN_LEFT_UNPINNED, 0,
unknown's avatar
unknown committed
2299
                        LSN_IMPOSSIBLE))
2300
    {
2301
      DBUG_PRINT("error", ("Can't write page (%lu,0x%lx) to pagecache",
2302
                           (ulong) buffer->file,
2303
                           (ulong) (LSN_OFFSET(buffer->offset)+ i)));
2304 2305
      translog_stop_writing();
      DBUG_RETURN(1);
2306 2307
    }
  }
unknown's avatar
unknown committed
2308 2309
  file->is_sync= 0;
  if (my_pwrite(file->handler.file, (char*) buffer->buffer,
2310
                buffer->size, LSN_OFFSET(buffer->offset),
2311
                log_write_flags))
2312
  {
2313
    DBUG_PRINT("error", ("Can't write buffer (%lu,0x%lx) size %lu "
unknown's avatar
unknown committed
2314
                         "to the disk (%d)",
unknown's avatar
unknown committed
2315
                         (ulong) file->handler.file,
2316
                         (ulong) LSN_OFFSET(buffer->offset),
2317
                         (ulong) buffer->size, errno));
2318
    translog_stop_writing();
2319 2320
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2321 2322 2323 2324 2325 2326
  /*
    Dropping the flag in such way can make false alarm: signalling than the
    file in not sync when it is sync, but the situation is quite rare and
    protections with mutexes give much more overhead to the whole engine
  */
  file->is_sync= 0;
unknown's avatar
unknown committed
2327

2328
  if (LSN_OFFSET(buffer->last_lsn) != 0)    /* if buffer->last_lsn is set */
2329
    translog_set_sent_to_disk(buffer->last_lsn,
2330 2331 2332
                              buffer->next_buffer_offset);
  else
    translog_set_only_in_buffers(buffer->next_buffer_offset);
2333
  /* Free buffer */
unknown's avatar
unknown committed
2334
  buffer->file= NULL;
2335
  buffer->overlay= 0;
unknown's avatar
unknown committed
2336
  pthread_cond_broadcast(&buffer->waiting_filling_buffer);
2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349
  DBUG_RETURN(0);
}


/*
  Recover page with sector protection (wipe out failed chunks)

  SYNOPSIS
    translog_recover_page_up_to_sector()
    page                 reference on the page
    offset               offset of failed sector

  RETURN
unknown's avatar
unknown committed
2350 2351
    0  OK
    1  Error
2352 2353
*/

unknown's avatar
unknown committed
2354
static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
2355 2356 2357
{
  uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
  DBUG_ENTER("translog_recover_page_up_to_sector");
unknown's avatar
unknown committed
2358
  DBUG_PRINT("enter", ("offset: %u  first chunk: %u",
2359 2360
                       (uint) offset, (uint) chunk_offset));

unknown's avatar
unknown committed
2361
  while (page[chunk_offset] != TRANSLOG_FILLER && chunk_offset < offset)
2362 2363 2364 2365 2366
  {
    uint16 chunk_length;
    if ((chunk_length=
         translog_get_total_chunk_length(page, chunk_offset)) == 0)
    {
2367
      DBUG_PRINT("error", ("cant get chunk length (offset %u)",
2368 2369 2370
                           (uint) chunk_offset));
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
2371
    DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2372 2373 2374
                        (uint) chunk_offset, (uint) chunk_length));
    if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
    {
2375
      DBUG_PRINT("error", ("damaged chunk (offset %u) in trusted area",
2376 2377 2378 2379 2380 2381 2382
                           (uint) chunk_offset));
      DBUG_RETURN(1);
    }
    chunk_offset+= chunk_length;
  }

  valid_chunk_end= chunk_offset;
unknown's avatar
unknown committed
2383
  /* end of trusted area - sector parsing */
unknown's avatar
unknown committed
2384
  while (page[chunk_offset] != TRANSLOG_FILLER)
2385 2386 2387 2388 2389
  {
    uint16 chunk_length;
    if ((chunk_length=
         translog_get_total_chunk_length(page, chunk_offset)) == 0)
      break;
unknown's avatar
unknown committed
2390 2391

    DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2392
                        (uint) chunk_offset, (uint) chunk_length));
unknown's avatar
unknown committed
2393 2394
    if (((ulong) chunk_offset) + ((ulong) chunk_length) >
        (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
2395
      break;
unknown's avatar
unknown committed
2396

2397 2398 2399 2400 2401
    chunk_offset+= chunk_length;
    valid_chunk_end= chunk_offset;
  }
  DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));

unknown's avatar
unknown committed
2402 2403
  memset(page + valid_chunk_end, TRANSLOG_FILLER,
         TRANSLOG_PAGE_SIZE - valid_chunk_end);
2404 2405 2406 2407 2408

  DBUG_RETURN(0);
}


unknown's avatar
unknown committed
2409 2410 2411
/**
  @brief Dummy write callback.
*/
2412

unknown's avatar
unknown committed
2413
static my_bool
unknown's avatar
unknown committed
2414 2415 2416
translog_dummy_callback(uchar *page __attribute__((unused)),
                        pgcache_page_no_t page_no __attribute__((unused)),
                        uchar* data_ptr __attribute__((unused)))
unknown's avatar
unknown committed
2417 2418 2419
{
  return 0;
}
2420

unknown's avatar
unknown committed
2421

unknown's avatar
unknown committed
2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483
/**
  @brief Checks and removes sector protection.

  The first byte of every sector except the first one is compared with
  the first byte of the previous sector (for sector 1: with table[0]). If
  the difference is bigger than DISK_DRIVE_SECTOR_SIZE / 3 the sector is
  treated as not written, the page is repaired with
  translog_recover_page_up_to_sector() and file->was_recovered is set.
  Otherwise the byte saved in the table is put back at the beginning of
  the sector.

  @param page            reference on the page content.
  @param file            transaction log descriptor.

  @retval 0 OK
  @retval 1 Error
*/

static my_bool
translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file)
{
  /* table of saved bytes is placed at the very end of the page header */
  uchar *table= (page + page_overhead[page[TRANSLOG_PAGE_FLAGS]] -
                 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE);
  uint8 current= table[0];
  uint i= 1;
  uint offset= DISK_DRIVE_SECTOR_SIZE;
  DBUG_ENTER("translog_check_sector_protection");

  while (i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE)
  {
    /*
      TODO: add chunk counting for "suspecting" sectors (difference is
      more than 1-2), if difference more then present chunks then it is
      the problem.
    */
    uint8 test= page[offset];
    long distance;
    DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx "
                        "read: 0x%x  stored: 0x%x%x",
                        i, offset, (ulong) current,
                        (uint) uint2korr(page + offset), (uint) table[i],
                        (uint) table[i + 1]));
    /*
      3 is minimal possible record length. So we can have "distance"
      between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
      only if it is old value, i.e. the sector was not written.
    */
    if (test < current)
      distance= 0xFFL - current + test;         /* the counter wrapped */
    else
      distance= (long) (test - current);
    if (distance > DISK_DRIVE_SECTOR_SIZE / 3)
    {
      if (translog_recover_page_up_to_sector(page, offset))
        DBUG_RETURN(1);
      file->was_recovered= 1;
      DBUG_RETURN(0);
    }

    /* Restore value on the page */
    page[offset]= table[i];
    current= test;
    DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx  "
                        "read: 0x%x  stored: 0x%x",
                        i, offset, (ulong) current,
                        (uint) page[offset], (uint) table[i]));
    i++;
    offset+= DISK_DRIVE_SECTOR_SIZE;
  }
  DBUG_RETURN(0);
}


unknown's avatar
unknown committed
2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496
/**
  @brief Log page validator (read callback)

  @param page            The page data to check
  @param page_no         The page number (<offset>/<page length>)
  @param data_ptr        Read callback data pointer (pointer to TRANSLOG_FILE)


  @todo: add turning loghandler to read-only mode after merging with
  that patch.

  @retval 0 OK
  @retval 1 Error
2497
*/
unknown's avatar
unknown committed
2498 2499 2500 2501

static my_bool translog_page_validator(uchar *page,
                                       pgcache_page_no_t page_no,
                                       uchar* data_ptr)
2502
{
unknown's avatar
unknown committed
2503 2504
  uint this_page_page_overhead;
  uint flags;
unknown's avatar
unknown committed
2505 2506 2507 2508 2509
  uchar *page_pos;
  TRANSLOG_FILE *data= (TRANSLOG_FILE *) data_ptr;
#ifndef DBUG_OFF
  uint32 offset= page_no * TRANSLOG_PAGE_SIZE;
#endif
2510 2511
  DBUG_ENTER("translog_page_validator");

unknown's avatar
unknown committed
2512
  data->was_recovered= 0;
2513

unknown's avatar
unknown committed
2514 2515
  if (uint3korr(page) != page_no ||
      uint3korr(page + 3) != data->number)
2516
  {
2517
    DBUG_PRINT("error", ("Page (%lu,0x%lx): "
unknown's avatar
unknown committed
2518
                         "page address written in the page is incorrect: "
2519
                         "File %lu instead of %lu or page %lu instead of %lu",
unknown's avatar
unknown committed
2520 2521
                         (ulong) data->number, (ulong) offset,
                         (ulong) uint3korr(page + 3), (ulong) data->number,
2522
                         (ulong) uint3korr(page),
unknown's avatar
unknown committed
2523
                         (ulong) page_no));
2524 2525
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2526 2527
  flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
  this_page_page_overhead= page_overhead[flags];
2528 2529 2530
  if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
                TRANSLOG_RECORD_CRC))
  {
2531
    DBUG_PRINT("error", ("Page (%lu,0x%lx): "
2532
                         "Garbage in the page flags field detected : %x",
unknown's avatar
unknown committed
2533 2534
                         (ulong) data->number, (ulong) offset,
                         (uint) flags));
2535 2536
    DBUG_RETURN(1);
  }
unknown's avatar
unknown committed
2537
  page_pos= page + (3 + 3 + 1);
2538 2539
  if (flags & TRANSLOG_PAGE_CRC)
  {
unknown's avatar
unknown committed
2540 2541 2542 2543
    uint32 crc= translog_crc(page + this_page_page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             this_page_page_overhead);
    if (crc != uint4korr(page_pos))
2544
    {
2545
      DBUG_PRINT("error", ("Page (%lu,0x%lx): "
2546
                           "CRC mismatch: calculated: %lx on the page %lx",
unknown's avatar
unknown committed
2547
                           (ulong) data->number, (ulong) offset,
unknown's avatar
unknown committed
2548
                           (ulong) crc, (ulong) uint4korr(page_pos)));
2549 2550
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
2551
    page_pos+= CRC_SIZE;                      /* Skip crc */
2552
  }
unknown's avatar
unknown committed
2553 2554
  if (flags & TRANSLOG_SECTOR_PROTECTION &&
      translog_check_sector_protection(page, data))
2555
  {
unknown's avatar
unknown committed
2556
    DBUG_RETURN(1);
2557 2558 2559 2560
  }
  DBUG_RETURN(0);
}

2561

unknown's avatar
unknown committed
2562 2563
/**
  @brief Locks the loghandler.
2564

unknown's avatar
unknown committed
2565
  @note See comment before buffer 'mutex' variable.
2566

unknown's avatar
unknown committed
2567 2568
  @retval 0 OK
  @retval 1 Error
2569 2570 2571 2572
*/

my_bool translog_lock()
{
2573
  uint8 current_buffer;
2574 2575 2576 2577 2578 2579 2580 2581
  DBUG_ENTER("translog_lock");

  /*
     Locking the loghandler mean locking current buffer, but it can change
     during locking, so we should check it
  */
  for (;;)
  {
2582 2583 2584 2585 2586 2587
    /*
      log_descriptor.bc.buffer_no is only one byte so its reading is
      an atomic operation
    */
    current_buffer= log_descriptor.bc.buffer_no;
    if (translog_buffer_lock(log_descriptor.buffers + current_buffer))
2588
      DBUG_RETURN(1);
2589
    if (log_descriptor.bc.buffer_no == current_buffer)
2590
      break;
2591
    translog_buffer_unlock(log_descriptor.buffers + current_buffer);
2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616
  }
  DBUG_RETURN(0);
}


/**
  @brief Unlocks the loghandler.

  Releases the lock of the buffer which is current at the moment of the
  call (log_descriptor.bc.buffer).

  @return Always 0; the result of the buffer unlock is not examined.
*/

my_bool translog_unlock()
{
  DBUG_ENTER("translog_unlock");

  translog_buffer_unlock(log_descriptor.bc.buffer);
  DBUG_RETURN(0);
}


2617 2618
/**
  @brief Get log page by file number and offset of the beginning of the page
2619

2620 2621
  @param data            validator data, which contains the page address
  @param buffer          buffer for page placing
2622
                         (might not be used in some cache implementations)
2623 2624
  @param direct_link     if it is not NULL then caller can accept direct
                         link to the page cache
2625

2626 2627
  @retval NULL Error
  @retval #    pointer to the page cache which should be used to read this page
2628 2629
*/

2630
static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
unknown's avatar
unknown committed
2631
                                PAGECACHE_BLOCK_LINK **direct_link)
2632
{
2633
  TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
2634
  uint32 file_no= LSN_FILE_NO(addr);
unknown's avatar
unknown committed
2635
  TRANSLOG_FILE *file;
2636
  DBUG_ENTER("translog_get_page");
unknown's avatar
unknown committed
2637
  DBUG_PRINT("enter", ("File: %lu  Offset: %lu(0x%lx)",
2638 2639 2640
                       (ulong) file_no,
                       (ulong) LSN_OFFSET(addr),
                       (ulong) LSN_OFFSET(addr)));
2641 2642

  /* it is really page address */
2643
  DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
2644

2645 2646 2647
  if (direct_link)
    *direct_link= NULL;

2648 2649
  in_buffers= translog_only_in_buffers();
  DBUG_PRINT("info", ("in_buffers: (%lu,0x%lx)",
unknown's avatar
unknown committed
2650
                      LSN_IN_PARTS(in_buffers)));
2651 2652 2653 2654 2655 2656 2657 2658 2659
  if (in_buffers != LSN_IMPOSSIBLE &&
      cmp_translog_addr(addr, in_buffers) >= 0)
  {
    translog_lock();
    /* recheck with locked loghandler */
    in_buffers= translog_only_in_buffers();
    if (cmp_translog_addr(addr, in_buffers) >= 0)
    {
      uint16 buffer_no= log_descriptor.bc.buffer_no;
2660
#ifndef DBUG_OFF
2661
      uint16 buffer_start= buffer_no;
2662
#endif
2663 2664 2665 2666 2667 2668
      struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
      struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
      for (;;)
      {
        /*
          if the page is in the buffer and it is the last version of the
2669
          page (in case of division the page by buffer flush)
2670
        */
unknown's avatar
unknown committed
2671
        if (curr_buffer->file != NULL &&
2672 2673 2674 2675 2676 2677 2678 2679
            cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
            cmp_translog_addr(addr,
                              (curr_buffer->next_buffer_offset ?
                               curr_buffer->next_buffer_offset:
                               curr_buffer->offset + curr_buffer->size)) < 0)
        {
          int is_last_unfinished_page;
          uint last_protected_sector= 0;
2680
          uchar *from, *table= NULL;
unknown's avatar
unknown committed
2681
          TRANSLOG_FILE file_copy;
2682 2683 2684 2685
          translog_wait_for_writers(curr_buffer);
          DBUG_ASSERT(LSN_FILE_NO(addr) ==  LSN_FILE_NO(curr_buffer->offset));
          from= curr_buffer->buffer + (addr - curr_buffer->offset);
          memcpy(buffer, from, TRANSLOG_PAGE_SIZE);
unknown's avatar
unknown committed
2686 2687 2688 2689 2690 2691 2692 2693 2694 2695
          /*
            We can use copy then in translog_page_validator() because it
            do not put it permanently somewhere.
            We have to use copy because after releasing log lock we can't
            guaranty that the file still be present (in real life it will be
            present but theoretically possible that it will be released
            already from last files cache);
          */
          file_copy= *(curr_buffer->file);
          file_copy.handler.callback_data= (uchar*) &file_copy;
2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706
          is_last_unfinished_page= ((log_descriptor.bc.buffer ==
                                     curr_buffer) &&
                                    (log_descriptor.bc.ptr >= from) &&
                                    (log_descriptor.bc.ptr <
                                     from + TRANSLOG_PAGE_SIZE));
          if (is_last_unfinished_page &&
              (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
          {
            last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
                                    DISK_DRIVE_SECTOR_SIZE);
            table= buffer + log_descriptor.page_overhead -
unknown's avatar
unknown committed
2707
              TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730
          }

          DBUG_ASSERT(buffer_unlock == curr_buffer);
          translog_buffer_unlock(buffer_unlock);
          if (is_last_unfinished_page)
          {
            uint i;
            /*
              This is last unfinished page => we should not check CRC and
              remove only that protection which already installed (no need
              to check it)

              We do not check the flag of sector protection, because if
              (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
              not set then last_protected_sector will be 0 so following loop
              will be never executed
            */
            DBUG_PRINT("info", ("This is last unfinished page, "
                                "last protected sector %u",
                                last_protected_sector));
            for (i= 1; i <= last_protected_sector; i++)
            {
              uint offset= i * DISK_DRIVE_SECTOR_SIZE;
unknown's avatar
unknown committed
2731 2732 2733 2734
              DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
                                  i, buffer[offset],
                                  table[i]));
              buffer[offset]= table[i];
2735 2736 2737 2738 2739 2740 2741 2742
            }
          }
          else
          {
            /*
              This IF should be true because we use in-memory data which
              supposed to be correct.
            */
unknown's avatar
unknown committed
2743
            if (translog_page_validator((uchar*) buffer,
2744
                                        LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
unknown's avatar
unknown committed
2745
                                        (uchar*) &file_copy))
2746
            {
unknown's avatar
unknown committed
2747
              DBUG_ASSERT(0);
2748
              buffer= NULL;
2749
            }
2750 2751 2752 2753 2754 2755 2756 2757
          }
          DBUG_RETURN(buffer);
        }
        buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
        curr_buffer= log_descriptor.buffers + buffer_no;
        translog_buffer_lock(curr_buffer);
        translog_buffer_unlock(buffer_unlock);
        buffer_unlock= curr_buffer;
unknown's avatar
unknown committed
2758
        /* we can't make a full circle */
2759 2760 2761 2762 2763
        DBUG_ASSERT(buffer_start != buffer_no);
      }
    }
    translog_unlock();
  }
unknown's avatar
unknown committed
2764 2765
  file= get_logfile_by_number(file_no);
  buffer=
unknown's avatar
unknown committed
2766 2767 2768 2769 2770 2771 2772 2773
    (uchar*) pagecache_read(log_descriptor.pagecache, &file->handler,
                            LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
                            3, (direct_link ? NULL : (char*) buffer),
                            PAGECACHE_PLAIN_PAGE,
                            (direct_link ?
                             PAGECACHE_LOCK_READ :
                             PAGECACHE_LOCK_LEFT_UNLOCKED),
                            direct_link);
unknown's avatar
unknown committed
2774 2775 2776 2777
  DBUG_PRINT("info", ("Direct link is assigned to : 0x%lx * 0x%lx",
                      (ulong) direct_link,
                      (ulong)(direct_link ? *direct_link : NULL)));
  data->was_recovered= file->was_recovered;
2778 2779 2780
  DBUG_RETURN(buffer);
}

2781

2782 2783 2784 2785 2786 2787 2788
/**
  @brief free direct log page link

  @param direct_link the direct log page link to be freed

*/

unknown's avatar
unknown committed
2789
static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
2790 2791 2792 2793 2794 2795 2796
{
  DBUG_ENTER("translog_free_link");
  DBUG_PRINT("info", ("Direct link: 0x%lx",
                      (ulong) direct_link));
  if (direct_link)
    pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
                             PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
2797
                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0);
2798 2799
  DBUG_VOID_RETURN;
}
2800

2801

unknown's avatar
unknown committed
2802 2803
/**
  @brief Finds last full page of the given log file.

  The size of the file is taken with my_stat(). If the file is not bigger
  than one page the resulting offset is 0 and the last page is reported
  as not OK.

  @param addr            in: address which contains file number of the
                         log file; out: address of the last full page of
                         that file
  @param last_page_ok    out: result of the check whether last page OK
                         (for now we check only that the file length is
                         divisible by the page length).
  @param no_errors       suppress messages about non-critical errors

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
                                           my_bool *last_page_ok,
                                           my_bool no_errors)
{
  MY_STAT stat_buff, *local_stat;
  char path[FN_REFLEN];
  uint32 rec_offset;
  uint32 file_no= LSN_FILE_NO(*addr);
  DBUG_ENTER("translog_get_last_page_addr");

  local_stat= my_stat(translog_filename_by_fileno(file_no, path),
                      &stat_buff,
                      (no_errors ? MYF(0) : MYF(MY_WME)));
  if (!local_stat)
    DBUG_RETURN(1);
  DBUG_PRINT("info", ("File size: %lu", (ulong) local_stat->st_size));

  if (local_stat->st_size <= TRANSLOG_PAGE_SIZE)
  {
    /* at most one page in the file */
    rec_offset= 0;
    *last_page_ok= 0;
  }
  else
  {
    /* beginning of the last page which is present completely */
    rec_offset= (((local_stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
                 TRANSLOG_PAGE_SIZE);
    *last_page_ok= (local_stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE);
  }

  *addr= MAKE_LSN(file_no, rec_offset);
  DBUG_PRINT("info", ("Last page: 0x%lx  ok: %d", (ulong) rec_offset,
                      *last_page_ok));
  DBUG_RETURN(0);
}


unknown's avatar
unknown committed
2849 2850
/**
  @brief Get number bytes for record length storing
2851

unknown's avatar
unknown committed
2852
  @param length          Record length which will be encoded
2853

unknown's avatar
unknown committed
2854
  @return 1,3,4,5 - number of bytes to store given length
2855
*/
2856

2857 2858 2859 2860
static uint translog_variable_record_length_bytes(translog_size_t length)
{
  if (length < 250)
    return 1;
unknown's avatar
unknown committed
2861
  if (length < 0xFFFF)
2862
    return 3;
unknown's avatar
unknown committed
2863
  if (length < (ulong) 0xFFFFFF)
2864 2865 2866 2867 2868
    return 4;
  return 5;
}


unknown's avatar
unknown committed
2869
/**
unknown's avatar
unknown committed
2870
  @brief Gets header of this chunk.
2871

unknown's avatar
unknown committed
2872
  @param chunk           The pointer to the chunk beginning
2873

unknown's avatar
unknown committed
2874 2875
  @retval # total length of the chunk
  @retval 0 Error
2876 2877
*/

unknown's avatar
unknown committed
2878
static uint16 translog_get_chunk_header_length(uchar *chunk)
2879 2880
{
  DBUG_ENTER("translog_get_chunk_header_length");
unknown's avatar
unknown committed
2881
  switch (*chunk & TRANSLOG_CHUNK_TYPE) {
unknown's avatar
unknown committed
2882
  case TRANSLOG_CHUNK_LSN:
2883
  {
unknown's avatar
unknown committed
2884
    /* 0 chunk referred as LSN (head or tail) */
2885
    translog_size_t rec_len;
unknown's avatar
unknown committed
2886
    uchar *start= chunk;
unknown's avatar
unknown committed
2887
    uchar *ptr= start + 1 + 2;
2888 2889 2890 2891
    uint16 chunk_len, header_len;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
    rec_len= translog_variable_record_1group_decode_len(&ptr);
    chunk_len= uint2korr(ptr);
unknown's avatar
unknown committed
2892 2893
    header_len= (ptr - start) +2;
    DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
2894 2895 2896
                        (ulong) rec_len, (uint) chunk_len, (uint) header_len));
    if (chunk_len)
    {
unknown's avatar
unknown committed
2897
      /* TODO: fine header end */
unknown's avatar
unknown committed
2898 2899 2900 2901 2902
      /*
        The last chunk of multi-group record can be base for it header
        calculation (we skip to the first group to read the header) so if we
        stuck here something is wrong.
      */
2903
      DBUG_ASSERT(0);
unknown's avatar
unknown committed
2904
      DBUG_RETURN(0);                               /* Keep compiler happy */
2905 2906 2907
    }
    DBUG_RETURN(header_len);
  }
unknown's avatar
unknown committed
2908
  case TRANSLOG_CHUNK_FIXED:
2909
  {
unknown's avatar
unknown committed
2910
    /* 1 (pseudo)fixed record (also LSN) */
2911 2912 2913
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
    DBUG_RETURN(3);
  }
unknown's avatar
unknown committed
2914 2915
  case TRANSLOG_CHUNK_NOHDR:
    /* 2 no header chunk (till page end) */
2916 2917 2918
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
    DBUG_RETURN(1);
    break;
unknown's avatar
unknown committed
2919 2920
  case TRANSLOG_CHUNK_LNGTH:
    /* 3 chunk with chunk length */
2921 2922 2923 2924 2925
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
    DBUG_RETURN(3);
    break;
  default:
    DBUG_ASSERT(0);
unknown's avatar
unknown committed
2926
    DBUG_RETURN(0);                               /* Keep compiler happy */
2927 2928 2929 2930
  }
}


2931
/**
  @brief Truncate the log to the given address. Used during the startup if the
         end of log is corrupted.

  Log files after the file of 'addr' are deleted, the file of 'addr' is
  cut to the end of the page which contains 'addr' and the rest of that
  page is filled with TRANSLOG_FILLER. Then the horizon and the current
  buffer are set up to continue writing from 'addr'.

  @param addr            new horizon

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
{
  uchar *page;
  TRANSLOG_ADDRESS current_page;
  uint32 next_page_offset, page_rest;
  uint32 i;
  File fd;
  TRANSLOG_VALIDATOR_DATA data;
  char path[FN_REFLEN];
  uchar page_buff[TRANSLOG_PAGE_SIZE];
  DBUG_ENTER("translog_truncate_log");
  /* TODO: write warning to the client */
  DBUG_PRINT("warning", ("removing all records from (%lx,0x%lx) "
                         "till (%lx,0x%lx)",
                         LSN_IN_PARTS(addr),
                         LSN_IN_PARTS(log_descriptor.horizon)));
  DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
  /* remove files between the address and horizon */
  for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
    if (my_delete(translog_filename_by_fileno(i, path),  MYF(MY_WME)))
    {
      /*
        NOTE(review): this is the only error path which unlocks the
        loghandler - confirm that the caller really holds the lock here.
      */
      translog_unlock();
      DBUG_RETURN(1);
    }

  /* truncate the last file up to the last page */
  next_page_offset= LSN_OFFSET(addr);
  /* round up to the page border (no change if already on the border) */
  next_page_offset= (next_page_offset -
                     ((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
                     TRANSLOG_PAGE_SIZE);
  page_rest= next_page_offset - LSN_OFFSET(addr);
  memset(page_buff, TRANSLOG_FILLER, page_rest);
  if ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0)
    DBUG_RETURN(1);
  if (my_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
      (page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
                              log_write_flags)) ||
      my_sync(fd, MYF(MY_WME)))
  {
    /* the error is reported already, just do not leak the descriptor */
    my_close(fd, MYF(0));
    DBUG_RETURN(1);
  }
  if (my_close(fd, MYF(MY_WME)) ||
      (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
       my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD))))
    DBUG_RETURN(1);

  /* fix the horizon */
  log_descriptor.horizon= addr;
  /* fix the buffer data */
  current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
                                             TRANSLOG_PAGE_SIZE));
  data.addr= &current_page;
  if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
      NULL)
    DBUG_RETURN(1);
  if (page != log_descriptor.buffers->buffer)
    memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
  log_descriptor.bc.buffer->offset= current_page;
  log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
  log_descriptor.bc.ptr=
    log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
  log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
  DBUG_RETURN(0);
}

unknown's avatar
unknown committed
3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042

/**
  @brief Check log files presence

  Looks in the log directory for a file named "maria_log." followed by
  exactly 8 decimal digits.

  @retval 0 no log files.
  @retval 1 there is at least 1 log file in the directory, or the
            directory could not be read
*/

my_bool translog_is_log_files()
{
  MY_DIR *dirp;
  uint i;
  my_bool rc= FALSE;

  /* Look for at least one transaction log file */
  if (!(dirp = my_dir(log_descriptor.directory, MYF(MY_DONT_SORT))))
    return 1;

  for (i= 0; i < dirp->number_off_files; i++)
  {
    char *file= dirp->dir_entry[i].name;
    /*
      A shorter name stops the chain of checks on its terminating '\0',
      so no byte behind the end of the name is read.
    */
    if (strncmp(file, "maria_log.", 10) == 0 &&
        file[10] >= '0' && file[10] <= '9' &&
        file[11] >= '0' && file[11] <= '9' &&
        file[12] >= '0' && file[12] <= '9' &&
        file[13] >= '0' && file[13] <= '9' &&
        file[14] >= '0' && file[14] <= '9' &&
        file[15] >= '0' && file[15] <= '9' &&
        file[16] >= '0' && file[16] <= '9' &&
        file[17] >= '0' && file[17] <= '9' &&
        file[18] == '\0')
    {
      rc= TRUE;
      break;
    }
  }
  my_dirend(dirp);
  /* report what was found instead of unconditional FALSE */
  return rc;
}


3043 3044 3045 3046 3047 3048 3049 3050 3051
/**
  @brief Initialize transaction log

  @param directory       Directory where log files are put
  @param log_file_max_size max size of one log size (for new logs creation)
  @param server_version  version of MySQL server (MYSQL_VERSION_ID)
  @param server_id       server ID (replication & Co)
  @param pagecache       Page cache for the log reads
  @param flags           flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
3052
                           TRANSLOG_RECORD_CRC)
3053 3054
  @param read_only       Put transaction log in read-only mode
  @param init_table_func function to initialize record descriptors table
3055
  @param no_errors       suppress messages about non-critical errors
3056

3057
  @todo
3058 3059
    Free used resources in case of error.

3060 3061
  @retval 0 OK
  @retval 1 Error
3062 3063
*/

3064 3065 3066 3067 3068
my_bool translog_init_with_table(const char *directory,
                                 uint32 log_file_max_size,
                                 uint32 server_version,
                                 uint32 server_id, PAGECACHE *pagecache,
                                 uint flags, my_bool readonly,
3069 3070
                                 void (*init_table_func)(),
                                 my_bool no_errors)
3071 3072 3073
{
  int i;
  int old_log_was_recovered= 0, logs_found= 0;
unknown's avatar
unknown committed
3074
  uint old_flags= flags;
unknown's avatar
unknown committed
3075
  uint32 start_file_num= 1;
3076
  TRANSLOG_ADDRESS sure_page, last_page, last_valid_page, checkpoint_lsn;
3077
  my_bool version_changed= 0;
unknown's avatar
unknown committed
3078 3079 3080 3081 3082 3083
  DBUG_ENTER("translog_init_with_table");

  id_to_share= NULL;

  (*init_table_func)();

3084 3085 3086
  if (readonly)
    log_descriptor.open_flags= O_BINARY | O_RDONLY;
  else
3087
    log_descriptor.open_flags= O_BINARY | O_RDWR;
3088
  if (pthread_mutex_init(&log_descriptor.sent_to_disk_lock,
3089 3090 3091 3092 3093
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.file_header_lock,
                         MY_MUTEX_INIT_FAST) ||
      pthread_mutex_init(&log_descriptor.unfinished_files_lock,
                         MY_MUTEX_INIT_FAST) ||
3094 3095
      pthread_mutex_init(&log_descriptor.purger_lock,
                         MY_MUTEX_INIT_FAST) ||
3096 3097
      pthread_mutex_init(&log_descriptor.log_flush_lock,
                         MY_MUTEX_INIT_FAST) ||
unknown's avatar
unknown committed
3098 3099 3100 3101
      my_rwlock_init(&log_descriptor.open_files_lock,
                     NULL) ||
      my_init_dynamic_array(&log_descriptor.open_files,
                            sizeof(TRANSLOG_FILE*), 10, 10) ||
unknown's avatar
unknown committed
3102 3103 3104
      my_init_dynamic_array(&log_descriptor.unfinished_files,
                            sizeof(struct st_file_counter),
                            10, 10))
3105
    DBUG_RETURN(1);
3106
  log_descriptor.min_need_file= 0;
3107 3108
  log_descriptor.min_file_number= 0;
  log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
3109 3110 3111 3112 3113 3114 3115

  /* Directory to store files */
  unpack_dirname(log_descriptor.directory, directory);

  if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
                                            O_RDONLY, MYF(MY_WME))) < 0)
  {
3116 3117
    my_errno= errno;
    DBUG_PRINT("error", ("Error %d during opening directory '%s'",
3118 3119 3120 3121
                         errno, log_descriptor.directory));
    DBUG_RETURN(1);
  }

3122
  log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
3123
  DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
unknown's avatar
unknown committed
3124
              log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
3125
  /* max size of one log size (for new logs creation) */
3126
  log_file_size= log_descriptor.log_file_max_size=
unknown's avatar
unknown committed
3127
    log_file_max_size;
3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138
  /* server version */
  log_descriptor.server_version= server_version;
  /* server ID */
  log_descriptor.server_id= server_id;
  /* Page cache for the log reads */
  log_descriptor.pagecache= pagecache;
  /* Flags */
  DBUG_ASSERT((flags &
               ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
                 TRANSLOG_RECORD_CRC)) == 0);
  log_descriptor.flags= flags;
unknown's avatar
unknown committed
3139 3140 3141 3142
  for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
  {
     page_overhead[i]= 7;
     if (i & TRANSLOG_PAGE_CRC)
unknown's avatar
unknown committed
3143
       page_overhead[i]+= CRC_SIZE;
unknown's avatar
unknown committed
3144
     if (i & TRANSLOG_SECTOR_PROTECTION)
unknown's avatar
unknown committed
3145 3146
       page_overhead[i]+= TRANSLOG_PAGE_SIZE /
                           DISK_DRIVE_SECTOR_SIZE;
unknown's avatar
unknown committed
3147 3148
  }
  log_descriptor.page_overhead= page_overhead[flags];
3149 3150
  log_descriptor.page_capacity_chunk_2=
    TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
unknown's avatar
unknown committed
3151
  compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
3152 3153 3154 3155 3156 3157
  log_descriptor.buffer_capacity_chunk_2=
    (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
    log_descriptor.page_capacity_chunk_2;
  log_descriptor.half_buffer_capacity_chunk_2=
    log_descriptor.buffer_capacity_chunk_2 / 2;
  DBUG_PRINT("info",
unknown's avatar
unknown committed
3158
             ("Overhead: %u  pc2: %u  bc2: %u,  bc2/2: %u",
3159 3160 3161 3162 3163
              log_descriptor.page_overhead,
              log_descriptor.page_capacity_chunk_2,
              log_descriptor.buffer_capacity_chunk_2,
              log_descriptor.half_buffer_capacity_chunk_2));

unknown's avatar
unknown committed
3164 3165 3166 3167 3168 3169 3170 3171
  /* Just to init it somehow (hack for bootstrap)*/
  {
    TRANSLOG_FILE *file= 0;
    log_descriptor.min_file = log_descriptor.max_file= 1;
    insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
    translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
    pop_dynamic(&log_descriptor.open_files);
  }
3172 3173 3174 3175

  /* Buffers for log writing */
  for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
  {
unknown's avatar
unknown committed
3176 3177
    if (translog_buffer_init(log_descriptor.buffers + i))
      DBUG_RETURN(1);
3178 3179 3180
#ifndef DBUG_OFF
    log_descriptor.buffers[i].buffer_no= (uint8) i;
#endif
unknown's avatar
unknown committed
3181 3182
    DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx",
                        i, (ulong) log_descriptor.buffers + i));
3183 3184
  }

unknown's avatar
unknown committed
3185 3186 3187 3188
  /*
    last_logno and last_checkpoint_lsn were set in
    ma_control_file_create_or_open()
  */
unknown's avatar
unknown committed
3189
  logs_found= (last_logno != FILENO_IMPOSSIBLE);
3190

unknown's avatar
unknown committed
3191
  translog_status= (readonly ? TRANSLOG_READONLY : TRANSLOG_OK);
3192
  checkpoint_lsn= last_checkpoint_lsn;
unknown's avatar
unknown committed
3193

3194 3195 3196
  if (logs_found)
  {
    my_bool pageok;
unknown's avatar
unknown committed
3197
    DBUG_PRINT("info", ("log found..."));
3198
    /*
unknown's avatar
unknown committed
3199
      TODO: scan directory for maria_log.XXXXXXXX files and find
3200
       highest XXXXXXXX & set logs_found
unknown's avatar
unknown committed
3201
      TODO: check that last checkpoint within present log addresses space
3202

unknown's avatar
unknown committed
3203
      find the log end
unknown's avatar
unknown committed
3204
    */
unknown's avatar
unknown committed
3205
    if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
3206
    {
3207
      DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
3208 3209
      /* only last log needs to be checked */
      sure_page= MAKE_LSN(last_logno, TRANSLOG_PAGE_SIZE);
3210 3211 3212 3213
    }
    else
    {
      sure_page= last_checkpoint_lsn;
3214 3215
      DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
      sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
3216
    }
unknown's avatar
unknown committed
3217
    /* Set horizon to the beginning of the last file first */
unknown's avatar
unknown committed
3218
    log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
3219
    if (translog_get_last_page_addr(&last_page, &pageok, no_errors))
unknown's avatar
unknown committed
3220 3221 3222
    {
      if (!translog_is_log_files())
      {
3223 3224 3225 3226
        /*
          Files was deleted, just start from the next log number, so that
          existing tables are in the past.
        */
unknown's avatar
unknown committed
3227
        start_file_num= last_logno + 1;
3228
        checkpoint_lsn= LSN_IMPOSSIBLE; /* no log so no checkpoint */
unknown's avatar
unknown committed
3229 3230 3231 3232 3233 3234
        logs_found= 0;
      }
      else
        DBUG_RETURN(1);
    }
    else if (LSN_OFFSET(last_page) == 0)
3235
    {
3236
      if (LSN_FILE_NO(last_page) == 1)
3237 3238
      {
        logs_found= 0;                          /* file #1 has no pages */
unknown's avatar
unknown committed
3239
        DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
3240 3241 3242
      }
      else
      {
3243
        last_page-= LSN_ONE_FILE;
3244
        if (translog_get_last_page_addr(&last_page, &pageok, 0))
3245 3246 3247
          DBUG_RETURN(1);
      }
    }
unknown's avatar
unknown committed
3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293
    if (logs_found)
    {
      uint32 i;
      log_descriptor.min_file= translog_first_file(log_descriptor.horizon, 1);
      log_descriptor.max_file= last_logno;
      /* Open all files */
      if (allocate_dynamic(&log_descriptor.open_files,
                           log_descriptor.max_file -
                           log_descriptor.min_file + 1))
        DBUG_RETURN(1);
      for (i = log_descriptor.max_file; i >= log_descriptor.min_file; i--)
      {
        /*
          We can't allocate all file together because they will be freed
          one by one
        */
        TRANSLOG_FILE *file= (TRANSLOG_FILE *)my_malloc(sizeof(TRANSLOG_FILE),
                                                        MYF(0));
        if (file == NULL ||
            (file->handler.file=
             open_logfile_by_number_no_cache(i)) < 0)
        {
          int j;
          for (j= i - log_descriptor.min_file - 1; j > 0; j--)
          {
            TRANSLOG_FILE *el=
              *dynamic_element(&log_descriptor.open_files, j,
                               TRANSLOG_FILE **);
            my_close(el->handler.file, MYF(MY_WME));
            my_free(el, MYF(0));
          }
          if (file)
          {
            free(file);
            DBUG_RETURN(1);
          }
          else
            DBUG_RETURN(1);
        }
        translog_file_init(file, i, 1);
        /* we allocated space so it can't fail */
        insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
      }
      DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
                  log_descriptor.open_files.elements);
    }
3294
  }
3295 3296 3297 3298 3299 3300 3301
  else if (readonly)
  {
    /* There is no logs and there is read-only mode => nothing to read */
    DBUG_PRINT("error", ("No logs and read-only mode"));
    DBUG_RETURN(1);
  }

3302 3303 3304 3305 3306
  if (logs_found)
  {
    TRANSLOG_ADDRESS current_page= sure_page;
    my_bool pageok;

unknown's avatar
unknown committed
3307
    DBUG_PRINT("info", ("The log is really present"));
3308
    DBUG_ASSERT(sure_page <= last_page);
3309 3310 3311

    /* TODO: check page size */

unknown's avatar
unknown committed
3312
    last_valid_page= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
3313 3314 3315 3316 3317 3318
    /*
      Scans and validate pages. We need it to show "outside" only for sure
      valid part of the log. If the log was damaged then fixed we have to
      cut off damaged part before some other process start write something
      in the log.
    */
3319 3320 3321
    do
    {
      TRANSLOG_ADDRESS current_file_last_page;
3322
      current_file_last_page= current_page;
3323
      if (translog_get_last_page_addr(&current_file_last_page, &pageok, 0))
3324 3325 3326
        DBUG_RETURN(1);
      if (!pageok)
      {
3327 3328
        DBUG_PRINT("error", ("File %lu have no complete last page",
                             (ulong) LSN_FILE_NO(current_file_last_page)));
3329 3330 3331 3332 3333 3334 3335
        old_log_was_recovered= 1;
        /* This file is not written till the end so it should be last */
        last_page= current_file_last_page;
        /* TODO: issue warning */
      }
      do
      {
unknown's avatar
unknown committed
3336
        TRANSLOG_VALIDATOR_DATA data;
unknown's avatar
unknown committed
3337
        uchar buffer[TRANSLOG_PAGE_SIZE], *page;
unknown's avatar
unknown committed
3338
        data.addr= &current_page;
3339
        if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
3340 3341 3342
          DBUG_RETURN(1);
        if (data.was_recovered)
        {
unknown's avatar
unknown committed
3343 3344 3345
          DBUG_PRINT("error", ("file no: %lu (%d)  "
                               "rec_offset: 0x%lx (%lu) (%d)",
                               (ulong) LSN_FILE_NO(current_page),
3346 3347 3348 3349
                               (uint3korr(page + 3) !=
                                LSN_FILE_NO(current_page)),
                               (ulong) LSN_OFFSET(current_page),
                               (ulong) (LSN_OFFSET(current_page) /
3350 3351
                                        TRANSLOG_PAGE_SIZE),
                               (uint3korr(page) !=
3352 3353
                                LSN_OFFSET(current_page) /
                                TRANSLOG_PAGE_SIZE)));
3354 3355 3356
          old_log_was_recovered= 1;
          break;
        }
unknown's avatar
unknown committed
3357
        old_flags= page[TRANSLOG_PAGE_FLAGS];
3358
        last_valid_page= current_page;
3359 3360 3361 3362 3363
        current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
      } while (current_page <= current_file_last_page);
      current_page+= LSN_ONE_FILE;
      current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
    } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
3364
             !old_log_was_recovered);
unknown's avatar
unknown committed
3365
    if (last_valid_page == LSN_IMPOSSIBLE)
3366 3367 3368 3369 3370
    {
      /* Panic!!! Even page which should be valid is invalid */
      /* TODO: issue error */
      DBUG_RETURN(1);
    }
unknown's avatar
unknown committed
3371 3372 3373 3374
    DBUG_PRINT("info", ("Last valid page is in file: %lu  "
                        "offset: %lu (0x%lx)  "
                        "Logs found: %d  was recovered: %d  "
                        "flags match: %d",
3375 3376 3377
                        (ulong) LSN_FILE_NO(last_valid_page),
                        (ulong) LSN_OFFSET(last_valid_page),
                        (ulong) LSN_OFFSET(last_valid_page),
unknown's avatar
unknown committed
3378 3379
                        logs_found, old_log_was_recovered,
                        (old_flags == flags)));
3380 3381

    /* TODO: check server ID */
unknown's avatar
unknown committed
3382
    if (logs_found && !old_log_was_recovered && old_flags == flags)
3383
    {
unknown's avatar
unknown committed
3384
      TRANSLOG_VALIDATOR_DATA data;
unknown's avatar
unknown committed
3385
      uchar buffer[TRANSLOG_PAGE_SIZE], *page;
3386
      uint16 chunk_offset;
unknown's avatar
unknown committed
3387
      data.addr= &last_valid_page;
3388
      /* continue old log */
3389 3390
      DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
                  LSN_FILE_NO(log_descriptor.horizon));
3391
      if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
3392 3393 3394 3395 3396 3397 3398
          (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
        DBUG_RETURN(1);

      /* Puts filled part of old page in the buffer */
      log_descriptor.horizon= last_valid_page;
      translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
      /*
unknown's avatar
unknown committed
3399 3400
         Free space if filled with TRANSLOG_FILLER and first uchar of
         real chunk can't be TRANSLOG_FILLER
3401
      */
unknown's avatar
unknown committed
3402 3403
      while (chunk_offset < TRANSLOG_PAGE_SIZE &&
             page[chunk_offset] != TRANSLOG_FILLER)
3404 3405 3406 3407 3408
      {
        uint16 chunk_length;
        if ((chunk_length=
             translog_get_total_chunk_length(page, chunk_offset)) == 0)
          DBUG_RETURN(1);
unknown's avatar
unknown committed
3409
        DBUG_PRINT("info", ("chunk: offset: %u  length: %u",
3410 3411 3412 3413 3414 3415
                            (uint) chunk_offset, (uint) chunk_length));
        chunk_offset+= chunk_length;

        /* chunk can't cross the page border */
        DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
      }
unknown's avatar
unknown committed
3416
      memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
3417 3418
      log_descriptor.bc.buffer->size+= chunk_offset;
      log_descriptor.bc.ptr+= chunk_offset;
unknown's avatar
unknown committed
3419
      log_descriptor.bc.current_page_fill= chunk_offset;
3420 3421 3422
      log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                                 (chunk_offset +
                                                  LSN_OFFSET(last_valid_page)));
unknown's avatar
unknown committed
3423
      DBUG_PRINT("info", ("Move Page #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
3424 3425 3426 3427
                          (uint) log_descriptor.bc.buffer_no,
                          (ulong) log_descriptor.bc.buffer,
                          log_descriptor.bc.chaser,
                          (ulong) log_descriptor.bc.buffer->size,
unknown's avatar
unknown committed
3428
                          (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
3429
                                   buffer->buffer)));
unknown's avatar
unknown committed
3430
      translog_check_cursor(&log_descriptor.bc);
3431
    }
3432 3433 3434
    if (!old_log_was_recovered && old_flags == flags)
    {
      LOGHANDLER_FILE_INFO info;
unknown's avatar
unknown committed
3435 3436 3437 3438 3439 3440 3441 3442 3443
      /*
        Accessing &log_descriptor.open_files without mutex is safe
        because it is initialization
      */
      if (translog_read_file_header(&info,
                                    (*dynamic_element(&log_descriptor.
                                                      open_files,
                                                      0, TRANSLOG_FILE **))->
                                    handler.file))
3444 3445 3446
        DBUG_RETURN(1);
      version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
    }
3447
  }
unknown's avatar
unknown committed
3448
  DBUG_PRINT("info", ("Logs found: %d  was recovered: %d",
3449 3450 3451
                      logs_found, old_log_was_recovered));
  if (!logs_found)
  {
unknown's avatar
unknown committed
3452 3453
    TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(sizeof(TRANSLOG_FILE),
                                                   MYF(0));
unknown's avatar
unknown committed
3454
    DBUG_PRINT("info", ("The log is not found => we will create new log"));
unknown's avatar
unknown committed
3455 3456
    if (file == NULL)
       DBUG_RETURN(1);
3457
    /* Start new log system from scratch */
unknown's avatar
unknown committed
3458 3459 3460 3461 3462 3463 3464 3465 3466 3467
    log_descriptor.horizon= MAKE_LSN(start_file_num,
                                     TRANSLOG_PAGE_SIZE); /* header page */
    if ((file->handler.file=
         create_logfile_by_number_no_cache(start_file_num)) == -1)
      DBUG_RETURN(1);
    translog_file_init(file, start_file_num, 0);
    if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
      DBUG_RETURN(1);
    log_descriptor.min_file= log_descriptor.max_file= start_file_num;
    if (translog_write_file_header())
3468
      DBUG_RETURN(1);
unknown's avatar
unknown committed
3469 3470 3471
    DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
                log_descriptor.open_files.elements);

3472 3473
    if (ma_control_file_write_and_force(checkpoint_lsn, start_file_num,
                                        CONTROL_FILE_UPDATE_ALL))
3474 3475 3476 3477 3478
      DBUG_RETURN(1);
    /* assign buffer 0 */
    translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  }
3479 3480
  else if ((old_log_was_recovered || old_flags != flags || version_changed) &&
           !readonly)
unknown's avatar
unknown committed
3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494
  {
    /* leave the damaged file untouched */
    log_descriptor.horizon+= LSN_ONE_FILE;
    /* header page */
    log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                               TRANSLOG_PAGE_SIZE);
    if (translog_create_new_file())
      DBUG_RETURN(1);
    /*
      Buffer system left untouched after recovery => we should init it
      (starting from buffer 0)
    */
    translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
3495 3496 3497
  }

  /* all LSNs that are on disk are flushed */
3498
  log_descriptor.sent_to_disk=
3499 3500
    log_descriptor.flushed= log_descriptor.horizon;
  log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
3501
  log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
3502
  log_descriptor.previous_flush_horizon= log_descriptor.horizon;
unknown's avatar
unknown committed
3503
  /*
unknown's avatar
unknown committed
3504 3505 3506 3507 3508 3509
    Now 'flushed' is set to 'horizon' value, but 'horizon' is (potentially)
    address of the next LSN and we want indicate that all LSNs that are
    already on the disk are flushed so we need decrease horizon on 1 (we are
    sure that there is no LSN on the disk which is greater then 'flushed'
    and there will not be LSN created that is equal or less then the value
    of the 'flushed').
unknown's avatar
unknown committed
3510
  */
3511
  log_descriptor.flushed--; /* offset decreased */
3512
  log_descriptor.sent_to_disk--; /* offset decreased */
3513 3514 3515 3516 3517
  /*
    Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
    structures for generating 2-byte ids:
  */
  my_atomic_rwlock_init(&LOCK_id_to_share);
unknown's avatar
unknown committed
3518 3519
  id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX * sizeof(MARIA_SHARE*),
                                          MYF(MY_WME | MY_ZEROFILL));
3520 3521 3522
  if (unlikely(!id_to_share))
    DBUG_RETURN(1);
  id_to_share--; /* min id is 1 */
3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535

  /* Check the last LSN record integrity */
  if (logs_found)
  {
    TRANSLOG_SCANNER_DATA scanner;
    TRANSLOG_ADDRESS page_addr;
    LSN last_lsn= LSN_IMPOSSIBLE;
    /*
      take very last page address and try to find LSN record on it
      if it fail take address of previous page and so on
    */
    page_addr= (log_descriptor.horizon -
                ((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
unknown's avatar
unknown committed
3536
    if (translog_scanner_init(page_addr, 1, &scanner, 1))
3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598
      DBUG_RETURN(1);
    scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
    for (;;)
    {
      uint chunk_type;
      chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
      DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                          (uint) scanner.page[scanner.page_offset]));
      while (chunk_type != TRANSLOG_CHUNK_LSN &&
             chunk_type != TRANSLOG_CHUNK_FIXED &&
             scanner.page != END_OF_LOG &&
             scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
             scanner.page_addr == page_addr)
      {
        if (translog_get_next_chunk(&scanner))
        {
          translog_destroy_scanner(&scanner);
          DBUG_RETURN(1);
        }
        if (scanner.page != END_OF_LOG)
        {
          chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
          DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                              (uint) scanner.page[scanner.page_offset]));
        }
      }
      if (chunk_type == TRANSLOG_CHUNK_LSN ||
          chunk_type == TRANSLOG_CHUNK_FIXED)
      {
        last_lsn= scanner.page_addr + scanner.page_offset;
        if (translog_get_next_chunk(&scanner))
        {
          translog_destroy_scanner(&scanner);
          DBUG_RETURN(1);
        }
        if (scanner.page == END_OF_LOG)
          break; /* it was the last record */
        chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
        DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                            (uint) scanner.page[scanner.page_offset]));
        continue; /* try to find other record on this page */
      }

      if (last_lsn != LSN_IMPOSSIBLE)
        break; /* there is no more records on the page */

      /* We have to make step back */
      if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
      {
        uint32 file_no= LSN_FILE_NO(page_addr);
        bool last_page_ok;
        /* it is beginning of the current file */
        if (unlikely(file_no == 1))
        {
          /*
            It is beginning of the log => there is no LSNs in the log =>
            There is no harm in leaving it "as-is".
          */
          DBUG_RETURN(0);
        }
        file_no--;
        page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
3599
        translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
3600 3601 3602 3603 3604 3605 3606 3607
        /* page should be OK as it is not the last file */
        DBUG_ASSERT(last_page_ok);
      }
      else
      {
         page_addr-= TRANSLOG_PAGE_SIZE;
      }
      translog_destroy_scanner(&scanner);
unknown's avatar
unknown committed
3608
      if (translog_scanner_init(page_addr, 1, &scanner, 1))
3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630
        DBUG_RETURN(1);
      scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
    }
    translog_destroy_scanner(&scanner);

    /* Now scanner points to the last LSN chunk, lets check it */
    {
      TRANSLOG_HEADER_BUFFER rec;
      translog_size_t rec_len;
      int len;
      uchar buffer[1];
      DBUG_PRINT("info", ("going to check the last found record (%lu,0x%lx)",
                          LSN_IN_PARTS(last_lsn)));

      len=
        translog_read_record_header(last_lsn, &rec);
      if (unlikely (len == RECHEADER_READ_ERROR ||
                    len == RECHEADER_READ_EOF))
      {
        DBUG_PRINT("error", ("unexpected end of log or record during "
                             "reading record header: (%lu,0x%lx)  len: %d",
                             LSN_IN_PARTS(last_lsn), len));
3631 3632 3633
        if (readonly)
          log_descriptor.horizon= last_lsn;
        else if (translog_truncate_log(last_lsn))
3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652
          DBUG_RETURN(1);
      }
      else
      {
        DBUG_ASSERT(last_lsn == rec.lsn);
        if (likely(rec.record_length != 0))
        {
          /*
            Reading the last byte of record will trigger scanning all
            record chunks for now
          */
          rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
                                        buffer, NULL);
          if (rec_len != 1)
          {
            DBUG_PRINT("error", ("unexpected end of log or record during "
                                 "reading record body: (%lu,0x%lx)  len: %d",
                                 LSN_IN_PARTS(rec.lsn),
                                 len));
3653 3654 3655
            if (readonly)
              log_descriptor.horizon= last_lsn;
            else if (translog_truncate_log(last_lsn))
3656 3657 3658 3659 3660 3661 3662
              DBUG_RETURN(1);
          }
        }
      }
    }
  }

3663 3664 3665 3666 3667
  DBUG_RETURN(0);
}


/*
unknown's avatar
unknown committed
3668
  @brief Free transaction log file buffer.
3669

unknown's avatar
unknown committed
3670
  @param buffer_no       The buffer to free
3671 3672 3673 3674 3675 3676
*/

static void translog_buffer_destroy(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_destroy");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
3677
             ("Buffer #%u: 0x%lx  file: %d  offset: (%lu,0x%lx)  size: %lu",
3678
              (uint) buffer->buffer_no, (ulong) buffer,
unknown's avatar
unknown committed
3679
              (buffer->file ? buffer->file->handler.file : -1),
unknown's avatar
unknown committed
3680
              LSN_IN_PARTS(buffer->offset),
3681
              (ulong) buffer->size));
unknown's avatar
unknown committed
3682
  if (buffer->file != NULL)
3683 3684
  {
    /*
unknown's avatar
unknown committed
3685
       We ignore errors here, because we can't do something about it
3686 3687
       (it is shutting down)
    */
3688
    translog_buffer_lock(buffer);
3689
    translog_buffer_flush(buffer);
3690
    translog_buffer_unlock(buffer);
3691
  }
unknown's avatar
unknown committed
3692
  DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex));
3693
  pthread_mutex_destroy(&buffer->mutex);
unknown's avatar
unknown committed
3694
  pthread_cond_destroy(&buffer->waiting_filling_buffer);
3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707
  DBUG_VOID_RETURN;
}


/*
  Free log handler resources

  SYNOPSIS
    translog_destroy()
*/

void translog_destroy()
{
unknown's avatar
unknown committed
3708
  TRANSLOG_FILE **file;
unknown's avatar
unknown committed
3709
  uint i;
3710
  DBUG_ENTER("translog_destroy");
3711

unknown's avatar
unknown committed
3712 3713
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
3714
  translog_lock();
unknown's avatar
unknown committed
3715 3716 3717
  translog_status= (translog_status == TRANSLOG_READONLY ?
                    TRANSLOG_UNINITED :
                    TRANSLOG_SHUTDOWN);
unknown's avatar
unknown committed
3718
  if (log_descriptor.bc.buffer->file != NULL)
3719 3720
    translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
  translog_unlock();
3721

3722 3723 3724 3725 3726
  for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
  {
    struct st_translog_buffer *buffer= log_descriptor.buffers + i;
    translog_buffer_destroy(buffer);
  }
unknown's avatar
unknown committed
3727
  translog_status= TRANSLOG_UNINITED;
3728

3729
  /* close files */
unknown's avatar
unknown committed
3730 3731
  while ((file= (TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files)))
    translog_close_log_file(*file);
3732 3733 3734 3735 3736
  pthread_mutex_destroy(&log_descriptor.sent_to_disk_lock);
  pthread_mutex_destroy(&log_descriptor.file_header_lock);
  pthread_mutex_destroy(&log_descriptor.unfinished_files_lock);
  pthread_mutex_destroy(&log_descriptor.purger_lock);
  pthread_mutex_destroy(&log_descriptor.log_flush_lock);
unknown's avatar
unknown committed
3737 3738
  rwlock_destroy(&log_descriptor.open_files_lock);
  delete_dynamic(&log_descriptor.open_files);
3739 3740 3741 3742
  delete_dynamic(&log_descriptor.unfinished_files);

  my_close(log_descriptor.directory_fd, MYF(MY_WME));
  my_atomic_rwlock_destroy(&LOCK_id_to_share);
3743 3744
  if (id_to_share != NULL)
    my_free((uchar*)(id_to_share + 1), MYF(MY_WME));
3745 3746 3747 3748 3749
  DBUG_VOID_RETURN;
}


/**
  @brief Starts new page.

  If the next page does not fit into the current write buffer, or the
  horizon offset is past the point where one more page fits into the log
  file (log_file_max_size - TRANSLOG_PAGE_SIZE), the cursor is moved to the
  next buffer and the old buffer is handed back through 'prev_buffer'.
  Otherwise the current page is finished and a new page header is started
  in the same buffer.

  @param horizon         \ Position in file and buffer where we are
  @param cursor          /
  @param prev_buffer     Buffer which should be flushed will be assigned here.
                         On success this is always set (to NULL if nothing
                         to flush); it is left untouched on error.

  @note We do not want to flush the buffer immediately because we want to
  let caller of this function first advance 'horizon' pointer and unlock the
  loghandler and only then flush the log which can take some time.

  @retval 0 OK
  @retval 1 Error
*/

static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
                                  struct st_buffer_cursor *cursor,
                                  struct st_translog_buffer **prev_buffer)
{
  struct st_translog_buffer *buffer= cursor->buffer;
  my_bool buffer_is_full, file_is_full;
  DBUG_ENTER("translog_page_next");

  /* No room for one more whole page in the current write buffer? */
  buffer_is_full= (cursor->ptr + TRANSLOG_PAGE_SIZE >
                   cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER);
  /* No room for one more whole page in the current log file? */
  file_is_full= (LSN_OFFSET(*horizon) >
                 log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE);

  if (buffer_is_full || file_is_full)
  {
    DBUG_PRINT("info", ("Switch to next buffer  Buffer Size: %lu (%lu) => %d  "
                        "File size: %lu  max: %lu => %d",
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer),
                        buffer_is_full,
                        (ulong) LSN_OFFSET(*horizon),
                        (ulong) log_descriptor.log_file_max_size,
                        file_is_full));
    /*
      The last argument tells translog_buffer_next() whether the file limit
      was hit (presumably so that it also switches to a new log file).
    */
    if (translog_buffer_next(horizon, cursor, file_is_full))
      DBUG_RETURN(1);
    *prev_buffer= buffer;
    DBUG_PRINT("info", ("Buffer #%u (0x%lx): have to be flushed",
                        (uint) buffer->buffer_no, (ulong) buffer));
  }
  else
  {
    DBUG_PRINT("info", ("Use the same buffer #%u (0x%lx): "
                        "Buffer Size: %lu (%lu)",
                        (uint) buffer->buffer_no,
                        (ulong) buffer,
                        (ulong) cursor->buffer->size,
                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
    translog_finish_page(horizon, cursor);
    translog_new_page_header(horizon, cursor);
    *prev_buffer= NULL;
  }
  DBUG_RETURN(0);
}


/*
  Write data of given length to the current page

  SYNOPSIS
    translog_write_data_on_page()
    horizon              \ Pointers on file and buffer
    cursor               /
    length               IN     length of the chunk
    buffer               buffer with data

  RETURN
unknown's avatar
unknown committed
3824 3825
    0  OK
    1  Error
3826 3827
*/

3828 3829 3830
static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor,
                                           translog_size_t length,
unknown's avatar
unknown committed
3831
                                           uchar *buffer)
3832 3833
{
  DBUG_ENTER("translog_write_data_on_page");
unknown's avatar
unknown committed
3834 3835
  DBUG_PRINT("enter", ("Chunk length: %lu  Page size %u",
                       (ulong) length, (uint) cursor->current_page_fill));
3836
  DBUG_ASSERT(length > 0);
unknown's avatar
unknown committed
3837
  DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
unknown's avatar
unknown committed
3838
  DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
3839 3840
              TRANSLOG_WRITE_BUFFER);

unknown's avatar
unknown committed
3841
  memcpy(cursor->ptr, buffer, length);
3842
  cursor->ptr+= length;
unknown's avatar
unknown committed
3843 3844
  (*horizon)+= length; /* adds offset */
  cursor->current_page_fill+= length;
3845 3846
  if (!cursor->chaser)
    cursor->buffer->size+= length;
unknown's avatar
unknown committed
3847 3848
  DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx  "
                      "chaser: %d  Size: %lu (%lu)",
3849 3850
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3851
                      (ulong) (cursor->ptr - cursor->buffer->buffer)));
unknown's avatar
unknown committed
3852
  translog_check_cursor(cursor);
3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868

  DBUG_RETURN(0);
}


/*
  Write data from parts of given length to the current page

  SYNOPSIS
    translog_write_parts_on_page()
    horizon              \ Pointers on file and buffer
    cursor               /
    length               IN     length of the chunk
    parts                IN/OUT chunk source

  RETURN
    0  OK
    1  Error
*/

3873 3874 3875 3876
static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
                                            struct st_buffer_cursor *cursor,
                                            translog_size_t length,
                                            struct st_translog_parts *parts)
3877 3878 3879 3880
{
  translog_size_t left= length;
  uint cur= (uint) parts->current;
  DBUG_ENTER("translog_write_parts_on_page");
unknown's avatar
unknown committed
3881
  DBUG_PRINT("enter", ("Chunk length: %lu  parts: %u of %u. Page size: %u  "
3882 3883
                       "Buffer size: %lu (%lu)",
                       (ulong) length,
3884
                       (uint) (cur + 1), (uint) parts->elements,
unknown's avatar
unknown committed
3885
                       (uint) cursor->current_page_fill,
3886
                       (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3887
                       (ulong) (cursor->ptr - cursor->buffer->buffer)));
3888
  DBUG_ASSERT(length > 0);
unknown's avatar
unknown committed
3889
  DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
unknown's avatar
unknown committed
3890
  DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
3891 3892 3893 3894 3895
              TRANSLOG_WRITE_BUFFER);

  do
  {
    translog_size_t len;
3896
    LEX_STRING *part;
unknown's avatar
unknown committed
3897
    uchar *buff;
3898

3899 3900
    DBUG_ASSERT(cur < parts->elements);
    part= parts->parts + cur;
unknown's avatar
unknown committed
3901
    buff= (uchar*) part->str;
3902 3903 3904
    DBUG_PRINT("info", ("Part: %u  Length: %lu  left: %lu  buff: 0x%lx",
                        (uint) (cur + 1), (ulong) part->length, (ulong) left,
                        (ulong) buff));
3905

3906
    if (part->length > left)
3907 3908 3909
    {
      /* we should write less then the current part */
      len= left;
3910 3911
      part->length-= len;
      part->str+= len;
unknown's avatar
unknown committed
3912
      DBUG_PRINT("info", ("Set new part: %u  Length: %lu",
3913
                          (uint) (cur + 1), (ulong) part->length));
3914 3915 3916
    }
    else
    {
3917
      len= part->length;
3918 3919 3920 3921 3922
      cur++;
      DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
    }
    DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx  %u",
                        (ulong) cursor->ptr, (ulong)buff, (uint)len));
3923 3924 3925 3926 3927 3928
    if (likely(len))
    {
      memcpy(cursor->ptr, buff, len);
      left-= len;
      cursor->ptr+= len;
    }
3929 3930
  } while (left);

unknown's avatar
unknown committed
3931
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)  Length %lu(0x%lx)",
unknown's avatar
unknown committed
3932
                      LSN_IN_PARTS(*horizon),
3933
                      (ulong) length, (ulong) length));
3934
  parts->current= cur;
unknown's avatar
unknown committed
3935 3936
  (*horizon)+= length; /* offset increasing */
  cursor->current_page_fill+= length;
3937 3938
  if (!cursor->chaser)
    cursor->buffer->size+= length;
unknown's avatar
unknown committed
3939 3940 3941 3942
  /*
    We do not not updating parts->total_record_length here because it is
    need only before writing record to have total length
  */
3943 3944
  DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx  "
                      "chaser: %d  Size: %lu (%lu)  "
unknown's avatar
unknown committed
3945
                      "Horizon: (%lu,0x%lx)  buff offset: 0x%lx",
3946 3947
                      (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
                      cursor->chaser, (ulong) cursor->buffer->size,
unknown's avatar
unknown committed
3948
                      (ulong) (cursor->ptr - cursor->buffer->buffer),
unknown's avatar
unknown committed
3949
                      LSN_IN_PARTS(*horizon),
3950 3951
                      (ulong) (LSN_OFFSET(cursor->buffer->offset) +
                               cursor->buffer->size)));
unknown's avatar
unknown committed
3952
  translog_check_cursor(cursor);
3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963

  DBUG_RETURN(0);
}


/*
  Put 1 group chunk type 0 header into parts array

  SYNOPSIS
    translog_write_variable_record_1group_header()
    parts                Descriptor of record source parts
unknown's avatar
unknown committed
3964
    type                 The log record type
3965
    short_trid           Short transaction ID or 0 if it has no sense
3966 3967 3968 3969 3970 3971 3972 3973 3974
    header_length        Calculated header length of chunk type 0
    chunk0_header        Buffer for the chunk header writing
*/

static void
translog_write_variable_record_1group_header(struct st_translog_parts *parts,
                                             enum translog_record_type type,
                                             SHORT_TRANSACTION_ID short_trid,
                                             uint16 header_length,
unknown's avatar
unknown committed
3975
                                             uchar *chunk0_header)
3976
{
3977
  LEX_STRING *part;
unknown's avatar
unknown committed
3978
  DBUG_ASSERT(parts->current != 0);     /* first part is left for header */
3979 3980 3981
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= header_length);
  part->str= (char*)chunk0_header;
unknown's avatar
unknown committed
3982
  /* puts chunk type */
unknown's avatar
unknown committed
3983
  *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
3984
  int2store(chunk0_header + 1, short_trid);
unknown's avatar
unknown committed
3985
  /* puts record length */
3986 3987 3988
  translog_write_variable_record_1group_code_len(chunk0_header + 3,
                                                 parts->record_length,
                                                 header_length);
unknown's avatar
unknown committed
3989
  /* puts 0 as chunk length which indicate 1 group record */
3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001
  int2store(chunk0_header + header_length - 2, 0);
}


/*
  Increase number of writers for this buffer

  SYNOPSIS
    translog_buffer_increase_writers()
    buffer               target buffer
*/

unknown's avatar
unknown committed
4002 4003
static inline void
translog_buffer_increase_writers(struct st_translog_buffer *buffer)
4004 4005
{
  DBUG_ENTER("translog_buffer_increase_writers");
unknown's avatar
unknown committed
4006
  translog_buffer_lock_assert_owner(buffer);
4007
  buffer->copy_to_buffer_in_progress++;
4008
  DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u  0x%lx  progress: %d",
4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026
                      (uint) buffer->buffer_no, (ulong) buffer,
                      buffer->copy_to_buffer_in_progress));
  DBUG_VOID_RETURN;
}


/*
  Decrease number of writers for this buffer

  SYNOPSIS
    translog_buffer_decrease_writers()
    buffer               target buffer
*/


static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
{
  DBUG_ENTER("translog_buffer_decrease_writers");
unknown's avatar
unknown committed
4027
  translog_buffer_lock_assert_owner(buffer);
4028
  buffer->copy_to_buffer_in_progress--;
4029 4030 4031 4032
  DBUG_PRINT("info",
             ("copy_to_buffer_in_progress. Buffer #%u  0x%lx  progress: %d",
              (uint) buffer->buffer_no, (ulong) buffer,
              buffer->copy_to_buffer_in_progress));
unknown's avatar
unknown committed
4033 4034
  if (buffer->copy_to_buffer_in_progress == 0)
    pthread_cond_broadcast(&buffer->waiting_filling_buffer);
4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048
  DBUG_VOID_RETURN;
}


/*
  Move to the next page and fill it with a chunk type 2

  SYNOPSIS
    translog_write_variable_record_chunk2_page()
    parts                Descriptor of record source parts
    horizon              \ Pointers on file position and buffer
    cursor               /

  NOTE
    If switching the page hands back a buffer, that buffer is locked,
    its writers counter is decreased, it is flushed (only if no error
    happened so far) and unlocked again.
    The chunk consists of the 1 byte chunk type followed by
    log_descriptor.page_capacity_chunk_2 bytes taken from 'parts'.

  RETURN
    0  OK
    1  Error
*/

static my_bool
translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
                                           TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor)
{
  struct st_translog_buffer *finished_buffer;
  int error;
  /* The whole header of this chunk is its type byte */
  uchar chunk_type= TRANSLOG_CHUNK_NOHDR;
  DBUG_ENTER("translog_write_variable_record_chunk2_page");

  LINT_INIT(finished_buffer);
  error= translog_page_next(horizon, cursor, &finished_buffer);
  if (finished_buffer)
  {
    error|= translog_buffer_lock(finished_buffer);
    translog_buffer_decrease_writers(finished_buffer);
    if (error == 0)
      error= translog_buffer_flush(finished_buffer);
    error|= translog_buffer_unlock(finished_buffer);
  }
  if (error != 0)
    DBUG_RETURN(1);

  /* Chunk type first, then as much of the record as the page can hold */
  translog_write_data_on_page(horizon, cursor, 1, &chunk_type);
  translog_write_parts_on_page(horizon, cursor,
                               log_descriptor.page_capacity_chunk_2, parts);
  DBUG_RETURN(0);
}


/*
  Move to the next page and put a chunk type 3 of the requested length
  at its beginning

  SYNOPSIS
    translog_write_variable_record_chunk3_page()
    parts                Descriptor of record source parts
    length               Length of the data of this chunk; 0 means that
                         only the switch to the next page is wanted
    horizon              \ Pointers on file position and buffer
    cursor               /

  NOTE
    If switching the page hands back a buffer, that buffer is locked,
    its writers counter is decreased, it is flushed (only if no error
    happened so far) and unlocked again.
    The 3 byte chunk header (type + 2 bytes of length) is put into the
    slot just before parts->current and written together with the data.

  RETURN
    0  OK
    1  Error
*/

static my_bool
translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
                                           uint16 length,
                                           TRANSLOG_ADDRESS *horizon,
                                           struct st_buffer_cursor *cursor)
{
  struct st_translog_buffer *finished_buffer;
  LEX_STRING *header_part;
  int error;
  uchar chunk3_header[1 + 2];
  DBUG_ENTER("translog_write_variable_record_chunk3_page");

  LINT_INIT(finished_buffer);
  error= translog_page_next(horizon, cursor, &finished_buffer);
  if (finished_buffer)
  {
    error|= translog_buffer_lock(finished_buffer);
    translog_buffer_decrease_writers(finished_buffer);
    if (error == 0)
      error= translog_buffer_flush(finished_buffer);
    error|= translog_buffer_unlock(finished_buffer);
  }
  if (error != 0)
    DBUG_RETURN(1);
  if (!length)
  {
    /* Nothing to put into chunk 3: the call was for the new page only */
    DBUG_PRINT("info", ("It is a call to make page header only"));
    DBUG_RETURN(0);
  }

  /* A free slot has to be left for the header before the data parts */
  DBUG_ASSERT(parts->current != 0);
  parts->current--;
  header_part= &parts->parts[parts->current];
  header_part->length= 1 + 2;
  parts->total_record_length+= 1 + 2;
  header_part->str= (char*)chunk3_header;
  /* Byte 0: chunk type */
  chunk3_header[0]= (uchar) (TRANSLOG_CHUNK_LNGTH);
  /* Bytes 1-2: chunk length */
  int2store(chunk3_header + 1, length);

  /* Header and data are written in one go */
  translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
  DBUG_RETURN(0);
}

/*
  Move the log pointer (horizon) over the rest of the current page, the
  given number of full pages and the given amount of data on the last page

  SYNOPSIS
    translog_advance_pointer()
    pages                Number of full pages starting from the next one
    last_page_data       Amount of data on the last page (the page
                         overhead is added to it here)

  NOTE
    The loghandler has to be locked (asserted).
    Every buffer the pointer passes through, including the final one,
    gets its writers counter increased.
    Buffers which are left behind are unlocked here; the buffer which is
    current on return stays locked.

  RETURN
    0  OK
    1  Error
*/

static my_bool translog_advance_pointer(uint pages, uint16 last_page_data)
{
  struct st_buffer_cursor *bc= &log_descriptor.bc;
  translog_size_t last_page_fill= (log_descriptor.page_overhead +
                                   last_page_data);
  /* Total distance: rest of current page + full pages + last page part */
  translog_size_t to_advance= (TRANSLOG_PAGE_SIZE -
                               bc->current_page_fill +
                               pages * TRANSLOG_PAGE_SIZE + last_page_fill);
  DBUG_ENTER("translog_advance_pointer");
  DBUG_PRINT("enter", ("Pointer:  (%lu, 0x%lx) + %u + %u pages + %u + %u",
                       LSN_IN_PARTS(log_descriptor.horizon),
                       (uint) (TRANSLOG_PAGE_SIZE -
                               bc->current_page_fill),
                       pages, (uint) log_descriptor.page_overhead,
                       (uint) last_page_data));
  translog_lock_assert_owner();

  /*
    Each pass of the loop consumes what is left of the current buffer or
    of the current log file (whichever ends first) and switches to the
    next buffer. The loop is left as soon as the remaining distance fits
    into both. Normally this takes 1 or 2 passes; the end of a really
    huge record (last group with the "table of content" of all groups,
    written ignoring buffer borders) can occupy 3 buffers.
  */
  for (;;)
  {
    uint8 next_buffer_no;
    struct st_translog_buffer *next_buffer;
    struct st_translog_buffer *prev_buffer;
    translog_size_t buffer_room, file_room, step;

    buffer_room= TRANSLOG_WRITE_BUFFER - bc->buffer->size;
    file_room= (log_descriptor.log_file_max_size -
                LSN_OFFSET(log_descriptor.horizon));
    DBUG_PRINT("info", ("offset: %lu  buffer_end_offs: %lu, "
                        "file_end_offs:  %lu",
                        (ulong) to_advance, (ulong) buffer_room,
                        (ulong) file_room));
    DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = "
                        "0x%lx (0x%lx)",
                        (uint) bc->buffer->buffer_no,
                        (uint) bc->buffer_no,
                        (ulong) bc->buffer,
                        (ulong) LSN_OFFSET(bc->buffer->offset),
                        (ulong) bc->buffer->size,
                        (ulong) (LSN_OFFSET(bc->buffer->offset) +
                                 bc->buffer->size),
                        (ulong) LSN_OFFSET(log_descriptor.horizon)));
    DBUG_ASSERT(LSN_OFFSET(bc->buffer->offset) +
                bc->buffer->size ==
                LSN_OFFSET(log_descriptor.horizon));

    /* What can be consumed before the buffer or the file ends */
    step= min(buffer_room, file_room);
    if (to_advance <= step)
      break;                                    /* the rest fits here */

    /* Buffers are used in ring order */
    prev_buffer= bc->buffer;
    next_buffer_no= (bc->buffer_no + 1) % TRANSLOG_BUFFERS_NO;
    next_buffer= log_descriptor.buffers + next_buffer_no;

    translog_buffer_lock(next_buffer);
    translog_wait_for_buffer_free(next_buffer);

    /* TODO: check is it ptr or size enough */
    bc->buffer->size+= step;
    bc->ptr+= step;
    DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)",
                        (uint) bc->buffer->buffer_no,
                        (ulong) bc->buffer,
                        bc->chaser,
                        (ulong) bc->buffer->size,
                        (ulong) (bc->ptr - bc->buffer->buffer)));
    DBUG_ASSERT((ulong) (bc->ptr - bc->buffer->buffer) ==
                bc->buffer->size);
    DBUG_ASSERT(bc->buffer->buffer_no == bc->buffer_no);
    translog_buffer_increase_writers(bc->buffer);

    if (file_room <= buffer_room)
    {
      /* The file ends first: continue on the first page of the next file */
      log_descriptor.horizon+= LSN_ONE_FILE;
      log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
                                                 TRANSLOG_PAGE_SIZE);
      DBUG_PRINT("info", ("New file: %lu",
                          (ulong) LSN_FILE_NO(log_descriptor.horizon)));
      if (translog_create_new_file())
        DBUG_RETURN(1);
    }
    else
    {
      DBUG_PRINT("info", ("The same file"));
      log_descriptor.horizon+= step;            /* only the offset grows */
    }
    translog_start_buffer(next_buffer, bc, next_buffer_no);
    prev_buffer->next_buffer_offset= next_buffer->offset;
    if (translog_buffer_unlock(prev_buffer))
      DBUG_RETURN(1);
    to_advance-= step;
  }

  /* The remaining distance lies inside the current buffer and file */
  bc->ptr+= to_advance;
  bc->buffer->size+= to_advance;
  translog_buffer_increase_writers(bc->buffer);
  log_descriptor.horizon+= to_advance;          /* only the offset grows */
  bc->current_page_fill= last_page_fill;
  DBUG_PRINT("info", ("drop write_counter"));
  bc->write_counter= 0;
  bc->previous_offset= 0;
  DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx  chaser: %d  Size: %lu (%lu)  "
                      "offset: %u  last page: %u",
                      (uint) bc->buffer->buffer_no,
                      (ulong) bc->buffer,
                      bc->chaser,
                      (ulong) bc->buffer->size,
                      (ulong) (bc->ptr - bc->buffer->buffer),
                      (uint) to_advance,
                      (uint) last_page_fill));
  DBUG_PRINT("info",
             ("pointer moved to: (%lu, 0x%lx)",
              LSN_IN_PARTS(log_descriptor.horizon)));
  translog_check_cursor(bc);
  bc->protected= 0;
  DBUG_RETURN(0);
}



/*
  Get page rest

  SYNOPSIS
    translog_get_current_page_rest()

  NOTE loghandler should be locked

  RETURN
    number of bytes left on the current page
*/

unknown's avatar
unknown committed
4300 4301 4302 4303 4304
static uint translog_get_current_page_rest()
{
  return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
}

4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317

/*
  Get buffer rest in full pages

  SYNOPSIS
     translog_get_current_buffer_rest()

  NOTE loghandler should be locked

  RETURN
    number of full pages left on the current buffer
*/

unknown's avatar
unknown committed
4318 4319 4320 4321 4322 4323
static uint translog_get_current_buffer_rest()
{
  return ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
           log_descriptor.bc.ptr) /
          TRANSLOG_PAGE_SIZE);
}
4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341

/*
  Calculate possible group size without first (current) page

  SYNOPSIS
    translog_get_current_group_size()

  NOTE loghandler should be locked

  RETURN
    group size without first (current) page
*/

static translog_size_t translog_get_current_group_size()
{
  /* buffer rest in full pages */
  translog_size_t buffer_rest= translog_get_current_buffer_rest();
  DBUG_ENTER("translog_get_current_group_size");
unknown's avatar
unknown committed
4342
  DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
4343 4344 4345 4346 4347

  buffer_rest*= log_descriptor.page_capacity_chunk_2;
  /* in case of only half of buffer free we can write this and next buffer */
  if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
  {
unknown's avatar
unknown committed
4348 4349
    DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
                        (ulong) buffer_rest,
4350 4351 4352 4353
                        (ulong) log_descriptor.buffer_capacity_chunk_2));
    buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
  }

unknown's avatar
unknown committed
4354
  DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
4355 4356 4357 4358 4359

  DBUG_RETURN(buffer_rest);
}


unknown's avatar
unknown committed
4360 4361
/**
   @brief Write variable record in 1 group.
4362

unknown's avatar
unknown committed
4363 4364 4365 4366 4367 4368 4369 4370 4371 4372
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Calculated header length of chunk type 0
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4373

unknown's avatar
unknown committed
4374 4375 4376
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4377 4378 4379 4380 4381
*/

static my_bool
translog_write_variable_record_1group(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
4382
                                      MARIA_HA *tbl_info,
4383 4384 4385 4386
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush, uint16 header_length,
unknown's avatar
unknown committed
4387
                                      TRN *trn, void *hook_arg)
4388 4389 4390 4391 4392 4393 4394
{
  TRANSLOG_ADDRESS horizon;
  struct st_buffer_cursor cursor;
  int rc= 0;
  uint i;
  translog_size_t record_rest, full_pages, first_page;
  uint additional_chunk3_page= 0;
unknown's avatar
unknown committed
4395
  uchar chunk0_header[1 + 2 + 5 + 2];
4396
  DBUG_ENTER("translog_write_variable_record_1group");
unknown's avatar
unknown committed
4397
  translog_lock_assert_owner();
4398 4399

  *lsn= horizon= log_descriptor.horizon;
4400 4401 4402 4403
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                             *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
unknown's avatar
unknown committed
4404
                                                        lsn, hook_arg)))
4405
  {
unknown's avatar
unknown committed
4406
    translog_unlock();
4407 4408 4409 4410 4411
    DBUG_RETURN(1);
  }
  cursor= log_descriptor.bc;
  cursor.chaser= 1;

unknown's avatar
unknown committed
4412
  /* Advance pointer to be able unlock the loghandler */
4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425
  first_page= translog_get_current_page_rest();
  record_rest= parts->record_length - (first_page - header_length);
  full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
  record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);

  if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
  {
    DBUG_PRINT("info", ("2 chunks type 3 is needed"));
    /* We will write 2 chunks type 3 at the end of this group */
    additional_chunk3_page= 1;
    record_rest= 1;
  }

unknown's avatar
unknown committed
4426 4427
  DBUG_PRINT("info", ("first_page: %u (%u)  full_pages: %u (%lu)  "
                      "additional: %u (%u)  rest %u = %u",
4428 4429 4430 4431 4432 4433 4434 4435 4436
                      first_page, first_page - header_length,
                      full_pages,
                      (ulong) full_pages *
                      log_descriptor.page_capacity_chunk_2,
                      additional_chunk3_page,
                      additional_chunk3_page *
                      (log_descriptor.page_capacity_chunk_2 - 1),
                      record_rest, parts->record_length));
  /* record_rest + 3 is chunk type 3 overhead + record_rest */
unknown's avatar
unknown committed
4437 4438
  rc|= translog_advance_pointer(full_pages + additional_chunk3_page,
                                (record_rest ? record_rest + 3 : 0));
4439 4440 4441 4442 4443
  log_descriptor.bc.buffer->last_lsn= *lsn;

  rc|= translog_unlock();

  /*
unknown's avatar
unknown committed
4444
     Check if we switched buffer and need process it (current buffer is
4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }
  if (rc)
    DBUG_RETURN(1);

  translog_write_variable_record_1group_header(parts, type, short_trid,
                                               header_length, chunk0_header);

  /* fill the pages */
  translog_write_parts_on_page(&horizon, &cursor, first_page, parts);


unknown's avatar
unknown committed
4463
  DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4464 4465
                      LSN_IN_PARTS(log_descriptor.horizon),
                      LSN_IN_PARTS(horizon)));
4466 4467 4468 4469 4470 4471

  for (i= 0; i < full_pages; i++)
  {
    if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
      DBUG_RETURN(1);

unknown's avatar
unknown committed
4472
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4473 4474
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon)));
4475 4476 4477 4478 4479 4480 4481 4482 4483
  }

  if (additional_chunk3_page)
  {
    if (translog_write_variable_record_chunk3_page(parts,
                                                   log_descriptor.
                                                   page_capacity_chunk_2 - 2,
                                                   &horizon, &cursor))
      DBUG_RETURN(1);
unknown's avatar
unknown committed
4484
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
4485 4486
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon)));
unknown's avatar
unknown committed
4487
    DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
4488 4489 4490 4491 4492 4493
  }

  if (translog_write_variable_record_chunk3_page(parts,
                                                 record_rest,
                                                 &horizon, &cursor))
    DBUG_RETURN(1);
unknown's avatar
unknown committed
4494
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
4495 4496 4497 4498
                        (ulong) LSN_FILE_NO(log_descriptor.horizon),
                        (ulong) LSN_OFFSET(log_descriptor.horizon),
                        (ulong) LSN_FILE_NO(horizon),
                        (ulong) LSN_OFFSET(horizon)));
4499

unknown's avatar
unknown committed
4500
  if (!(rc= translog_buffer_lock(cursor.buffer)))
4501 4502 4503 4504 4505 4506 4507 4508
  {
    translog_buffer_decrease_writers(cursor.buffer);
  }
  rc|= translog_buffer_unlock(cursor.buffer);
  DBUG_RETURN(rc);
}


unknown's avatar
unknown committed
4509 4510
/**
   @brief Write variable record in 1 chunk.
4511

unknown's avatar
unknown committed
4512 4513 4514 4515 4516 4517 4518 4519 4520 4521
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Calculated header length of chunk type 0
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4522

unknown's avatar
unknown committed
4523 4524 4525
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4526 4527 4528 4529 4530
*/

static my_bool
translog_write_variable_record_1chunk(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
4531
                                      MARIA_HA *tbl_info,
4532 4533 4534 4535
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush, uint16 header_length,
unknown's avatar
unknown committed
4536
                                      TRN *trn, void *hook_arg)
4537 4538
{
  int rc;
unknown's avatar
unknown committed
4539
  uchar chunk0_header[1 + 2 + 5 + 2];
4540
  DBUG_ENTER("translog_write_variable_record_1chunk");
unknown's avatar
unknown committed
4541
  translog_lock_assert_owner();
4542 4543 4544 4545 4546

  translog_write_variable_record_1group_header(parts, type, short_trid,
                                               header_length, chunk0_header);

  *lsn= log_descriptor.horizon;
4547 4548 4549 4550
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                                 *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
unknown's avatar
unknown committed
4551
                                                        lsn, hook_arg)))
4552
  {
unknown's avatar
unknown committed
4553
    translog_unlock();
4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578
    DBUG_RETURN(1);
  }

  rc= translog_write_parts_on_page(&log_descriptor.horizon,
                                   &log_descriptor.bc,
                                   parts->total_record_length, parts);
  log_descriptor.bc.buffer->last_lsn= *lsn;
  rc|= translog_unlock();

  /*
     check if we switched buffer and need process it (current buffer is
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }

  DBUG_RETURN(rc);
}


/*
unknown's avatar
unknown committed
4579
  @brief Calculates and write LSN difference (compressed LSN).
4580

unknown's avatar
unknown committed
4581 4582 4583
  @param base_lsn        LSN from which we calculate difference
  @param lsn             LSN for codding
  @param dst             Result will be written to dst[-pack_length] .. dst[-1]
4584

unknown's avatar
unknown committed
4585 4586
  @note To store an LSN in a compact way we will use the following compression:
    If a log record has LSN1, and it contains the LSN2 as a back reference,
unknown's avatar
unknown committed
4587
    Instead of LSN2 we write LSN1-LSN2, encoded as:
4588 4589 4590
     two bits     the number N (see below)
     14 bits
     N bytes
unknown's avatar
unknown committed
4591
     That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
4592 4593
     is stored in the first two bits.

unknown's avatar
unknown committed
4594 4595 4596
  @note function made to write the result in backward direction with no
  special sense or tricks both directions are equal in complicity

unknown's avatar
unknown committed
4597
  @retval #    pointer on coded LSN
4598 4599
*/

unknown's avatar
unknown committed
4600
static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
4601
{
unknown's avatar
unknown committed
4602
  uint64 diff;
4603
  DBUG_ENTER("translog_put_LSN_diff");
unknown's avatar
unknown committed
4604
  DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx)  val: (0x%lu,0x%lx)  dst: 0x%lx",
unknown's avatar
unknown committed
4605 4606
                       LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
                       (ulong) dst));
unknown's avatar
unknown committed
4607 4608 4609 4610
  DBUG_ASSERT(base_lsn > lsn);
  diff= base_lsn - lsn;
  DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
  if (diff <= 0x3FFF)
4611
  {
unknown's avatar
unknown committed
4612 4613 4614 4615 4616 4617 4618
    dst-= 2;
    /*
      Note we store this high uchar first to ensure that first uchar has
      0 in the 3 upper bits.
    */
    dst[0]= diff >> 8;
    dst[1]= (diff & 0xFF);
4619
  }
unknown's avatar
unknown committed
4620
  else if (diff <= 0x3FFFFFL)
4621
  {
unknown's avatar
unknown committed
4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632
    dst-= 3;
    dst[0]= 0x40 | (diff >> 16);
    int2store(dst + 1, diff & 0xFFFF);
  }
  else if (diff <= 0x3FFFFFFFL)
  {
    dst-= 4;
    dst[0]= 0x80 | (diff >> 24);
    int3store(dst + 1, diff & 0xFFFFFFL);
  }
  else if (diff <= LL(0x3FFFFFFFFF))
unknown's avatar
unknown committed
4633

unknown's avatar
unknown committed
4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648
  {
    dst-= 5;
    dst[0]= 0xC0 | (diff >> 32);
    int4store(dst + 1, diff & 0xFFFFFFFFL);
  }
  else
  {
    /*
      It is full LSN after special 1 diff (which is impossible
      in real life)
    */
    dst-= 2 + LSN_STORE_SIZE;
    dst[0]= 0;
    dst[1]= 1;
    lsn_store(dst + 2, lsn);
4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664
  }
  DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst));
  DBUG_RETURN(dst);
}


/*
  Get LSN from LSN-difference (compressed LSN)

  SYNOPSIS
    translog_get_LSN_from_diff()
    base_lsn             LSN from which we calculate difference
    src                  pointer to coded lsn
    dst                  pointer to buffer where to write 7byte LSN

  NOTE:
unknown's avatar
unknown committed
4665
    To store an LSN in a compact way we will use the following compression:
4666 4667

    If a log record has LSN1, and it contains the lSN2 as a back reference,
unknown's avatar
unknown committed
4668
    Instead of LSN2 we write LSN1-LSN2, encoded as:
4669 4670 4671 4672 4673

     two bits     the number N (see below)
     14 bits
     N bytes

unknown's avatar
unknown committed
4674 4675
    That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
    is stored in the first two bits.
4676 4677 4678 4679 4680

  RETURN
    pointer to buffer after decoded LSN
*/

unknown's avatar
unknown committed
4681
static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
4682 4683 4684 4685
{
  LSN lsn;
  uint32 diff;
  uint32 first_byte;
unknown's avatar
unknown committed
4686
  uint32 file_no, rec_offset;
4687 4688
  uint8 code;
  DBUG_ENTER("translog_get_LSN_from_diff");
unknown's avatar
unknown committed
4689
  DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx)  src: 0x%lx  dst 0x%lx",
unknown's avatar
unknown committed
4690
                       LSN_IN_PARTS(base_lsn), (ulong) src, (ulong) dst));
4691
  first_byte= *((uint8*) src);
4692
  code= first_byte >> 6; /* Length is in 2 most significant bits */
unknown's avatar
unknown committed
4693 4694 4695 4696 4697
  first_byte&= 0x3F;
  src++;                                        /* Skip length + encode */
  file_no= LSN_FILE_NO(base_lsn);               /* Assume relative */
  DBUG_PRINT("info", ("code: %u  first byte: %lu",
                      (uint) code, (ulong) first_byte));
4698
  switch (code) {
unknown's avatar
unknown committed
4699
  case 0:
4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710
    if (first_byte == 0 && *((uint8*)src) == 1)
    {
      /*
        It is full LSN after special 1 diff (which is impossible
        in real life)
      */
      memcpy(dst, src + 1, LSN_STORE_SIZE);
      DBUG_PRINT("info", ("Special case of full LSN, new src: 0x%lx",
                          (ulong) (src + 1 + LSN_STORE_SIZE)));
      DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
    }
unknown's avatar
unknown committed
4711
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src));
4712
    break;
unknown's avatar
unknown committed
4713 4714 4715
  case 1:
    diff= uint2korr(src);
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff);
4716
    break;
unknown's avatar
unknown committed
4717 4718 4719
  case 2:
    diff= uint3korr(src);
    rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff);
4720
    break;
unknown's avatar
unknown committed
4721
  case 3:
4722
  {
4723
    ulonglong base_offset= LSN_OFFSET(base_lsn);
unknown's avatar
unknown committed
4724
    diff= uint4korr(src);
4725
    if (diff > LSN_OFFSET(base_lsn))
4726 4727 4728
    {
      /* take 1 from file offset */
      first_byte++;
unknown's avatar
unknown committed
4729
      base_offset+= LL(0x100000000);
4730
    }
unknown's avatar
unknown committed
4731 4732
    file_no= LSN_FILE_NO(base_lsn) - first_byte;
    rec_offset= base_offset - diff;
4733 4734 4735 4736 4737 4738
    break;
  }
  default:
    DBUG_ASSERT(0);
    DBUG_RETURN(NULL);
  }
unknown's avatar
unknown committed
4739 4740 4741
  lsn= MAKE_LSN(file_no, rec_offset);
  src+= code + 1;
  lsn_store(dst, lsn);
4742
  DBUG_PRINT("info", ("new src: 0x%lx", (ulong) src));
4743 4744 4745 4746
  DBUG_RETURN(src);
}


unknown's avatar
unknown committed
4747 4748
/**
  @brief Encodes relative LSNs listed in the parameters.
4749

unknown's avatar
unknown committed
4750 4751 4752 4753
  @param parts           Parts list with encoded LSN(s)
  @param base_lsn        LSN which is base for encoding
  @param lsns            number of LSN(s) to encode
  @param compressed_LSNs buffer which can be used for storing compressed LSN(s)
4754 4755
*/

unknown's avatar
unknown committed
4756 4757 4758
static void  translog_relative_LSN_encode(struct st_translog_parts *parts,
                                          LSN base_lsn,
                                          uint lsns, uchar *compressed_LSNs)
4759
{
4760
  LEX_STRING *part;
unknown's avatar
unknown committed
4761
  uint lsns_len= lsns * LSN_STORE_SIZE;
4762 4763
  char buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
  char *buffer= buffer_src;
4764 4765 4766

  DBUG_ENTER("translog_relative_LSN_encode");

4767
  DBUG_ASSERT(parts->current != 0);
4768
  part= parts->parts + parts->current;
4769

4770
  /* collect all LSN(s) in one chunk if it (they) is (are) divided */
4771
  if (part->length < lsns_len)
4772
  {
4773 4774
    uint copied= part->length;
    LEX_STRING *next_part;
unknown's avatar
unknown committed
4775
    DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs));
unknown's avatar
unknown committed
4776
    memcpy(buffer, (uchar*)part->str, part->length);
4777
    next_part= parts->parts + parts->current + 1;
4778 4779
    do
    {
4780 4781
      DBUG_ASSERT(next_part < parts->parts + parts->elements);
      if ((next_part->length + copied) < lsns_len)
4782
      {
unknown's avatar
unknown committed
4783
        memcpy(buffer + copied, (uchar*)next_part->str,
4784 4785 4786 4787 4788
               next_part->length);
        copied+= next_part->length;
        next_part->length= 0; next_part->str= 0;
        /* delete_dynamic_element(&parts->parts, parts->current + 1); */
        next_part++;
4789 4790
        parts->current++;
        part= parts->parts + parts->current;
4791 4792 4793 4794
      }
      else
      {
        uint len= lsns_len - copied;
unknown's avatar
unknown committed
4795
        memcpy(buffer + copied, (uchar*)next_part->str, len);
4796
        copied= lsns_len;
4797 4798
        next_part->str+= len;
        next_part->length-= len;
4799 4800 4801
      }
    } while (copied < lsns_len);
  }
4802 4803 4804 4805 4806 4807 4808 4809 4810
  else
  {
    buffer= part->str;
    part->str+= lsns_len;
    part->length-= lsns_len;
    parts->current--;
    part= parts->parts + parts->current;
  }

4811 4812 4813
  {
    /* Compress */
    LSN ref;
4814
    int economy;
unknown's avatar
unknown committed
4815 4816
    uchar *src_ptr;
    uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
4817
                                      COMPRESSED_LSN_MAX_STORE_SIZE);
unknown's avatar
unknown committed
4818 4819 4820 4821
    /*
      We write the result in backward direction with no special sense or
      tricks both directions are equal in complicity
    */
4822
    for (src_ptr= buffer + lsns_len - LSN_STORE_SIZE;
4823
         src_ptr >= (uchar*) buffer;
4824
         src_ptr-= LSN_STORE_SIZE)
4825
    {
4826
      ref= lsn_korr(src_ptr);
unknown's avatar
unknown committed
4827
      dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
4828
    }
4829 4830 4831 4832 4833
    part->length= (uint)((compressed_LSNs +
                          (MAX_NUMBER_OF_LSNS_PER_RECORD *
                           COMPRESSED_LSN_MAX_STORE_SIZE)) -
                         dst_ptr);
    parts->record_length-= (economy= lsns_len - part->length);
4834 4835
    DBUG_PRINT("info", ("new length of LSNs: %lu  economy: %d",
                        (ulong)part->length, economy));
4836
    parts->total_record_length-= economy;
4837
    part->str= (char*)dst_ptr;
4838
  }
unknown's avatar
unknown committed
4839
  DBUG_VOID_RETURN;
4840 4841 4842
}


unknown's avatar
unknown committed
4843 4844
/**
   @brief Write multi-group variable-size record.
4845

unknown's avatar
unknown committed
4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  buffer_to_flush Buffer which have to be flushed if it is not 0
   @param  header_length   Header length calculated for 1 group
   @param  buffer_rest     Beginning from which we plan to write in full pages
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
4857

unknown's avatar
unknown committed
4858 4859 4860
   @return Operation status
     @retval 0      OK
     @retval 1      Error
4861 4862 4863 4864 4865
*/

static my_bool
translog_write_variable_record_mgroup(LSN *lsn,
                                      enum translog_record_type type,
unknown's avatar
unknown committed
4866
                                      MARIA_HA *tbl_info,
4867 4868 4869 4870 4871 4872
                                      SHORT_TRANSACTION_ID short_trid,
                                      struct st_translog_parts *parts,
                                      struct st_translog_buffer
                                      *buffer_to_flush,
                                      uint16 header_length,
                                      translog_size_t buffer_rest,
unknown's avatar
unknown committed
4873
                                      TRN *trn, void *hook_arg)
4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887
{
  TRANSLOG_ADDRESS horizon;
  struct st_buffer_cursor cursor;
  int rc= 0;
  uint i, chunk2_page, full_pages;
  uint curr_group= 0;
  translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
  translog_size_t done= 0;
  struct st_translog_group_descriptor group;
  DYNAMIC_ARRAY groups;
  uint16 chunk3_size;
  uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
  uint16 last_page_capacity;
  my_bool new_page_before_chunk0= 1, first_chunk0= 1;
unknown's avatar
unknown committed
4888 4889
  uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
  uchar chunk2_header[1];
4890 4891
  uint header_fixed_part= header_length + 2;
  uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
4892
  uint file_of_the_first_group;
4893
  DBUG_ENTER("translog_write_variable_record_mgroup");
unknown's avatar
unknown committed
4894
  translog_lock_assert_owner();
4895

unknown's avatar
unknown committed
4896 4897
  chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;

unknown's avatar
unknown committed
4898 4899 4900
  if (my_init_dynamic_array(&groups,
                            sizeof(struct st_translog_group_descriptor),
                            10, 10))
4901
  {
unknown's avatar
unknown committed
4902
    translog_unlock();
4903
    DBUG_PRINT("error", ("init array failed"));
4904 4905 4906 4907 4908 4909 4910 4911 4912
    DBUG_RETURN(1);
  }

  first_page= translog_get_current_page_rest();
  record_rest= parts->record_length - (first_page - 1);
  DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));

  if (record_rest < buffer_rest)
  {
unknown's avatar
unknown committed
4913 4914 4915 4916 4917 4918 4919
    /*
      The record (group 1 type) is larger than the free space on the page
      - we need to split it in two. But when we split it in two, the first
      part is big enough to hold all the data of the record (because the
      header of the first part of the split is smaller than the header of
      the record as a whole when it takes only one chunk)
    */
4920 4921 4922 4923 4924
    DBUG_PRINT("info", ("too many free space because changing header"));
    buffer_rest-= log_descriptor.page_capacity_chunk_2;
    DBUG_ASSERT(record_rest >= buffer_rest);
  }

4925 4926
  file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
  translog_mark_file_unfinished(file_of_the_first_group);
4927 4928 4929 4930 4931 4932 4933
  do
  {
    group.addr= horizon= log_descriptor.horizon;
    cursor= log_descriptor.bc;
    cursor.chaser= 1;
    if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
    {
unknown's avatar
unknown committed
4934
      /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
4935 4936 4937 4938 4939
      full_pages= 255;
      buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
    }
    /*
       group chunks =
unknown's avatar
unknown committed
4940
       full pages + first page (which actually can be full, too).
4941 4942 4943
       But here we assign number of chunks - 1
    */
    group.num= full_pages;
unknown's avatar
unknown committed
4944
    if (insert_dynamic(&groups, (uchar*) &group))
4945
    {
4946
      DBUG_PRINT("error", ("insert into array failed"));
unknown's avatar
unknown committed
4947
      goto err_unlock;
4948 4949
    }

unknown's avatar
unknown committed
4950 4951
    DBUG_PRINT("info", ("chunk: #%u  first_page: %u (%u)  "
                        "full_pages: %lu (%lu)  "
4952 4953 4954
                        "Left %lu",
                        groups.elements,
                        first_page, first_page - 1,
4955
                        (ulong) full_pages,
unknown's avatar
unknown committed
4956 4957 4958 4959 4960 4961
                        (ulong) (full_pages *
                                 log_descriptor.page_capacity_chunk_2),
                        (ulong)(parts->record_length - (first_page - 1 +
                                                        buffer_rest) -
                                done)));
    rc|= translog_advance_pointer(full_pages, 0);
4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975

    rc|= translog_unlock();

    if (buffer_to_flush != NULL)
    {
      rc|= translog_buffer_lock(buffer_to_flush);
      translog_buffer_decrease_writers(buffer_to_flush);
      if (!rc)
        rc= translog_buffer_flush(buffer_to_flush);
      rc|= translog_buffer_unlock(buffer_to_flush);
      buffer_to_flush= NULL;
    }
    if (rc)
    {
4976
      DBUG_PRINT("error", ("flush of unlock buffer failed"));
unknown's avatar
unknown committed
4977
      goto err;
4978 4979 4980 4981
    }

    translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
    translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
unknown's avatar
unknown committed
4982 4983
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)  "
                        "Left  %lu",
unknown's avatar
unknown committed
4984 4985
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
4986 4987 4988 4989 4990 4991
                        (ulong) (parts->record_length - (first_page - 1) -
                                 done)));

    for (i= 0; i < full_pages; i++)
    {
      if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
unknown's avatar
unknown committed
4992
        goto err;
4993

unknown's avatar
unknown committed
4994 4995
      DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  "
                          "local: (%lu,0x%lx)  "
4996
                          "Left: %lu",
unknown's avatar
unknown committed
4997 4998
                          LSN_IN_PARTS(log_descriptor.horizon),
                          LSN_IN_PARTS(horizon),
4999 5000 5001 5002 5003 5004 5005
                          (ulong) (parts->record_length - (first_page - 1) -
                                   i * log_descriptor.page_capacity_chunk_2 -
                                   done)));
    }

    done+= (first_page - 1 + buffer_rest);

unknown's avatar
unknown committed
5006
    /* TODO: make separate function for following */
5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018
    rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
    if (buffer_to_flush != NULL)
    {
      rc|= translog_buffer_lock(buffer_to_flush);
      translog_buffer_decrease_writers(buffer_to_flush);
      if (!rc)
        rc= translog_buffer_flush(buffer_to_flush);
      rc|= translog_buffer_unlock(buffer_to_flush);
      buffer_to_flush= NULL;
    }
    if (rc)
    {
5019
      DBUG_PRINT("error", ("flush of unlock buffer failed"));
unknown's avatar
unknown committed
5020
      goto err;
5021 5022 5023 5024 5025 5026
    }
    rc= translog_buffer_lock(cursor.buffer);
    if (!rc)
      translog_buffer_decrease_writers(cursor.buffer);
    rc|= translog_buffer_unlock(cursor.buffer);
    if (rc)
unknown's avatar
unknown committed
5027
      goto err;
5028 5029 5030 5031 5032 5033 5034 5035 5036 5037

    translog_lock();

    first_page= translog_get_current_page_rest();
    buffer_rest= translog_get_current_group_size();
  } while (first_page + buffer_rest < (uint) (parts->record_length - done));

  group.addr= horizon= log_descriptor.horizon;
  cursor= log_descriptor.bc;
  cursor.chaser= 1;
5038
  group.num= 0;                       /* 0 because it does not matter */
unknown's avatar
unknown committed
5039
  if (insert_dynamic(&groups, (uchar*) &group))
5040
  {
5041
    DBUG_PRINT("error", ("insert into array failed"));
unknown's avatar
unknown committed
5042
    goto err_unlock;
5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091
  }
  record_rest= parts->record_length - done;
  DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
  if (first_page <= record_rest + 1)
  {
    chunk2_page= 1;
    record_rest-= (first_page - 1);
    full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
    record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
    last_page_capacity= page_capacity;
  }
  else
  {
    chunk2_page= full_pages= 0;
    last_page_capacity= first_page;
  }
  chunk3_size= 0;
  chunk3_pages= 0;
  if (last_page_capacity > record_rest + 1 && record_rest != 0)
  {
    if (last_page_capacity >
        record_rest + header_fixed_part + groups.elements * (7 + 1))
    {
      /* 1 record of type 0 */
      chunk3_pages= 0;
    }
    else
    {
      chunk3_pages= 1;
      if (record_rest + 2 == last_page_capacity)
      {
        chunk3_size= record_rest - 1;
        record_rest= 1;
      }
      else
      {
        chunk3_size= record_rest;
        record_rest= 0;
      }
    }
  }
  /*
     A first non-full page will hold type 0 chunk only if it fit in it with
     all its headers
  */
  while (page_capacity <
         record_rest + header_fixed_part +
         (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
    chunk0_pages++;
unknown's avatar
unknown committed
5092 5093
  DBUG_PRINT("info", ("chunk0_pages: %u  groups %u  groups per full page: %u  "
                      "Group on last page: %u",
5094 5095 5096 5097 5098
                      chunk0_pages, groups.elements,
                      groups_per_page,
                      (groups.elements -
                       ((page_capacity - header_fixed_part) / (7 + 1)) *
                       (chunk0_pages - 1))));
unknown's avatar
unknown committed
5099 5100
  DBUG_PRINT("info", ("first_page: %u  chunk2: %u  full_pages: %u (%lu)  "
                      "chunk3: %u (%u)  rest: %u",
5101 5102 5103 5104 5105
                      first_page,
                      chunk2_page, full_pages,
                      (ulong) full_pages *
                      log_descriptor.page_capacity_chunk_2,
                      chunk3_pages, (uint) chunk3_size, (uint) record_rest));
unknown's avatar
unknown committed
5106 5107 5108 5109 5110 5111 5112 5113 5114 5115
  rc= translog_advance_pointer(full_pages + chunk3_pages +
                               (chunk0_pages - 1),
                               record_rest + header_fixed_part +
                               (groups.elements -
                                ((page_capacity -
                                  header_fixed_part) / (7 + 1)) *
                                (chunk0_pages - 1)) * (7 + 1));
  rc|= translog_unlock();
  if (rc)
    goto err;
5116 5117 5118 5119 5120 5121

  if (chunk2_page)
  {
    DBUG_PRINT("info", ("chunk 2 to finish first page"));
    translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
    translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
unknown's avatar
unknown committed
5122
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
5123
                        "Left: %lu",
unknown's avatar
unknown committed
5124 5125
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
5126 5127 5128 5129 5130 5131 5132
                        (ulong) (parts->record_length - (first_page - 1) -
                                 done)));
  }
  else if (chunk3_pages)
  {
    DBUG_PRINT("info", ("chunk 3"));
    DBUG_ASSERT(full_pages == 0);
unknown's avatar
unknown committed
5133
    uchar chunk3_header[3];
unknown's avatar
unknown committed
5134
    chunk3_pages= 0;
5135 5136 5137 5138
    chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
    int2store(chunk3_header + 1, chunk3_size);
    translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
    translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
unknown's avatar
unknown committed
5139
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
5140
                        "Left: %lu",
unknown's avatar
unknown committed
5141 5142
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154
                        (ulong) (parts->record_length - chunk3_size - done)));
  }
  else
  {
    DBUG_PRINT("info", ("no new_page_before_chunk0"));
    new_page_before_chunk0= 0;
  }

  for (i= 0; i < full_pages; i++)
  {
    DBUG_ASSERT(chunk2_page != 0);
    if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
unknown's avatar
unknown committed
5155
      goto err;
5156

unknown's avatar
unknown committed
5157
    DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx) "
5158
                        "Left: %lu",
unknown's avatar
unknown committed
5159 5160
                        LSN_IN_PARTS(log_descriptor.horizon),
                        LSN_IN_PARTS(horizon),
5161 5162 5163 5164 5165 5166 5167 5168 5169
                        (ulong) (parts->record_length - (first_page - 1) -
                                 i * log_descriptor.page_capacity_chunk_2 -
                                 done)));
  }

  if (chunk3_pages &&
      translog_write_variable_record_chunk3_page(parts,
                                                 chunk3_size,
                                                 &horizon, &cursor))
unknown's avatar
unknown committed
5170 5171
    goto err;
  DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx)  local: (%lu,0x%lx)",
unknown's avatar
unknown committed
5172 5173
                      LSN_IN_PARTS(log_descriptor.horizon),
                      LSN_IN_PARTS(horizon)));
5174

unknown's avatar
unknown committed
5175
  *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN);
5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196
  int2store(chunk0_header + 1, short_trid);
  translog_write_variable_record_1group_code_len(chunk0_header + 3,
                                                 parts->record_length,
                                                 header_length);
  do
  {
    int limit;
    if (new_page_before_chunk0)
    {
      rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
      if (buffer_to_flush != NULL)
      {
        rc|= translog_buffer_lock(buffer_to_flush);
        translog_buffer_decrease_writers(buffer_to_flush);
        if (!rc)
          rc= translog_buffer_flush(buffer_to_flush);
        rc|= translog_buffer_unlock(buffer_to_flush);
        buffer_to_flush= NULL;
      }
      if (rc)
      {
5197
        DBUG_PRINT("error", ("flush of unlock buffer failed"));
unknown's avatar
unknown committed
5198
        goto err;
5199 5200 5201 5202 5203 5204
      }
    }
    new_page_before_chunk0= 1;

    if (first_chunk0)
    {
unknown's avatar
unknown committed
5205
      first_chunk0= 0;
5206 5207
      *lsn= horizon;
      if (log_record_type_descriptor[type].inwrite_hook &&
unknown's avatar
unknown committed
5208 5209
          (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
                                                            tbl_info,
unknown's avatar
unknown committed
5210
                                                            lsn, hook_arg))
unknown's avatar
unknown committed
5211
        goto err;
5212 5213 5214 5215 5216 5217 5218 5219 5220
    }

    /*
       A first non-full page will hold type 0 chunk only if it fit in it with
       all its headers => the fist page is full or number of groups less then
       possible number of full page.
    */
    limit= (groups_per_page < groups.elements - curr_group ?
            groups_per_page : groups.elements - curr_group);
unknown's avatar
unknown committed
5221
    DBUG_PRINT("info", ("Groups: %u  curr: %u  limit: %u",
5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243
                        (uint) groups.elements, (uint) curr_group,
                        (uint) limit));

    if (chunk0_pages == 1)
    {
      DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
                          (uint) limit, (uint) record_rest,
                          (uint) (2 + limit * (7 + 1) + record_rest)));
      int2store(chunk0_header + header_length - 2,
                2 + limit * (7 + 1) + record_rest);
    }
    else
    {
      DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
                          (uint) limit, (uint) (2 + limit * (7 + 1))));
      int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
    }
    int2store(chunk0_header + header_length, groups.elements - curr_group);
    translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
                                chunk0_header);
    for (i= curr_group; i < limit + curr_group; i++)
    {
unknown's avatar
unknown committed
5244 5245 5246 5247 5248
      struct st_translog_group_descriptor *grp_ptr;
      grp_ptr= dynamic_element(&groups, i,
                               struct st_translog_group_descriptor *);
      lsn_store(group_desc, grp_ptr->addr);
      group_desc[7]= grp_ptr->num;
5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264
      translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
    }

    if (chunk0_pages == 1 && record_rest != 0)
      translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);

    chunk0_pages--;
    curr_group+= limit;

  } while (chunk0_pages != 0);
  rc= translog_buffer_lock(cursor.buffer);
  if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0)
    cursor.buffer->last_lsn= *lsn;
  translog_buffer_decrease_writers(cursor.buffer);
  rc|= translog_buffer_unlock(cursor.buffer);

5265 5266 5267 5268
  if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
                                 *lsn, FALSE))
    goto err;

unknown's avatar
unknown committed
5269
  translog_mark_file_finished(file_of_the_first_group);
5270

5271 5272
  delete_dynamic(&groups);
  DBUG_RETURN(rc);
unknown's avatar
unknown committed
5273 5274

err_unlock:
unknown's avatar
unknown committed
5275

unknown's avatar
unknown committed
5276
  translog_unlock();
unknown's avatar
unknown committed
5277

unknown's avatar
unknown committed
5278
err:
unknown's avatar
unknown committed
5279 5280 5281

  translog_mark_file_finished(file_of_the_first_group);

unknown's avatar
unknown committed
5282 5283
  delete_dynamic(&groups);
  DBUG_RETURN(1);
5284 5285 5286
}


/**
   @brief Write the variable length log record.

   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.

   @return Operation status
     @retval 0      OK
     @retval 1      Error
*/

static my_bool translog_write_variable_record(LSN *lsn,
                                              enum translog_record_type type,
unknown's avatar
unknown committed
5306
                                              MARIA_HA *tbl_info,
5307 5308
                                              SHORT_TRANSACTION_ID short_trid,
                                              struct st_translog_parts *parts,
unknown's avatar
unknown committed
5309
                                              TRN *trn, void *hook_arg)
5310 5311 5312 5313 5314 5315
{
  struct st_translog_buffer *buffer_to_flush= NULL;
  uint header_length1= 1 + 2 + 2 +
    translog_variable_record_length_bytes(parts->record_length);
  ulong buffer_rest;
  uint page_rest;
unknown's avatar
unknown committed
5316
  /* Max number of such LSNs per record is 2 */
unknown's avatar
unknown committed
5317
  uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
5318
    COMPRESSED_LSN_MAX_STORE_SIZE];
unknown's avatar
unknown committed
5319
  my_bool res;
5320 5321 5322
  DBUG_ENTER("translog_write_variable_record");

  translog_lock();
unknown's avatar
unknown committed
5323
  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
unknown's avatar
unknown committed
5324
                      LSN_IN_PARTS(log_descriptor.horizon)));
unknown's avatar
unknown committed
5325 5326
  page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  DBUG_PRINT("info", ("header length: %u  page_rest: %u",
5327 5328 5329
                      header_length1, page_rest));

  /*
5330 5331
    header and part which we should read have to fit in one chunk
    TODO: allow to divide readable header
5332 5333 5334 5335 5336
  */
  if (page_rest <
      (header_length1 + log_record_type_descriptor[type].read_header_len))
  {
    DBUG_PRINT("info",
unknown's avatar
unknown committed
5337 5338
               ("Next page, size: %u  header: %u + %u",
                log_descriptor.bc.current_page_fill,
5339 5340 5341 5342
                header_length1,
                log_record_type_descriptor[type].read_header_len));
    translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
                       &buffer_to_flush);
unknown's avatar
unknown committed
5343
    /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
5344 5345 5346 5347 5348 5349 5350 5351
    page_rest= log_descriptor.page_capacity_chunk_2 + 1;
    DBUG_PRINT("info", ("page_rest: %u", page_rest));
  }

  /*
     To minimize compressed size we will compress always relative to
     very first chunk address (log_descriptor.horizon for now)
  */
unknown's avatar
unknown committed
5352
  if (log_record_type_descriptor[type].compressed_LSN > 0)
5353
  {
unknown's avatar
unknown committed
5354 5355 5356
    translog_relative_LSN_encode(parts, log_descriptor.horizon,
                                 log_record_type_descriptor[type].
                                 compressed_LSN, compressed_LSNs);
5357 5358 5359
    /* recalculate header length after compression */
    header_length1= 1 + 2 + 2 +
      translog_variable_record_length_bytes(parts->record_length);
unknown's avatar
unknown committed
5360 5361
    DBUG_PRINT("info", ("after compressing LSN(s) header length: %u  "
                        "record length: %lu",
5362
                        header_length1, (ulong)parts->record_length));
5363 5364 5365 5366 5367 5368
  }

  /* TODO: check space on current page for header + few bytes */
  if (page_rest >= parts->record_length + header_length1)
  {
    /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
5369
    res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
unknown's avatar
unknown committed
5370 5371
                                               short_trid,
                                               parts, buffer_to_flush,
unknown's avatar
unknown committed
5372
                                               header_length1, trn, hook_arg);
unknown's avatar
unknown committed
5373
    DBUG_RETURN(res);
5374 5375 5376 5377 5378 5379 5380
  }

  buffer_rest= translog_get_current_group_size();

  if (buffer_rest >= parts->record_length + header_length1 - page_rest)
  {
    /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
5381
    res= translog_write_variable_record_1group(lsn, type, tbl_info,
unknown's avatar
unknown committed
5382 5383
                                               short_trid,
                                               parts, buffer_to_flush,
unknown's avatar
unknown committed
5384
                                               header_length1, trn, hook_arg);
unknown's avatar
unknown committed
5385
    DBUG_RETURN(res);
5386 5387
  }
  /* following function makes translog_unlock(); */
unknown's avatar
unknown committed
5388
  res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
unknown's avatar
unknown committed
5389 5390 5391
                                             short_trid,
                                             parts, buffer_to_flush,
                                             header_length1,
unknown's avatar
unknown committed
5392
                                             buffer_rest, trn, hook_arg);
unknown's avatar
unknown committed
5393
  DBUG_RETURN(res);
5394 5395 5396
}


unknown's avatar
unknown committed
5397 5398
/**
   @brief Write the fixed and pseudo-fixed log record.
5399

unknown's avatar
unknown committed
5400 5401 5402 5403 5404 5405 5406 5407
   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  short_trid      Short transaction ID or 0 if it has no sense
   @param  parts           Descriptor of record source parts
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
   @param  hook_arg        Argument which will be passed to pre-write and
                           in-write hooks of this record.
5408

unknown's avatar
unknown committed
5409 5410 5411
   @return Operation status
     @retval 0      OK
     @retval 1      Error
5412 5413 5414 5415
*/

static my_bool translog_write_fixed_record(LSN *lsn,
                                           enum translog_record_type type,
unknown's avatar
unknown committed
5416
                                           MARIA_HA *tbl_info,
5417 5418
                                           SHORT_TRANSACTION_ID short_trid,
                                           struct st_translog_parts *parts,
unknown's avatar
unknown committed
5419
                                           TRN *trn, void *hook_arg)
5420 5421
{
  struct st_translog_buffer *buffer_to_flush= NULL;
unknown's avatar
unknown committed
5422
  uchar chunk1_header[1 + 2];
unknown's avatar
unknown committed
5423
  /* Max number of such LSNs per record is 2 */
unknown's avatar
unknown committed
5424
  uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
5425
    COMPRESSED_LSN_MAX_STORE_SIZE];
5426
  LEX_STRING *part;
5427 5428
  int rc;
  DBUG_ENTER("translog_write_fixed_record");
unknown's avatar
unknown committed
5429
  DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
5430 5431 5432
               LOGRECTYPE_FIXEDLENGTH &&
               parts->record_length ==
               log_record_type_descriptor[type].fixed_length) ||
unknown's avatar
unknown committed
5433
              (log_record_type_descriptor[type].rclass ==
5434
               LOGRECTYPE_PSEUDOFIXEDLENGTH &&
5435
               parts->record_length ==
5436 5437 5438
               log_record_type_descriptor[type].fixed_length));

  translog_lock();
unknown's avatar
unknown committed
5439
  DBUG_PRINT("info", ("horizon: (%lu,0x%lx)",
unknown's avatar
unknown committed
5440
                      LSN_IN_PARTS(log_descriptor.horizon)));
5441

unknown's avatar
unknown committed
5442
  DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
5443
  DBUG_PRINT("info",
unknown's avatar
unknown committed
5444 5445
             ("Page size: %u  record: %u  next cond: %d",
              log_descriptor.bc.current_page_fill,
5446
              (parts->record_length +
unknown's avatar
unknown committed
5447 5448
               log_record_type_descriptor[type].compressed_LSN * 2 + 3),
              ((((uint) log_descriptor.bc.current_page_fill) +
5449
                (parts->record_length +
unknown's avatar
unknown committed
5450
                 log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
5451 5452
               TRANSLOG_PAGE_SIZE)));
  /*
5453 5454
     check that there is enough place on current page.
     NOTE: compressing may increase page LSN size on two bytes for every LSN
5455
  */
unknown's avatar
unknown committed
5456
  if ((((uint) log_descriptor.bc.current_page_fill) +
5457
       (parts->record_length +
unknown's avatar
unknown committed
5458
        log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
5459 5460 5461 5462 5463 5464 5465 5466
      TRANSLOG_PAGE_SIZE)
  {
    DBUG_PRINT("info", ("Next page"));
    translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
                       &buffer_to_flush);
  }

  *lsn= log_descriptor.horizon;
5467 5468 5469 5470
  if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
                             *lsn, TRUE) ||
      (log_record_type_descriptor[type].inwrite_hook &&
       (*log_record_type_descriptor[type].inwrite_hook) (type, trn, tbl_info,
unknown's avatar
unknown committed
5471
                                                         lsn, hook_arg)))
5472
  {
unknown's avatar
unknown committed
5473 5474
    rc= 1;
    goto err;
5475 5476 5477
  }

  /* compress LSNs */
unknown's avatar
unknown committed
5478 5479
  if (log_record_type_descriptor[type].rclass ==
      LOGRECTYPE_PSEUDOFIXEDLENGTH)
5480
  {
unknown's avatar
unknown committed
5481
    DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
unknown's avatar
unknown committed
5482 5483 5484
    translog_relative_LSN_encode(parts, *lsn,
                                 log_record_type_descriptor[type].
                                 compressed_LSN, compressed_LSNs);
5485 5486 5487
  }

  /*
unknown's avatar
unknown committed
5488 5489
    Write the whole record at once (we know that there is enough place on
    the destination page)
5490
  */
unknown's avatar
unknown committed
5491
  DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
5492 5493 5494
  part= parts->parts + (--parts->current);
  parts->total_record_length+= (part->length= 1 + 2);
  part->str= (char*)chunk1_header;
unknown's avatar
unknown committed
5495
  *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
5496 5497 5498 5499 5500 5501 5502
  int2store(chunk1_header + 1, short_trid);

  rc= translog_write_parts_on_page(&log_descriptor.horizon,
                                   &log_descriptor.bc,
                                   parts->total_record_length, parts);

  log_descriptor.bc.buffer->last_lsn= *lsn;
unknown's avatar
unknown committed
5503

5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521
err:
  rc|= translog_unlock();

  /*
     check if we switched buffer and need process it (current buffer is
     unlocked already => we will not delay other threads
  */
  if (buffer_to_flush != NULL)
  {
    if (!rc)
      rc= translog_buffer_flush(buffer_to_flush);
    rc|= translog_buffer_unlock(buffer_to_flush);
  }

  DBUG_RETURN(rc);
}


5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532
/**
   @brief Writes the log record

   If share has no 2-byte-id yet, gives an id to the share and logs
   LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
   yet, logs it.

   @param  lsn             LSN of the record will be written here
   @param  type            the log record type
   @param  trn             Transaction structure pointer for hooks by
                           record log type, for short_id
unknown's avatar
unknown committed
5533
   @param  tbl_info        MARIA_HA of table or NULL
5534 5535 5536 5537 5538 5539
   @param  rec_len         record length or 0 (count it)
   @param  part_no         number of parts or 0 (count it)
   @param  parts_data      zero ended (in case of number of parts is 0)
                           array of LEX_STRINGs (parts), first
                           TRANSLOG_INTERNAL_PARTS positions in the log
                           should be unused (need for loghandler)
unknown's avatar
unknown committed
5540 5541 5542
   @param  store_share_id  if tbl_info!=NULL then share's id will
                           automatically be stored in the two first bytes
                           pointed (so pointer is assumed to be !=NULL)
unknown's avatar
unknown committed
5543 5544 5545
   @param  hook_arg        argument which will be passed to pre-write and
                           in-write hooks of this record.

5546 5547 5548
   @return Operation status
     @retval 0      OK
     @retval 1      Error
5549 5550 5551 5552
*/

my_bool translog_write_record(LSN *lsn,
                              enum translog_record_type type,
unknown's avatar
unknown committed
5553
                              TRN *trn, MARIA_HA *tbl_info,
5554 5555
                              translog_size_t rec_len,
                              uint part_no,
5556
                              LEX_STRING *parts_data,
unknown's avatar
unknown committed
5557 5558
                              uchar *store_share_id,
                              void *hook_arg)
5559 5560
{
  struct st_translog_parts parts;
5561
  LEX_STRING *part;
5562
  int rc;
5563
  uint short_trid= trn->short_id;
5564
  DBUG_ENTER("translog_write_record");
unknown's avatar
unknown committed
5565 5566
  DBUG_PRINT("enter", ("type: %u  ShortTrID: %u  rec_len: %lu",
                       (uint) type, (uint) short_trid, (ulong) rec_len));
5567 5568 5569 5570 5571 5572 5573 5574
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
  if (unlikely(translog_status != TRANSLOG_OK))
  {
    DBUG_PRINT("error", ("Transaction log is write protected"));
    DBUG_RETURN(1);
  }

5575

unknown's avatar
unknown committed
5576
  if (tbl_info)
5577
  {
unknown's avatar
unknown committed
5578
    MARIA_SHARE *share= tbl_info->s;
unknown's avatar
unknown committed
5579
    DBUG_ASSERT(share->now_transactional);
5580 5581 5582 5583 5584 5585 5586 5587 5588
    if (unlikely(share->id == 0))
    {
      /*
        First log write for this MARIA_SHARE; give it a short id.
        When the lock manager is enabled and needs a short id, it should be
        assigned in the lock manager (because row locks will be taken before
        log records are written; for example SELECT FOR UPDATE takes locks but
        writes no log record.
      */
unknown's avatar
unknown committed
5589
      if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
5590 5591 5592 5593 5594 5595
        DBUG_RETURN(1);
    }
    fileid_store(store_share_id, share->id);
  }
  if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
  {
unknown's avatar
unknown committed
5596
    LSN dummy_lsn;
5597 5598 5599 5600 5601 5602
    LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
    uchar log_data[6];
    int6store(log_data, trn->trid);
    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
    trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
unknown's avatar
unknown committed
5603
    if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
5604 5605
                                       trn, NULL, sizeof(log_data),
                                       sizeof(log_array)/sizeof(log_array[0]),
unknown's avatar
unknown committed
5606
                                       log_array, NULL, NULL)))
5607
      DBUG_RETURN(1);
5608
  }
unknown's avatar
unknown committed
5609

5610
  parts.parts= parts_data;
5611

5612 5613
  /* count parts if they are not counted by upper level */
  if (part_no == 0)
unknown's avatar
unknown committed
5614
  {
5615 5616 5617
    for (part_no= TRANSLOG_INTERNAL_PARTS;
         parts_data[part_no].length != 0;
         part_no++);
unknown's avatar
unknown committed
5618
  }
5619 5620
  parts.elements= part_no;
  parts.current= TRANSLOG_INTERNAL_PARTS;
5621

5622
  /* clear TRANSLOG_INTERNAL_PARTS */
unknown's avatar
unknown committed
5623
  compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
5624 5625 5626 5627 5628
  parts_data[0].str= 0;
  parts_data[0].length= 0;

  /* count length of the record */
  if (rec_len == 0)
unknown's avatar
unknown committed
5629
  {
5630 5631 5632
    for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
        part < parts_data + part_no;
        part++)
5633
    {
5634
      rec_len+= part->length;
5635 5636
    }
  }
5637
  parts.record_length= rec_len;
unknown's avatar
unknown committed
5638

5639 5640 5641 5642
#ifndef DBUG_OFF
  {
    uint i;
    uint len= 0;
5643
#ifdef HAVE_purify
unknown's avatar
unknown committed
5644 5645
    ha_checksum checksum= 0;
#endif
5646
    for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
unknown's avatar
unknown committed
5647
    {
5648
#ifdef HAVE_purify
unknown's avatar
unknown committed
5649 5650 5651 5652
      /* Find unitialized bytes early */
      checksum+= my_checksum(checksum, parts_data[i].str,
                             parts_data[i].length);
#endif
5653
      len+= parts_data[i].length;
unknown's avatar
unknown committed
5654
    }
5655 5656 5657
    DBUG_ASSERT(len == rec_len);
  }
#endif
unknown's avatar
unknown committed
5658 5659 5660 5661 5662
  /*
    Start total_record_length from record_length then overhead will
    be add
  */
  parts.total_record_length= parts.record_length;
unknown's avatar
unknown committed
5663
  DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
5664 5665 5666

  /* process this parts */
  if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
5667
             (*log_record_type_descriptor[type].prewrite_hook) (type, trn,
unknown's avatar
unknown committed
5668
                                                                tbl_info,
unknown's avatar
unknown committed
5669
                                                                hook_arg))))
5670
  {
unknown's avatar
unknown committed
5671
    switch (log_record_type_descriptor[type].rclass) {
5672
    case LOGRECTYPE_VARIABLE_LENGTH:
unknown's avatar
unknown committed
5673
      rc= translog_write_variable_record(lsn, type, tbl_info,
unknown's avatar
unknown committed
5674
                                         short_trid, &parts, trn, hook_arg);
5675 5676 5677
      break;
    case LOGRECTYPE_PSEUDOFIXEDLENGTH:
    case LOGRECTYPE_FIXEDLENGTH:
unknown's avatar
unknown committed
5678
      rc= translog_write_fixed_record(lsn, type, tbl_info,
unknown's avatar
unknown committed
5679
                                      short_trid, &parts, trn, hook_arg);
5680 5681 5682 5683 5684 5685 5686 5687
      break;
    case LOGRECTYPE_NOT_ALLOWED:
    default:
      DBUG_ASSERT(0);
      rc= 1;
    }
  }

unknown's avatar
unknown committed
5688
  DBUG_PRINT("info", ("LSN: (%lu,0x%lx)", LSN_IN_PARTS(*lsn)));
5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706
  DBUG_RETURN(rc);
}


/*
  Decode compressed (relative) LSN(s)

  SYNOPSIS
   translog_relative_lsn_decode()
   base_lsn              LSN for encoding
   src                   Decode LSN(s) from here
   dst                   Put decoded LSNs here
   lsns                  number of LSN(s)

   RETURN
     position in sources after decoded LSN(s)
*/

unknown's avatar
unknown committed
5707 5708
static uchar *translog_relative_LSN_decode(LSN base_lsn,
                                          uchar *src, uchar *dst, uint lsns)
5709 5710
{
  uint i;
unknown's avatar
unknown committed
5711
  for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
5712 5713 5714 5715 5716 5717
  {
    src= translog_get_LSN_from_diff(base_lsn, src, dst);
  }
  return src;
}

5718 5719 5720
/**
   @brief Get header of fixed/pseudo length record and call hook for
   it processing
5721

5722 5723 5724 5725
   @param page            Pointer to the buffer with page where LSN chunk is
                          placed
   @param page_offset     Offset of the first chunk in the page
   @param buff            Buffer to be filled with header data
5726

5727 5728 5729
   @return Length of header or operation status
     @retval #  number of bytes in TRANSLOG_HEADER_BUFFER::header where
                stored decoded part of the header
5730 5731
*/

5732 5733 5734
static int translog_fixed_length_header(uchar *page,
                                        translog_size_t page_offset,
                                        TRANSLOG_HEADER_BUFFER *buff)
5735 5736 5737
{
  struct st_log_record_type_descriptor *desc=
    log_record_type_descriptor + buff->type;
unknown's avatar
unknown committed
5738 5739 5740
  uchar *src= page + page_offset + 3;
  uchar *dst= buff->header;
  uchar *start= src;
unknown's avatar
unknown committed
5741
  uint lsns= desc->compressed_LSN;
5742
  uint length= desc->fixed_length;
5743 5744 5745 5746 5747

  DBUG_ENTER("translog_fixed_length_header");

  buff->record_length= length;

unknown's avatar
unknown committed
5748
  if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
5749 5750
  {
    DBUG_ASSERT(lsns > 0);
5751
    src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
unknown's avatar
unknown committed
5752
    lsns*= LSN_STORE_SIZE;
5753 5754
    dst+= lsns;
    length-= lsns;
5755
    buff->compressed_LSN_economy= (lsns - (src - start));
5756 5757 5758 5759
  }
  else
    buff->compressed_LSN_economy= 0;

unknown's avatar
unknown committed
5760
  memcpy(dst, src, length);
5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779
  buff->non_header_data_start_offset= page_offset +
    ((src + length) - (page + page_offset));
  buff->non_header_data_len= 0;
  DBUG_RETURN(buff->record_length);
}


/*
  Free resources used by TRANSLOG_HEADER_BUFFER

  SYNOPSIS
    translog_free_record_header();
    buff                 Header buffer whose group array should be freed

  NOTE
    Nothing is done when buff->groups_no is 0; after freeing, groups_no is
    reset to 0, so a repeated call does not free the array again.
*/

void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
{
  DBUG_ENTER("translog_free_record_header");
  if (buff->groups_no == 0)
    DBUG_VOID_RETURN;                           /* nothing was allocated */

  my_free((uchar*) buff->groups, MYF(0));
  buff->groups_no= 0;
  DBUG_VOID_RETURN;
}


5787 5788
/**
   @brief Returns the current horizon at the end of the current log
5789

5790
   @return Horizon
5791 5792
   @retval LSN_ERROR     error
   @retvar #             Horizon
5793 5794
*/

5795
TRANSLOG_ADDRESS translog_get_horizon()
5796
{
5797
  TRANSLOG_ADDRESS res;
5798 5799
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
5800
  translog_lock();
5801
  res= log_descriptor.horizon;
5802
  translog_unlock();
5803
  return res;
5804 5805 5806
}


unknown's avatar
unknown committed
5807 5808 5809 5810 5811
/**
   @brief Returns the current horizon at the end of the current log, caller is
   assumed to already hold the lock

   @return Horizon
5812 5813
   @retval LSN_ERROR     error
   @retvar #             Horizon
unknown's avatar
unknown committed
5814 5815 5816 5817
*/

TRANSLOG_ADDRESS translog_get_horizon_no_lock()
{
5818 5819
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
unknown's avatar
unknown committed
5820 5821 5822 5823 5824
  translog_lock_assert_owner();
  return log_descriptor.horizon;
}


5825 5826 5827 5828 5829 5830 5831 5832
/*
  Set last page in the scanner data structure

  SYNOPSIS
    translog_scanner_set_last_page()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5833 5834
    0  OK
    1  Error
5835 5836
*/

unknown's avatar
unknown committed
5837
static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
5838 5839
{
  my_bool page_ok;
5840 5841 5842 5843 5844 5845 5846 5847
  if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
  {
    /* It is last file => we can easy find last page address by horizon */
    uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
    scanner->last_file_page= (scanner->horizon -
                              (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
    return (0);
  }
5848
  scanner->last_file_page= scanner->page_addr;
5849
  return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok, 0));
5850 5851 5852
}


5853 5854
/**
  @brief Get page from page cache according to requested method

  Reads the page at scanner->page_addr with translog_get_page() and stores
  the returned pointer in scanner->page.  The scanner's direct link is
  offered to translog_get_page() only if scanner->use_direct_link is set.

  @param scanner         The scanner data

  @return operation status
  @retval 0 OK
  @retval 1 Error (translog_get_page() returned NULL)
*/

static my_bool
translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
{
  TRANSLOG_VALIDATOR_DATA data;
  DBUG_ENTER("translog_scanner_get_page");
  data.addr= &scanner->page_addr;
  data.was_recovered= 0;
  scanner->page= translog_get_page(&data, scanner->buffer,
                                   (scanner->use_direct_link ?
                                    &scanner->direct_link :
                                    NULL));
  DBUG_RETURN(scanner->page == NULL);
}


/**
  @brief Initialize reader scanner.

  @param lsn             LSN with which it have to be inited
  @param fixed_horizon   true if it is OK do not read records which was written
5884
                         after scanning beginning
5885 5886 5887
  @param scanner         scanner which have to be inited
  @param use_direct      prefer using direct lings from page handler
                         where it is possible.
5888

5889 5890 5891 5892 5893 5894
  @note If direct link was used translog_destroy_scanner should be
        called after it using

  @return status of the operation
  @retval 0 OK
  @retval 1 Error
5895 5896
*/

unknown's avatar
unknown committed
5897
my_bool translog_scanner_init(LSN lsn,
unknown's avatar
unknown committed
5898
                              my_bool fixed_horizon,
5899 5900
                              TRANSLOG_SCANNER_DATA *scanner,
                              my_bool use_direct)
unknown's avatar
unknown committed
5901 5902
{
  TRANSLOG_VALIDATOR_DATA data;
unknown's avatar
unknown committed
5903
  DBUG_ENTER("translog_scanner_init");
5904 5905
  DBUG_PRINT("enter", ("Scanner: 0x%lx  LSN: (0x%lu,0x%lx)",
                       (ulong) scanner, LSN_IN_PARTS(lsn)));
5906 5907
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
unknown's avatar
unknown committed
5908 5909 5910 5911

  data.addr= &scanner->page_addr;
  data.was_recovered= 0;

5912
  scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
5913 5914

  scanner->fixed_horizon= fixed_horizon;
5915 5916
  scanner->use_direct_link= use_direct;
  scanner->direct_link= NULL;
5917

5918
  scanner->horizon= translog_get_horizon();
unknown's avatar
unknown committed
5919
  DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)",
unknown's avatar
unknown committed
5920
                      LSN_IN_PARTS(scanner->horizon)));
5921 5922

  /* lsn < horizon */
unknown's avatar
unknown committed
5923
  DBUG_ASSERT(lsn <= scanner->horizon);
5924

5925 5926
  scanner->page_addr= lsn;
  scanner->page_addr-= scanner->page_offset; /*decrease offset */
5927 5928 5929 5930

  if (translog_scanner_set_last_page(scanner))
    DBUG_RETURN(1);

5931
  if (translog_scanner_get_page(scanner))
5932 5933 5934 5935 5936
    DBUG_RETURN(1);
  DBUG_RETURN(0);
}


5937 5938 5939 5940 5941 5942 5943 5944
/**
  @brief Destroy scanner object;

  Hands the scanner's direct link to translog_free_link(); nothing else of
  the scanner is touched.

  @param scanner         The scanner object to destroy
*/

void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
{
  DBUG_ENTER("translog_destroy_scanner");
  DBUG_PRINT("enter", ("Scanner: 0x%lx", (ulong) scanner));

  translog_free_link(scanner->direct_link);

  DBUG_VOID_RETURN;
}


5952 5953 5954 5955 5956 5957 5958 5959
/*
  Checks End of the Log

  SYNOPSIS
    translog_scanner_eol()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
5960 5961
    1  End of the Log
    0  OK
5962
*/
5963

unknown's avatar
unknown committed
5964
static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
5965 5966 5967
{
  DBUG_ENTER("translog_scanner_eol");
  DBUG_PRINT("enter",
unknown's avatar
unknown committed
5968
             ("Horizon: (%lu, 0x%lx)  Current: (%lu, 0x%lx+0x%x=0x%lx)",
unknown's avatar
unknown committed
5969 5970
              LSN_IN_PARTS(scanner->horizon),
              LSN_IN_PARTS(scanner->page_addr),
5971
              (uint) scanner->page_offset,
5972 5973 5974
              (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
  if (scanner->horizon > (scanner->page_addr +
                          scanner->page_offset))
5975 5976 5977 5978 5979 5980 5981 5982 5983
  {
    DBUG_PRINT("info", ("Horizon is not reached"));
    DBUG_RETURN(0);
  }
  if (scanner->fixed_horizon)
  {
    DBUG_PRINT("info", ("Horizon is fixed and reached"));
    DBUG_RETURN(1);
  }
5984
  scanner->horizon= translog_get_horizon();
5985 5986
  DBUG_PRINT("info",
             ("Horizon is re-read, EOL: %d",
5987 5988 5989 5990
              scanner->horizon <= (scanner->page_addr +
                                   scanner->page_offset)));
  DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
                                   scanner->page_offset));
5991 5992 5993
}


unknown's avatar
unknown committed
5994 5995
/**
  @brief Cheks End of the Page
5996

unknown's avatar
unknown committed
5997
  @param scanner         Information about current chunk during scanning
5998

unknown's avatar
unknown committed
5999 6000
  @retval 1  End of the Page
  @retval 0  OK
6001
*/
6002

unknown's avatar
unknown committed
6003
static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
6004 6005 6006
{
  DBUG_ENTER("translog_scanner_eop");
  DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
unknown's avatar
unknown committed
6007
              scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
6008 6009 6010
}


unknown's avatar
unknown committed
6011 6012 6013
/**
  @brief Checks End of the File (i.e. we are scanning last page, which do not
    mean end of this page)
6014

unknown's avatar
unknown committed
6015
  @param scanner         Information about current chunk during scanning
6016

unknown's avatar
unknown committed
6017 6018
  @retval 1 End of the File
  @retval 0 OK
6019
*/
6020

unknown's avatar
unknown committed
6021
static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
6022 6023
{
  DBUG_ENTER("translog_scanner_eof");
6024 6025
  DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
              LSN_FILE_NO(scanner->last_file_page));
unknown's avatar
unknown committed
6026 6027
  DBUG_PRINT("enter", ("curr Page: 0x%lx  last page: 0x%lx  "
                       "normal EOF: %d",
6028 6029 6030 6031
                       (ulong) LSN_OFFSET(scanner->page_addr),
                       (ulong) LSN_OFFSET(scanner->last_file_page),
                       LSN_OFFSET(scanner->page_addr) ==
                       LSN_OFFSET(scanner->last_file_page)));
6032 6033 6034 6035
  /*
     TODO: detect damaged file EOF,
     TODO: issue warning if damaged file EOF detected
  */
6036 6037
  DBUG_RETURN(scanner->page_addr ==
              scanner->last_file_page);
6038 6039 6040 6041 6042 6043 6044 6045 6046 6047
}

/*
  Move scanner to the next chunk

  SYNOPSIS
    translog_get_next_chunk()
    scanner              Information about current chunk during scanning

  RETURN
unknown's avatar
unknown committed
6048 6049
    0  OK
    1  Error
6050 6051
*/

unknown's avatar
unknown committed
6052 6053
static my_bool
translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
6054
{
unknown's avatar
unknown committed
6055
  uint16 len;
6056
  DBUG_ENTER("translog_get_next_chunk");
unknown's avatar
unknown committed
6057

6058 6059 6060 6061
  if (translog_scanner_eop(scanner))
    len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
  else if ((len= translog_get_total_chunk_length(scanner->page,
                                                 scanner->page_offset)) == 0)
6062 6063 6064 6065 6066
    DBUG_RETURN(1);
  scanner->page_offset+= len;

  if (translog_scanner_eol(scanner))
  {
unknown's avatar
unknown committed
6067
    scanner->page= END_OF_LOG;
6068 6069 6070 6071 6072
    scanner->page_offset= 0;
    DBUG_RETURN(0);
  }
  if (translog_scanner_eop(scanner))
  {
6073 6074
    /* before reading next page we should unpin current one if it was pinned */
    translog_free_link(scanner->direct_link);
6075 6076
    if (translog_scanner_eof(scanner))
    {
unknown's avatar
unknown committed
6077
      DBUG_PRINT("info", ("horizon: (%lu,0x%lx)  pageaddr: (%lu,0x%lx)",
unknown's avatar
unknown committed
6078 6079
                          LSN_IN_PARTS(scanner->horizon),
                          LSN_IN_PARTS(scanner->page_addr)));
6080
      /* if it is log end it have to be caught before */
6081 6082 6083 6084 6085
      DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
                  LSN_FILE_NO(scanner->page_addr));
      scanner->page_addr+= LSN_ONE_FILE;
      scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
                                             TRANSLOG_PAGE_SIZE);
6086 6087 6088 6089 6090
      if (translog_scanner_set_last_page(scanner))
        DBUG_RETURN(1);
    }
    else
    {
6091
      scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
6092
    }
unknown's avatar
unknown committed
6093

6094
    if (translog_scanner_get_page(scanner))
unknown's avatar
unknown committed
6095 6096
      DBUG_RETURN(1);

6097 6098 6099
    scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
    if (translog_scanner_eol(scanner))
    {
unknown's avatar
unknown committed
6100
      scanner->page= END_OF_LOG;
6101 6102 6103
      scanner->page_offset= 0;
      DBUG_RETURN(0);
    }
unknown's avatar
unknown committed
6104
    DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
6105 6106 6107 6108 6109
  }
  DBUG_RETURN(0);
}


6110 6111
/**
   @brief Get header of variable length record and call hook for it processing
6112

6113 6114 6115 6116 6117
   @param page            Pointer to the buffer with page where LSN chunk is
                          placed
   @param page_offset     Offset of the first chunk in the page
   @param buff            Buffer to be filled with header data
   @param scanner         If present should be moved to the header page if
6118 6119 6120
                          it differ from LSN page

   @return                Length of header or operation status
6121
     @retval RECHEADER_READ_ERROR  error
unknown's avatar
unknown committed
6122
     @retval RECHEADER_READ_EOF    End of the log reached during the read
6123 6124 6125
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
6126 6127
*/

unknown's avatar
unknown committed
6128 6129 6130 6131
static int
translog_variable_length_header(uchar *page, translog_size_t page_offset,
                                TRANSLOG_HEADER_BUFFER *buff,
                                TRANSLOG_SCANNER_DATA *scanner)
6132
{
unknown's avatar
unknown committed
6133 6134
  struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
                                               buff->type);
unknown's avatar
unknown committed
6135 6136
  uchar *src= page + page_offset + 1 + 2;
  uchar *dst= buff->header;
6137
  LSN base_lsn;
unknown's avatar
unknown committed
6138
  uint lsns= desc->compressed_LSN;
6139
  uint16 chunk_len;
6140
  uint16 length= desc->read_header_len;
6141 6142
  uint16 buffer_length= length;
  uint16 body_len;
unknown's avatar
unknown committed
6143
  TRANSLOG_SCANNER_DATA internal_scanner;
6144 6145 6146 6147
  DBUG_ENTER("translog_variable_length_header");

  buff->record_length= translog_variable_record_1group_decode_len(&src);
  chunk_len= uint2korr(src);
unknown's avatar
unknown committed
6148
  DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  length: %u  bufflen: %u",
6149 6150 6151 6152 6153 6154 6155 6156 6157 6158
                      (ulong) buff->record_length, (uint) chunk_len,
                      (uint) length, (uint) buffer_length));
  if (chunk_len == 0)
  {
    uint16 page_rest;
    DBUG_PRINT("info", ("1 group"));
    src+= 2;
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);

    base_lsn= buff->lsn;
unknown's avatar
unknown committed
6159
    body_len= min(page_rest, buff->record_length);
6160 6161 6162 6163 6164 6165 6166 6167 6168
  }
  else
  {
    uint grp_no, curr;
    uint header_to_skip;
    uint16 page_rest;

    DBUG_PRINT("info", ("multi-group"));
    grp_no= buff->groups_no= uint2korr(src + 2);
unknown's avatar
unknown committed
6169 6170 6171
    if (!(buff->groups=
          (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no,
                                      MYF(0))))
6172
      DBUG_RETURN(RECHEADER_READ_ERROR);
6173 6174 6175 6176 6177 6178 6179 6180 6181
    DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
    src+= (2 + 2);
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    curr= 0;
    header_to_skip= src - (page + page_offset);
    buff->chunk0_pages= 0;

    for (;;)
    {
6182
      uint i, read_length= grp_no;
6183 6184 6185

      buff->chunk0_pages++;
      if (page_rest < grp_no * (7 + 1))
6186
        read_length= page_rest / (7 + 1);
unknown's avatar
unknown committed
6187 6188
      DBUG_PRINT("info", ("Read chunk0 page#%u  read: %u  left: %u  "
                          "start from: %u",
6189 6190
                          buff->chunk0_pages, read_length, grp_no, curr));
      for (i= 0; i < read_length; i++, curr++)
6191 6192
      {
        DBUG_ASSERT(curr < buff->groups_no);
unknown's avatar
unknown committed
6193
        buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
6194
        buff->groups[curr].num= src[i * (7 + 1) + 7];
unknown's avatar
unknown committed
6195
        DBUG_PRINT("info", ("group #%u (%lu,0x%lx)  chunks: %u",
6196
                            curr,
unknown's avatar
unknown committed
6197
                            LSN_IN_PARTS(buff->groups[curr].addr),
6198 6199
                            (uint) buff->groups[curr].num));
      }
6200
      grp_no-= read_length;
6201 6202 6203 6204 6205
      if (grp_no == 0)
      {
        if (scanner)
        {
          buff->chunk0_data_addr= scanner->page_addr;
6206
          /* offset increased */
6207
          buff->chunk0_data_addr+= (page_offset + header_to_skip +
6208
                                    read_length * (7 + 1));
6209 6210 6211 6212
        }
        else
        {
          buff->chunk0_data_addr= buff->lsn;
6213
          /* offset increased */
6214
          buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
6215
        }
6216
        buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
unknown's avatar
unknown committed
6217
        DBUG_PRINT("info", ("Data address: (%lu,0x%lx)  len: %u",
unknown's avatar
unknown committed
6218
                            LSN_IN_PARTS(buff->chunk0_data_addr),
6219 6220 6221 6222 6223
                            buff->chunk0_data_len));
        break;
      }
      if (scanner == NULL)
      {
unknown's avatar
unknown committed
6224
        DBUG_PRINT("info", ("use internal scanner for header reading"));
6225
        scanner= &internal_scanner;
unknown's avatar
unknown committed
6226
        if (translog_scanner_init(buff->lsn, 1, scanner, 0))
6227
          DBUG_RETURN(RECHEADER_READ_ERROR);
6228
      }
6229
      if (translog_get_next_chunk(scanner))
unknown's avatar
unknown committed
6230 6231 6232
      {
        if (scanner == &internal_scanner)
          translog_destroy_scanner(scanner);
6233
        DBUG_RETURN(RECHEADER_READ_ERROR);
unknown's avatar
unknown committed
6234 6235 6236 6237 6238 6239 6240
      }
      if (scanner->page == END_OF_LOG)
      {
        if (scanner == &internal_scanner)
          translog_destroy_scanner(scanner);
        DBUG_RETURN(RECHEADER_READ_EOF);
      }
6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253
      page= scanner->page;
      page_offset= scanner->page_offset;
      src= page + page_offset + header_to_skip;
      chunk_len= uint2korr(src - 2 - 2);
      DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
      page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    }

    if (scanner == NULL)
    {
      DBUG_PRINT("info", ("use internal scanner"));
      scanner= &internal_scanner;
    }
6254 6255 6256 6257
    else
    {
      translog_destroy_scanner(scanner);
    }
6258
    base_lsn= buff->groups[0].addr;
unknown's avatar
unknown committed
6259
    translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
6260 6261 6262 6263 6264 6265
    /* first group chunk is always chunk type 2 */
    page= scanner->page;
    page_offset= scanner->page_offset;
    src= page + page_offset + 1;
    page_rest= TRANSLOG_PAGE_SIZE - (src - page);
    body_len= page_rest;
6266 6267
    if (scanner == &internal_scanner)
      translog_destroy_scanner(scanner);
6268 6269 6270
  }
  if (lsns)
  {
unknown's avatar
unknown committed
6271
    uchar *start= src;
6272
    src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
unknown's avatar
unknown committed
6273
    lsns*= LSN_STORE_SIZE;
6274 6275 6276
    dst+= lsns;
    length-= lsns;
    buff->record_length+= (buff->compressed_LSN_economy=
6277 6278
                           (lsns - (src - start)));
    DBUG_PRINT("info", ("lsns: %u  length: %u  economy: %d  new length: %lu",
unknown's avatar
unknown committed
6279
                        lsns / LSN_STORE_SIZE, (uint) length,
6280
                        (int) buff->compressed_LSN_economy,
6281 6282 6283 6284 6285 6286 6287 6288
                        (ulong) buff->record_length));
    body_len-= (src - start);
  }
  else
    buff->compressed_LSN_economy= 0;

  DBUG_ASSERT(body_len >= length);
  body_len-= length;
unknown's avatar
unknown committed
6289
  memcpy(dst, src, length);
6290 6291
  buff->non_header_data_start_offset= src + length - page;
  buff->non_header_data_len= body_len;
unknown's avatar
unknown committed
6292
  DBUG_PRINT("info", ("non_header_data_start_offset: %u  len: %u  buffer: %u",
6293 6294 6295 6296 6297 6298
                      buff->non_header_data_start_offset,
                      buff->non_header_data_len, buffer_length));
  DBUG_RETURN(buffer_length);
}


6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311
/**
   @brief Read record header from the given buffer

   @param page            page content buffer
   @param page_offset     offset of the chunk in the page
   @param buff            destination buffer
   @param scanner         If this is set the scanner will be moved to the
                          record header page (differ from LSN page in case of
                          multi-group records)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
unknown's avatar
unknown committed
6312
                                   TRANSLOG_HEADER_BUFFER::header where
6313
                                   stored decoded part of the header
6314 6315
*/

6316 6317 6318 6319
int translog_read_record_header_from_buffer(uchar *page,
                                            uint16 page_offset,
                                            TRANSLOG_HEADER_BUFFER *buff,
                                            TRANSLOG_SCANNER_DATA *scanner)
6320
{
unknown's avatar
unknown committed
6321
  translog_size_t res;
6322 6323 6324 6325 6326
  DBUG_ENTER("translog_read_record_header_from_buffer");
  DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
              TRANSLOG_CHUNK_LSN ||
              (page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
              TRANSLOG_CHUNK_FIXED);
6327 6328
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
6329 6330
  buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
  buff->short_trid= uint2korr(page + page_offset + 1);
6331
  DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN (%lu,0x%lx)",
6332
                      (uint) buff->type, (uint)buff->short_trid,
unknown's avatar
unknown committed
6333
                      LSN_IN_PARTS(buff->lsn)));
6334
  /* Read required bytes from the header and call hook */
unknown's avatar
unknown committed
6335
  switch (log_record_type_descriptor[buff->type].rclass) {
6336
  case LOGRECTYPE_VARIABLE_LENGTH:
unknown's avatar
unknown committed
6337 6338 6339
    res= translog_variable_length_header(page, page_offset, buff,
                                         scanner);
    break;
6340 6341
  case LOGRECTYPE_PSEUDOFIXEDLENGTH:
  case LOGRECTYPE_FIXEDLENGTH:
unknown's avatar
unknown committed
6342 6343
    res= translog_fixed_length_header(page, page_offset, buff);
    break;
6344
  default:
unknown's avatar
unknown committed
6345
    DBUG_ASSERT(0); /* we read some junk (got no LSN) */
6346
    res= RECHEADER_READ_ERROR;
6347
  }
unknown's avatar
unknown committed
6348
  DBUG_RETURN(res);
6349 6350 6351
}


6352 6353 6354
/**
   @brief Read record header and some fixed part of a record (the part depend
   on record type).
6355

6356 6357
   @param lsn             log record serial number (address of the record)
   @param buff            log record header buffer
6358

6359 6360 6361 6362 6363 6364 6365 6366 6367 6368
   @note Some type of record can be read completely by this call
   @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
   LSN can be translated to absolute one), some fields can be added (like
   actual header length in the record if the header has variable length)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
6369 6370
*/

6371
int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
6372
{
unknown's avatar
unknown committed
6373
  uchar buffer[TRANSLOG_PAGE_SIZE], *page;
unknown's avatar
unknown committed
6374
  translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
unknown's avatar
unknown committed
6375
  PAGECACHE_BLOCK_LINK *direct_link;
unknown's avatar
unknown committed
6376 6377
  TRANSLOG_ADDRESS addr;
  TRANSLOG_VALIDATOR_DATA data;
6378
  DBUG_ENTER("translog_read_record_header");
unknown's avatar
unknown committed
6379
  DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", LSN_IN_PARTS(lsn)));
6380
  DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
6381 6382
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
6383

6384
  buff->lsn= lsn;
6385
  buff->groups_no= 0;
unknown's avatar
unknown committed
6386 6387 6388 6389
  data.addr= &addr;
  data.was_recovered= 0;
  addr= lsn;
  addr-= page_offset; /* offset decreasing */
6390 6391
  res= (!(page= translog_get_page(&data, buffer, &direct_link))) ?
    RECHEADER_READ_ERROR :
unknown's avatar
unknown committed
6392
    translog_read_record_header_from_buffer(page, page_offset, buff, 0);
6393
  translog_free_link(direct_link);
unknown's avatar
unknown committed
6394
  DBUG_RETURN(res);
6395 6396 6397
}


6398 6399 6400
/**
   @brief Read record header and some fixed part of a record (the part depend
   on record type).
6401

6402 6403 6404
   @param scan            scanner position to read
   @param buff            log record header buffer
   @param move_scanner    request to move scanner to the header position
6405

6406 6407 6408 6409 6410 6411 6412 6413 6414 6415
   @note Some type of record can be read completely by this call
   @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
   LSN can be translated to absolute one), some fields can be added (like
   actual header length in the record if the header has variable length)

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where stored
                                   decoded part of the header
6416 6417
*/

6418 6419 6420
int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
                                     TRANSLOG_HEADER_BUFFER *buff,
                                     my_bool move_scanner)
6421
{
unknown's avatar
unknown committed
6422
  translog_size_t res;
6423
  DBUG_ENTER("translog_read_record_header_scan");
unknown's avatar
unknown committed
6424 6425
  DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                       "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed %d",
unknown's avatar
unknown committed
6426 6427 6428
                       LSN_IN_PARTS(scanner->page_addr),
                       LSN_IN_PARTS(scanner->horizon),
                       LSN_IN_PARTS(scanner->last_file_page),
6429 6430
                       (uint) scanner->page_offset,
                       (uint) scanner->page_offset, scanner->fixed_horizon));
6431 6432
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
6433 6434
  buff->groups_no= 0;
  buff->lsn= scanner->page_addr;
6435
  buff->lsn+= scanner->page_offset; /* offset increasing */
unknown's avatar
unknown committed
6436 6437 6438 6439 6440 6441
  res= translog_read_record_header_from_buffer(scanner->page,
                                               scanner->page_offset,
                                               buff,
                                               (move_scanner ?
                                                scanner : 0));
  DBUG_RETURN(res);
6442 6443 6444
}


6445 6446 6447
/**
   @brief Read record header and some fixed part of the next record (the part
   depend on record type).
6448

6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460
   @param scanner         data for scanning if lsn is NULL scanner data
                          will be used for continue scanning.
                          The scanner can be NULL.

   @param buff            log record header buffer

   @return Length of header or operation status
     @retval RECHEADER_READ_ERROR  error
     @retval RECHEADER_READ_EOF    EOF
     @retval #                     number of bytes in
                                   TRANSLOG_HEADER_BUFFER::header where
                                   stored decoded part of the header
6461
*/
6462

6463 6464
int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
                                     TRANSLOG_HEADER_BUFFER *buff)
6465 6466
{
  uint8 chunk_type;
unknown's avatar
unknown committed
6467
  translog_size_t res;
6468
  buff->groups_no= 0;        /* to be sure that we will free it right */
6469 6470 6471

  DBUG_ENTER("translog_read_next_record_header");
  DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
unknown's avatar
unknown committed
6472 6473
  DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                      "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed: %d",
unknown's avatar
unknown committed
6474 6475 6476
                      LSN_IN_PARTS(scanner->page_addr),
                      LSN_IN_PARTS(scanner->horizon),
                      LSN_IN_PARTS(scanner->last_file_page),
6477 6478
                      (uint) scanner->page_offset,
                      (uint) scanner->page_offset, scanner->fixed_horizon));
6479 6480
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
6481 6482 6483 6484

  do
  {
    if (translog_get_next_chunk(scanner))
6485
      DBUG_RETURN(RECHEADER_READ_ERROR);
unknown's avatar
unknown committed
6486 6487 6488 6489 6490 6491 6492
    if (scanner->page == END_OF_LOG)
    {
       DBUG_PRINT("info", ("End of file from the scanner"));
       /* Last record was read */
       buff->lsn= LSN_IMPOSSIBLE;
       DBUG_RETURN(RECHEADER_READ_EOF);
    }
6493
    chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE;
unknown's avatar
unknown committed
6494 6495 6496 6497
    DBUG_PRINT("info", ("Page: (%lu,0x%lx)  offset: %lu  type: %x  byte: %x",
                        LSN_IN_PARTS(scanner->page_addr),
                        (ulong) scanner->page_offset,
                        (uint) chunk_type,
6498
                        (uint) scanner->page[scanner->page_offset]));
unknown's avatar
unknown committed
6499 6500 6501
  } while (chunk_type != TRANSLOG_CHUNK_LSN &&
           chunk_type != TRANSLOG_CHUNK_FIXED &&
           scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
6502

unknown's avatar
unknown committed
6503
  if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
6504
  {
unknown's avatar
unknown committed
6505
    DBUG_PRINT("info", ("End of file"));
6506
    /* Last record was read */
unknown's avatar
unknown committed
6507
    buff->lsn= LSN_IMPOSSIBLE;
unknown's avatar
unknown committed
6508
    /* Return 'end of log' marker */
6509
    res= RECHEADER_READ_EOF;
6510
  }
unknown's avatar
unknown committed
6511 6512 6513
  else
    res= translog_read_record_header_scan(scanner, buff, 0);
  DBUG_RETURN(res);
6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525
}


/*
  Moves record data reader to the next chunk and fill the data reader
  information about that chunk.

  SYNOPSIS
    translog_record_read_next_chunk()
    data                 data cursor

  RETURN
unknown's avatar
unknown committed
6526 6527
    0  OK
    1  Error
6528
*/
6529

unknown's avatar
unknown committed
6530
static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549
{
  translog_size_t new_current_offset= data->current_offset + data->chunk_size;
  uint16 chunk_header_len, chunk_len;
  uint8 type;
  DBUG_ENTER("translog_record_read_next_chunk");

  if (data->eor)
  {
    DBUG_PRINT("info", ("end of the record flag set"));
    DBUG_RETURN(1);
  }

  if (data->header.groups_no &&
      data->header.groups_no - 1 != data->current_group &&
      data->header.groups[data->current_group].num == data->current_chunk)
  {
    /* Goto next group */
    data->current_group++;
    data->current_chunk= 0;
unknown's avatar
unknown committed
6550
    DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
6551
    translog_destroy_scanner(&data->scanner);
unknown's avatar
unknown committed
6552
    translog_scanner_init(data->header.groups[data->current_group].addr,
6553
                          1, &data->scanner, 1);
6554 6555 6556 6557 6558 6559
  }
  else
  {
    data->current_chunk++;
    if (translog_get_next_chunk(&data->scanner))
      DBUG_RETURN(1);
unknown's avatar
unknown committed
6560 6561 6562
     if (data->scanner.page == END_OF_LOG)
     {
       /*
unknown's avatar
unknown committed
6563 6564
         Actually it should not happened, but we want to quit nicely in case
         of a truncated log
unknown's avatar
unknown committed
6565 6566 6567
       */
       DBUG_RETURN(1);
     }
6568 6569 6570 6571 6572 6573
  }
  type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;

  if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
  {
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6574
               ("Last chunk: data len: %u  offset: %u  group: %u of %u",
6575 6576 6577
                data->header.chunk0_data_len, data->scanner.page_offset,
                data->current_group, data->header.groups_no - 1));
    DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
6578 6579
    DBUG_ASSERT(data->header.lsn ==
                data->scanner.page_addr + data->scanner.page_offset);
6580
    translog_destroy_scanner(&data->scanner);
unknown's avatar
unknown committed
6581
    translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595
    data->chunk_size= data->header.chunk0_data_len;
    data->body_offset= data->scanner.page_offset;
    data->current_offset= new_current_offset;
    data->eor= 1;
    DBUG_RETURN(0);
  }

  if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
  {
    data->eor= 1;
    DBUG_RETURN(1);                             /* End of record */
  }

  chunk_header_len=
unknown's avatar
unknown committed
6596
    translog_get_chunk_header_length(data->scanner.page +
6597 6598 6599 6600 6601 6602
                                     data->scanner.page_offset);
  chunk_len= translog_get_total_chunk_length(data->scanner.page,
                                             data->scanner.page_offset);
  data->chunk_size= chunk_len - chunk_header_len;
  data->body_offset= data->scanner.page_offset + chunk_header_len;
  data->current_offset= new_current_offset;
unknown's avatar
unknown committed
6603 6604
  DBUG_PRINT("info", ("grp: %u  chunk: %u  body_offset: %u  chunk_size: %u  "
                      "current_offset: %lu",
6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621
                      (uint) data->current_group,
                      (uint) data->current_chunk,
                      (uint) data->body_offset,
                      (uint) data->chunk_size, (ulong) data->current_offset));
  DBUG_RETURN(0);
}


/*
  Initialize record reader data from LSN

  SYNOPSIS
    translog_init_reader_data()
    lsn                  reference to LSN we should start from
    data                 reader data to initialize

  RETURN
unknown's avatar
unknown committed
6622 6623
    0  OK
    1  Error
6624 6625
*/

6626
static my_bool translog_init_reader_data(LSN lsn,
unknown's avatar
unknown committed
6627
                                         TRANSLOG_READER_DATA *data)
6628
{
unknown's avatar
unknown committed
6629
  int read_header;
6630
  DBUG_ENTER("translog_init_reader_data");
unknown's avatar
unknown committed
6631
  if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
unknown's avatar
unknown committed
6632 6633 6634
      ((read_header=
        translog_read_record_header_scan(&data->scanner, &data->header, 1))
       == RECHEADER_READ_ERROR))
6635
    DBUG_RETURN(1);
unknown's avatar
unknown committed
6636
  data->read_header= read_header;
6637 6638 6639 6640 6641 6642
  data->body_offset= data->header.non_header_data_start_offset;
  data->chunk_size= data->header.non_header_data_len;
  data->current_offset= data->read_header;
  data->current_group= 0;
  data->current_chunk= 0;
  data->eor= 0;
unknown's avatar
unknown committed
6643 6644
  DBUG_PRINT("info", ("read_header: %u  "
                      "body_offset: %u  chunk_size: %u  current_offset: %lu",
6645 6646 6647 6648 6649 6650 6651
                      (uint) data->read_header,
                      (uint) data->body_offset,
                      (uint) data->chunk_size, (ulong) data->current_offset));
  DBUG_RETURN(0);
}


6652 6653 6654 6655
/**
  @brief Destroy reader data object
*/

unknown's avatar
unknown committed
6656
static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
6657 6658 6659 6660 6661
{
  translog_destroy_scanner(&data->scanner);
}


6662 6663 6664 6665 6666 6667
/*
  Read a part of the record.

  SYNOPSIS
    translog_read_record_header()
    lsn                  log record serial number (address of the record)
unknown's avatar
unknown committed
6668
    offset               From the beginning of the record beginning (read
6669
                         by translog_read_record_header).
unknown's avatar
unknown committed
6670 6671
    length               Length of record part which have to be read.
    buffer               Buffer where to read the record part (have to be at
6672 6673 6674 6675 6676 6677
                         least 'length' bytes length)

  RETURN
    length of data actually read
*/

6678
translog_size_t translog_read_record(LSN lsn,
6679 6680
                                     translog_size_t offset,
                                     translog_size_t length,
unknown's avatar
unknown committed
6681
                                     uchar *buffer,
unknown's avatar
unknown committed
6682
                                     TRANSLOG_READER_DATA *data)
6683 6684 6685
{
  translog_size_t requested_length= length;
  translog_size_t end= offset + length;
unknown's avatar
unknown committed
6686
  TRANSLOG_READER_DATA internal_data;
6687
  DBUG_ENTER("translog_read_record");
6688 6689
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
6690 6691 6692

  if (data == NULL)
  {
unknown's avatar
unknown committed
6693
    DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
6694 6695 6696 6697 6698 6699 6700 6701 6702
    data= &internal_data;
  }
  if (lsn ||
      (offset < data->current_offset &&
       !(offset < data->read_header && offset + length < data->read_header)))
  {
    if (translog_init_reader_data(lsn, data))
      DBUG_RETURN(0);
  }
unknown's avatar
unknown committed
6703 6704 6705
  DBUG_PRINT("info", ("Offset: %lu  length: %lu  "
                      "Scanner: Cur: (%lu,0x%lx)  Hrz: (%lu,0x%lx)  "
                      "Lst: (%lu,0x%lx)  Offset: %u(%x)  fixed: %d",
6706
                      (ulong) offset, (ulong) length,
unknown's avatar
unknown committed
6707 6708 6709
                      LSN_IN_PARTS(data->scanner.page_addr),
                      LSN_IN_PARTS(data->scanner.horizon),
                      LSN_IN_PARTS(data->scanner.last_file_page),
6710 6711 6712 6713 6714
                      (uint) data->scanner.page_offset,
                      (uint) data->scanner.page_offset,
                      data->scanner.fixed_horizon));
  if (offset < data->read_header)
  {
unknown's avatar
unknown committed
6715
    uint16 len= min(data->read_header, end) - offset;
6716
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6717
               ("enter header offset: %lu  length: %lu",
6718
                (ulong) offset, (ulong) length));
unknown's avatar
unknown committed
6719
    memcpy(buffer, data->header.header + offset, len);
6720 6721
    length-= len;
    if (length == 0)
6722 6723
    {
      translog_destroy_reader_data(data);
6724
      DBUG_RETURN(requested_length);
6725
    }
6726 6727 6728
    offset+= len;
    buffer+= len;
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6729
               ("len: %u  offset: %lu   curr: %lu  length: %lu",
6730 6731 6732 6733 6734 6735
                len, (ulong) offset, (ulong) data->current_offset,
                (ulong) length));
  }
  /* TODO: find first page which we should read by offset */

  /* read the record chunk by chunk */
unknown's avatar
unknown committed
6736
  for(;;)
6737 6738 6739
  {
    uint page_end= data->current_offset + data->chunk_size;
    DBUG_PRINT("info",
unknown's avatar
unknown committed
6740 6741
               ("enter body offset: %lu  curr: %lu  "
                "length: %lu  page_end: %lu",
6742 6743 6744 6745 6746
                (ulong) offset, (ulong) data->current_offset, (ulong) length,
                (ulong) page_end));
    if (offset < page_end)
    {
      uint len= page_end - offset;
unknown's avatar
unknown committed
6747
      set_if_smaller(len, length); /* in case we read beyond record's end */
unknown's avatar
unknown committed
6748 6749
      DBUG_ASSERT(offset >= data->current_offset);
      memcpy(buffer,
6750 6751 6752 6753
              data->scanner.page + data->body_offset +
              (offset - data->current_offset), len);
      length-= len;
      if (length == 0)
6754 6755
      {
        translog_destroy_reader_data(data);
6756
        DBUG_RETURN(requested_length);
6757
      }
6758 6759 6760
      offset+= len;
      buffer+= len;
      DBUG_PRINT("info",
unknown's avatar
unknown committed
6761
                 ("len: %u  offset: %lu  curr: %lu  length: %lu",
6762 6763 6764 6765
                  len, (ulong) offset, (ulong) data->current_offset,
                  (ulong) length));
    }
    if (translog_record_read_next_chunk(data))
6766 6767
    {
      translog_destroy_reader_data(data);
6768
      DBUG_RETURN(requested_length - length);
6769
    }
unknown's avatar
unknown committed
6770
  }
6771 6772 6773 6774
}


/*
6775
  @brief Force skipping to the next buffer
6776

6777 6778
  @todo Do not copy old page content if all page protections are switched off
  (because we do not need calculate something or change old parts of the page)
6779 6780 6781 6782
*/

static void translog_force_current_buffer_to_finish()
{
unknown's avatar
unknown committed
6783
  TRANSLOG_ADDRESS new_buff_beginning;
unknown's avatar
unknown committed
6784 6785 6786 6787
  uint16 old_buffer_no= log_descriptor.bc.buffer_no;
  uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
                                          new_buffer_no);
6788
  struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
6789
  uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
unknown's avatar
unknown committed
6790 6791
  uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  uint16 current_page_fill, write_counter, previous_offset;
6792
  DBUG_ENTER("translog_force_current_buffer_to_finish");
unknown's avatar
unknown committed
6793 6794 6795 6796
  DBUG_PRINT("enter", ("Buffer #%u 0x%lx  "
                       "Buffer addr: (%lu,0x%lx)  "
                       "Page addr: (%lu,0x%lx)  "
                       "size: %lu (%lu)  Pg: %u  left: %u",
6797 6798
                       (uint) log_descriptor.bc.buffer_no,
                       (ulong) log_descriptor.bc.buffer,
unknown's avatar
unknown committed
6799
                       LSN_IN_PARTS(log_descriptor.bc.buffer->offset),
6800 6801
                       (ulong) LSN_FILE_NO(log_descriptor.horizon),
                       (ulong) (LSN_OFFSET(log_descriptor.horizon) -
unknown's avatar
unknown committed
6802
                                log_descriptor.bc.current_page_fill),
6803 6804 6805
                       (ulong) log_descriptor.bc.buffer->size,
                       (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
                                buffer->buffer),
unknown's avatar
unknown committed
6806
                       (uint) log_descriptor.bc.current_page_fill,
6807
                       (uint) left));
unknown's avatar
unknown committed
6808

unknown's avatar
unknown committed
6809 6810 6811
  LINT_INIT(current_page_fill);
  new_buff_beginning= log_descriptor.bc.buffer->offset;
  new_buff_beginning+= log_descriptor.bc.buffer->size; /* increase offset */
unknown's avatar
unknown committed
6812

6813
  DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
6814 6815
  DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
              LSN_FILE_NO(log_descriptor.bc.buffer->offset));
unknown's avatar
unknown committed
6816
  translog_check_cursor(&log_descriptor.bc);
unknown's avatar
unknown committed
6817 6818
  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left != 0)
6819 6820 6821 6822 6823
  {
    /*
       TODO: if 'left' is so small that can't hold any other record
       then do not move the page
    */
unknown's avatar
unknown committed
6824
    DBUG_PRINT("info", ("left: %u", (uint) left));
6825

6826
    /* decrease offset */
unknown's avatar
unknown committed
6827
    new_buff_beginning-= log_descriptor.bc.current_page_fill;
unknown's avatar
unknown committed
6828
    current_page_fill= log_descriptor.bc.current_page_fill;
6829

unknown's avatar
unknown committed
6830
    memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
6831
    log_descriptor.bc.buffer->size+= left;
unknown's avatar
unknown committed
6832
    DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx  "
6833 6834 6835 6836 6837 6838 6839 6840 6841
                        "Size: %lu",
                        (uint) log_descriptor.bc.buffer->buffer_no,
                        (ulong) log_descriptor.bc.buffer,
                        (ulong) log_descriptor.bc.buffer->size));
    DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
                log_descriptor.bc.buffer_no);
  }
  else
  {
unknown's avatar
unknown committed
6842
    log_descriptor.bc.current_page_fill= 0;
6843 6844 6845 6846 6847
  }

  translog_buffer_lock(new_buffer);
  translog_wait_for_buffer_free(new_buffer);

unknown's avatar
unknown committed
6848 6849 6850
  write_counter= log_descriptor.bc.write_counter;
  previous_offset= log_descriptor.bc.previous_offset;
  translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
unknown's avatar
unknown committed
6851
  /* Fix buffer offset (which was incorrectly set to horizon) */
unknown's avatar
unknown committed
6852
  log_descriptor.bc.buffer->offset= new_buff_beginning;
unknown's avatar
unknown committed
6853 6854
  log_descriptor.bc.write_counter= write_counter;
  log_descriptor.bc.previous_offset= previous_offset;
6855

unknown's avatar
unknown committed
6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878
  /*
    Advances this log pointer, increases writers and let other threads to
    write to the log while we process old page content
  */
  if (left)
  {
    log_descriptor.bc.ptr+= current_page_fill;
    log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
      current_page_fill;
    new_buffer->overlay= old_buffer;
  }
  else
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  translog_buffer_increase_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  /*
    We have to wait until all writers finish before start changing the
    pages by applying protection and copying the page content in the
    new buffer.
  */
  translog_wait_for_writers(old_buffer);

unknown's avatar
unknown committed
6879
  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
6880 6881 6882 6883 6884
  {
    translog_put_sector_protection(data, &log_descriptor.bc);
    if (left)
    {
      log_descriptor.bc.write_counter++;
unknown's avatar
unknown committed
6885
      log_descriptor.bc.previous_offset= current_page_fill;
6886 6887 6888 6889 6890 6891 6892 6893 6894
    }
    else
    {
      DBUG_PRINT("info", ("drop write_counter"));
      log_descriptor.bc.write_counter= 0;
      log_descriptor.bc.previous_offset= 0;
    }
  }

unknown's avatar
unknown committed
6895
  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
6896
  {
unknown's avatar
unknown committed
6897 6898 6899
    uint32 crc= translog_crc(data + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
6900 6901 6902 6903 6904 6905
    DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
    int4store(data + 3 + 3 + 1, crc);
  }

  if (left)
  {
6906
    /*
unknown's avatar
unknown committed
6907
      TODO: do not copy beginning of the page if we have no CRC or sector
6908 6909
      checks on
    */
unknown's avatar
unknown committed
6910
    memcpy(new_buffer->buffer, data, current_page_fill);
6911
  }
6912
  old_buffer->next_buffer_offset= new_buffer->offset;
6913

unknown's avatar
unknown committed
6914 6915 6916 6917
  translog_buffer_lock(new_buffer);
  translog_buffer_decrease_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

6918 6919 6920
  DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
6921

6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946
/**
   @brief Flush the log up to given LSN (included)

   @param  lsn             log record serial number up to which (inclusive)
                           the log has to be flushed

   @return Operation status
     @retval 0      OK
     @retval 1      Error

  @todo LOG: when a log write fails, we should not write to this log anymore
  (if we add more log records to this log they will be unreadable: we will hit
  the broken log record): all translog_flush() should be made to fail (because
  translog_flush() is when a a transaction wants something durable and we
  cannot make anything durable as log is corrupted). For that, a "my_bool
  st_translog_descriptor::write_error" could be set to 1 when a
  translog_write_record() or translog_flush() fails, and translog_flush()
  would test this var (and translog_write_record() could also test this var if
  it wants, though it's not absolutely needed).
  Then, either shut Maria down immediately, or switch to a new log (but if we
  get write error after write error, that would create too many logs).
  A popular open-source transactional engine intentionally crashes as soon as
  a log flush fails (we however don't want to crash the entire mysqld, but
  stopping all engine's operations immediately would make sense).
  Same applies to translog_write_record().
6947 6948

  @todo: remove serialization and make group commit.
6949 6950
*/

unknown's avatar
unknown committed
6951
my_bool translog_flush(TRANSLOG_ADDRESS lsn)
6952
{
6953
  LSN old_flushed, sent_to_disk;
6954
  TRANSLOG_ADDRESS flush_horizon;
6955
  int rc= 0;
6956
  /* We can't have more different files then buffers */
unknown's avatar
unknown committed
6957
  TRANSLOG_FILE *file_handlers[TRANSLOG_BUFFERS_NO];
6958 6959
  int current_file_handler= -1;
  uint32 prev_file= 0;
6960 6961
  my_bool full_circle= 0;
  DBUG_ENTER("translog_flush");
unknown's avatar
unknown committed
6962
  DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", LSN_IN_PARTS(lsn)));
6963 6964
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
6965
  LINT_INIT(sent_to_disk);
6966

6967
  pthread_mutex_lock(&log_descriptor.log_flush_lock);
6968
  translog_lock();
6969
  flush_horizon= LSN_IMPOSSIBLE;
6970 6971 6972
  old_flushed= log_descriptor.flushed;
  for (;;)
  {
unknown's avatar
unknown committed
6973 6974
    uint16 buffer_no= log_descriptor.bc.buffer_no;
    uint16 buffer_start= buffer_no;
6975 6976
    struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
    struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
6977
    if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
6978
    {
unknown's avatar
unknown committed
6979
      DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)",
unknown's avatar
unknown committed
6980
                          LSN_IN_PARTS(log_descriptor.flushed)));
6981 6982
      translog_unlock();
      goto out;
6983 6984
    }
    /* send to the file if it is not sent */
6985 6986 6987 6988 6989
    if (translog_status != TRANSLOG_OK)
    {
      rc= 1;
      goto out;
    }
6990
    sent_to_disk= translog_get_sent_to_disk();
6991
    if (cmp_translog_addr(sent_to_disk, lsn) >= 0 || full_circle)
6992 6993 6994 6995 6996 6997 6998 6999 7000
      break;

    do
    {
      buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
      buffer= log_descriptor.buffers + buffer_no;
      translog_buffer_lock(buffer);
      translog_buffer_unlock(buffer_unlock);
      buffer_unlock= buffer;
unknown's avatar
unknown committed
7001
      if (buffer->file != NULL)
7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012
      {
        buffer_unlock= NULL;
        if (buffer_start == buffer_no)
        {
          /* we made a circle */
          full_circle= 1;
          translog_force_current_buffer_to_finish();
        }
        break;
      }
    } while ((buffer_start != buffer_no) &&
7013
             cmp_translog_addr(log_descriptor.flushed, lsn) < 0);
7014
    if (buffer_unlock != NULL && buffer_unlock != buffer)
7015
      translog_buffer_unlock(buffer_unlock);
7016 7017 7018

    if (prev_file != LSN_FILE_NO(buffer->offset))
    {
unknown's avatar
unknown committed
7019
      TRANSLOG_FILE *file;
7020 7021
      uint32 fn= LSN_FILE_NO(buffer->offset);
      prev_file= fn;
unknown's avatar
unknown committed
7022 7023
      file= get_logfile_by_number(fn);
      if (!file->is_sync)
7024 7025
      {
        current_file_handler++;
unknown's avatar
unknown committed
7026
        file_handlers[current_file_handler]= file;
7027 7028 7029 7030 7031
      }
      /* We sync file when we are closing it => do nothing if file closed */
    }
    DBUG_ASSERT(flush_horizon <= buffer->offset + buffer->size);
    flush_horizon= buffer->offset + buffer->size;
unknown's avatar
unknown committed
7032
    rc= translog_buffer_flush(buffer);
7033
    translog_buffer_unlock(buffer);
unknown's avatar
unknown committed
7034
    if (rc)
7035
      goto out;                                 /* rc is 1 */
unknown's avatar
unknown committed
7036
    translog_lock();
7037
  }
unknown's avatar
unknown committed
7038
  translog_unlock();
7039 7040

  {
unknown's avatar
unknown committed
7041 7042 7043
    TRANSLOG_FILE **cur= file_handlers;
    TRANSLOG_FILE **end= file_handlers + current_file_handler;
    for (; cur <= end; cur++)
7044
    {
unknown's avatar
unknown committed
7045
      (*cur)->is_sync= 1;
7046
      if (my_sync((*cur)->handler.file, MYF(MY_WME)))
7047 7048
      {
        rc= 1;
7049
        translog_stop_writing();
7050 7051
        goto out;
      }
7052 7053
    }
  }
7054
  log_descriptor.flushed= sent_to_disk;
7055 7056 7057 7058 7059 7060 7061 7062 7063 7064
  /*
    If we should flush (due to directory flush mode) and
    previous flush horizon was not within one page border with this one.
  */
  if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
      (LSN_FILE_NO(log_descriptor.previous_flush_horizon) !=
       LSN_FILE_NO(flush_horizon) ||
       ((LSN_OFFSET(log_descriptor.previous_flush_horizon) - 1) /
        TRANSLOG_PAGE_SIZE) !=
       ((LSN_OFFSET(flush_horizon) - 1) / TRANSLOG_PAGE_SIZE)))
7065
    rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
7066
  log_descriptor.previous_flush_horizon= flush_horizon;
7067
out:
7068
  pthread_mutex_unlock(&log_descriptor.log_flush_lock);
7069 7070
  DBUG_RETURN(rc);
}
7071 7072


7073 7074 7075 7076 7077 7078 7079
/**
   @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact

   If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
   open MARIA_SHAREs), give it one and record this assignment in the log
   (LOGREC_FILE_ID log record).

unknown's avatar
unknown committed
7080
   @param  tbl_info        table
7081 7082 7083 7084 7085 7086 7087 7088 7089
   @param  trn             calling transaction

   @return Operation status
     @retval 0      OK
     @retval 1      Error

   @note Can be called even if share already has an id (then will do nothing)
*/

unknown's avatar
unknown committed
7090
int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
7091
{
unknown's avatar
unknown committed
7092
  MARIA_SHARE *share= tbl_info->s;
7093 7094 7095 7096 7097 7098
  /*
    If you give an id to a non-BLOCK_RECORD table, you also need to release
    this id somewhere. Then you can change the assertion.
  */
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  /* re-check under mutex to avoid having 2 ids for the same share */
7099
  pthread_mutex_lock(&share->intern_lock);
7100 7101 7102
  if (likely(share->id == 0))
  {
    /* Inspired by set_short_trid() of trnman.c */
unknown's avatar
unknown committed
7103 7104
    uint i= share->kfile.file % SHARE_ID_MAX + 1;
    do
7105
    {
unknown's avatar
unknown committed
7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119
      my_atomic_rwlock_wrlock(&LOCK_id_to_share);
      for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
      {
        void *tmp= NULL;
        if (id_to_share[i] == NULL &&
            my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
        {
          share->id= (uint16)i;
          break;
        }
      }
      my_atomic_rwlock_wrunlock(&LOCK_id_to_share);
      i= 1; /* scan the whole array */
    } while (share->id == 0);
unknown's avatar
unknown committed
7120
    DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, share->id));
7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136
    LSN lsn;
    LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
    uchar log_data[FILEID_STORE_SIZE];
    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
    /*
      open_file_name is an unresolved name (symlinks are not resolved, datadir
      is not realpath-ed, etc) which is good: the log can be moved to another
      directory and continue working.
    */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].str= share->open_file_name;
    /**
       @todo if we had the name's length in MARIA_SHARE we could avoid this
       strlen()
    */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
unknown's avatar
unknown committed
7137
      strlen(share->open_file_name) + 1;
unknown's avatar
unknown committed
7138
    if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
7139 7140 7141 7142
                                       sizeof(log_data) +
                                       log_array[TRANSLOG_INTERNAL_PARTS +
                                                 1].length,
                                       sizeof(log_array)/sizeof(log_array[0]),
unknown's avatar
unknown committed
7143
                                       log_array, log_data, NULL)))
7144 7145
      return 1;
  }
7146
  pthread_mutex_unlock(&share->intern_lock);
7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164
  return 0;
}


/**
   @brief Recycles a MARIA_SHARE's short id.

   Clears the share's slot in id_to_share and resets share->id and
   share->lsn_of_file_id.

   @param  share           table

   @note Must be called only if share has an id (i.e. id != 0)
   @note The caller must own share->intern_lock (asserted below).
*/

void translog_deassign_id_from_share(MARIA_SHARE *share)
{
  void **slot= (void **)&id_to_share[share->id];
  DBUG_PRINT("info", ("id_to_share: 0x%lx id %u -> 0",
                      (ulong)share, share->id));
  /*
    No writes can be happening: we are called only when closing the last
    instance of the table or at the end of REPAIR. A Checkpoint may however
    be reading share->id, which is why this mutex is required:
  */
  safe_mutex_assert_owner(&share->intern_lock);

  my_atomic_rwlock_rdlock(&LOCK_id_to_share);
  my_atomic_storeptr(slot, 0);
  my_atomic_rwlock_rdunlock(&LOCK_id_to_share);

  /* useless but safety: */
  share->lsn_of_file_id= LSN_IMPOSSIBLE;
  share->id= 0;
}
unknown's avatar
unknown committed
7177 7178


unknown's avatar
unknown committed
7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189
/**
   @brief Gives a share the short id dictated by recovery

   Stores the id in the share and registers the share in id_to_share under
   that id. No locking is done and no log record is written here.

   @param  share           table; must not have an id yet
   @param  id              id to assign; its id_to_share slot must be free
*/

void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
                                               uint16 id)
{
  DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  DBUG_ASSERT(share->id == 0);
  DBUG_ASSERT(id_to_share[id] == NULL);
  share->id= id;
  id_to_share[share->id]= share;
}


7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203
/**
   @brief Tells whether the log file with the given number exists

   The check is done by calling my_stat() on the name produced by
   translog_filename_by_fileno().

   @param file_no number of the file to test

   @retval 0 no such file
   @retval 1 there is file with such number
*/

my_bool translog_is_file(uint file_no)
{
  char path[FN_REFLEN];
  MY_STAT stat_buff;
  char *file_name= translog_filename_by_fileno(file_no, path);
  return (test(my_stat(file_name, &stat_buff, MYF(0))));
}


unknown's avatar
unknown committed
7208
/**
7209
  @brief returns minimum log file number
unknown's avatar
unknown committed
7210

7211 7212 7213 7214 7215
  @param horizon         the end of the log
  @param is_protected    true if it is under purge_log protection

  @retval minimum file number
  @retval 0 no files found
unknown's avatar
unknown committed
7216 7217
*/

7218
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
unknown's avatar
unknown committed
7219
{
7220
  uint min_file= 0, max_file;
7221 7222
  DBUG_ENTER("translog_first_file");
  if (!is_protected)
7223
    pthread_mutex_lock(&log_descriptor.purger_lock);
7224 7225 7226 7227 7228 7229
  if (log_descriptor.min_file_number &&
      translog_is_file(log_descriptor.min_file_number))
  {
    DBUG_PRINT("info", ("cached %lu",
                        (ulong) log_descriptor.min_file_number));
    if (!is_protected)
7230
      pthread_mutex_unlock(&log_descriptor.purger_lock);
7231 7232
    DBUG_RETURN(log_descriptor.min_file_number);
  }
7233

7234 7235
  max_file= LSN_FILE_NO(horizon);

7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248
  /* binary search for last file */
  while (min_file != max_file && min_file != (max_file - 1))
  {
    uint test= (min_file + max_file) / 2;
    DBUG_PRINT("info", ("min_file: %u  test: %u  max_file: %u",
                        min_file, test, max_file));
    if (test == max_file)
      test--;
    if (translog_is_file(test))
      max_file= test;
    else
      min_file= test;
  }
7249 7250
  log_descriptor.min_file_number= max_file;
  if (!is_protected)
7251
    pthread_mutex_unlock(&log_descriptor.purger_lock);
unknown's avatar
unknown committed
7252 7253
  DBUG_PRINT("info", ("first file :%lu", (ulong) max_file));
  DBUG_ASSERT(max_file >= 1);
7254 7255 7256 7257
  DBUG_RETURN(max_file);
}


7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272
/**
  @brief returns the most close LSN higher the given chunk address

  Scans chunks starting at addr until one is found whose type is
  TRANSLOG_CHUNK_LSN or TRANSLOG_CHUNK_FIXED; the address of that chunk is
  the result. Reaching a page filler byte or the end of the log means there
  is no such LSN.

  @param addr the chunk address to start from
  @param horizon the horizon if it is known or LSN_IMPOSSIBLE

  @retval LSN_ERROR Error
  @retval LSN_IMPOSSIBLE no LSNs after the address
  @retval # LSN of the most close LSN higher the given chunk address
*/

LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
{
  TRANSLOG_SCANNER_DATA scanner;
  LSN result;
  uint chunk_type;
  int need_next_chunk;
  DBUG_ENTER("translog_next_LSN");

  if (horizon == LSN_IMPOSSIBLE)
    horizon= translog_get_horizon();

  if (addr == horizon)
    DBUG_RETURN(LSN_IMPOSSIBLE);

  translog_scanner_init(addr, 0, &scanner, 1);
  /*
    addr can point not to a chunk beginning but page end so next
    page beginning.
  */
  if (addr % TRANSLOG_PAGE_SIZE == 0)
  {
    /*
      We are emulating the page end which cased such horizon value to
      trigger translog_scanner_eop().

      We can't just increase addr on page header overhead because it
      can be file end so we allow translog_get_next_chunk() to skip
      to the next page in correct way
    */
    scanner.page_addr-= TRANSLOG_PAGE_SIZE;
    scanner.page_offset= TRANSLOG_PAGE_SIZE;
#ifndef DBUG_OFF
    scanner.page= NULL; /* prevent using incorrect page content */
#endif
  }

  /*
    addr can point not to a chunk beginning but to a page end: in that case
    the scanner has to be advanced before the first chunk is examined.
  */
  need_next_chunk= translog_scanner_eop(&scanner);
  for (;;)
  {
    if (need_next_chunk)
    {
      if (translog_get_next_chunk(&scanner))
      {
        result= LSN_ERROR;
        goto out;
      }
      if (scanner.page == END_OF_LOG)
      {
        result= LSN_IMPOSSIBLE;
        goto out;
      }
    }

    chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
    DBUG_PRINT("info", ("type: %x  byte: %x", (uint) chunk_type,
                        (uint) scanner.page[scanner.page_offset]));
    /* Stop on a chunk which starts a record, or on the page filler */
    if (chunk_type == TRANSLOG_CHUNK_LSN ||
        chunk_type == TRANSLOG_CHUNK_FIXED ||
        scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
      break;
    need_next_chunk= 1;
  }

  if (scanner.page[scanner.page_offset] != TRANSLOG_FILLER)
    result= scanner.page_addr + scanner.page_offset;
  else
    result= LSN_IMPOSSIBLE; /* reached page filler */
out:
  translog_destroy_scanner(&scanner);
  DBUG_RETURN(result);
}

7349

7350 7351 7352 7353
/**
   @brief returns the LSN of the first record starting in this log

   @retval LSN_ERROR Error
unknown's avatar
unknown committed
7354
   @retval LSN_IMPOSSIBLE no log or the log is empty
7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365
   @retval # LSN of the first record
*/

LSN translog_first_lsn_in_log()
{
  TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
  TRANSLOG_VALIDATOR_DATA data;
  uint file;
  uint16 chunk_offset;
  uchar *page;
  DBUG_ENTER("translog_first_lsn_in_log");
7366
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(horizon)));
7367 7368
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
7369 7370 7371 7372 7373 7374

  if (!(file= translog_first_file(horizon, 0)))
  {
    /* log has no records yet */
    DBUG_RETURN(LSN_IMPOSSIBLE);
  }
7375

7376
  addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
7377 7378
  data.addr= &addr;
  {
7379
    uchar buffer[TRANSLOG_PAGE_SIZE];
7380
    if ((page= translog_get_page(&data, buffer, NULL)) == NULL ||
7381
        (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
7382 7383
      DBUG_RETURN(LSN_ERROR);
  }
7384 7385 7386
  addr+= chunk_offset;

  DBUG_RETURN(translog_next_LSN(addr, horizon));
7387 7388 7389 7390
}


/**
7391
   @brief Returns theoretical first LSN if first log is present
7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403

   @retval LSN_ERROR Error
   @retval LSN_IMPOSSIBLE no log
   @retval # LSN of the first record
*/

LSN translog_first_theoretical_lsn()
{
  TRANSLOG_ADDRESS addr= translog_get_horizon();
  uchar buffer[TRANSLOG_PAGE_SIZE], *page;
  TRANSLOG_VALIDATOR_DATA data;
  DBUG_ENTER("translog_first_theoretical_lsn");
unknown's avatar
unknown committed
7404
  DBUG_PRINT("info", ("Horizon: (%lu,0x%lx)", LSN_IN_PARTS(addr)));
7405 7406
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
7407 7408 7409 7410 7411

  if (!translog_is_file(1))
    DBUG_RETURN(LSN_IMPOSSIBLE);
  if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
  {
7412
    /* log has no records yet */
7413 7414 7415 7416 7417 7418
    DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
                         log_descriptor.page_overhead));
  }

  addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
  data.addr= &addr;
7419
  if ((page= translog_get_page(&data, buffer, NULL)) == NULL)
7420 7421 7422 7423
    DBUG_RETURN(LSN_ERROR);

  DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
                       page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
unknown's avatar
unknown committed
7424
}
7425 7426 7427


/**
7428
  @brief Checks given low water mark and purge files if it is need
7429

unknown's avatar
unknown committed
7430
  @param low the last (minimum) address which is need
7431 7432 7433 7434 7435

  @retval 0 OK
  @retval 1 Error
*/

unknown's avatar
unknown committed
7436
my_bool translog_purge(TRANSLOG_ADDRESS low)
7437 7438 7439 7440 7441
{
  uint32 last_need_file= LSN_FILE_NO(low);
  TRANSLOG_ADDRESS horizon= translog_get_horizon();
  int rc= 0;
  DBUG_ENTER("translog_purge");
unknown's avatar
unknown committed
7442
  DBUG_PRINT("enter", ("low: (%lu,0x%lx)", LSN_IN_PARTS(low)));
7443 7444
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);
7445

7446
  pthread_mutex_lock(&log_descriptor.purger_lock);
7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463
  if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
  {
    uint32 i;
    uint32 min_file= translog_first_file(horizon, 1);
    DBUG_ASSERT(min_file != 0); /* log is already started */
    for(i= min_file; i < last_need_file && rc == 0; i++)
    {
      LSN lsn= translog_get_file_max_lsn_stored(i);
      if (lsn == LSN_IMPOSSIBLE)
        break;   /* files are still in writing */
      if (lsn == LSN_ERROR)
      {
        rc= 1;
        break;
      }
      if (cmp_translog_addr(lsn, low) >= 0)
        break;
unknown's avatar
unknown committed
7464

7465
      DBUG_PRINT("info", ("purge file %lu", (ulong) i));
unknown's avatar
unknown committed
7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487

      /* remove file descriptor from the cache */
      /*
        log_descriptor.min_file can be changed only here during execution
        and the function is serialized, so we can access it without problems
      */
      if (i >= log_descriptor.min_file)
      {
        TRANSLOG_FILE *file;
        rw_wrlock(&log_descriptor.open_files_lock);
        DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
                    log_descriptor.open_files.elements);
        DBUG_ASSERT(log_descriptor.min_file == i);
        file= *((TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files));
        DBUG_PRINT("info", ("Files : %d", log_descriptor.open_files.elements));
        DBUG_ASSERT(i == file->number);
        log_descriptor.min_file++;
        DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
                    log_descriptor.open_files.elements);
        rw_unlock(&log_descriptor.open_files_lock);
        translog_close_log_file(file);
      }
7488
      if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE)
7489 7490 7491 7492 7493 7494
      {
        char path[FN_REFLEN], *file_name;
        file_name= translog_filename_by_fileno(i, path);
        rc= test(my_delete(file_name, MYF(MY_WME)));
      }
    }
7495 7496 7497 7498 7499 7500
    if (unlikely(rc == 1))
      log_descriptor.min_need_file= 0; /* impossible value */
    else
      log_descriptor.min_need_file= i;
  }

unknown's avatar
unknown committed
7501
  pthread_mutex_unlock(&log_descriptor.purger_lock);
7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522
  DBUG_RETURN(rc);
}


/**
  @brief Purges files by stored min need file in case of
    "ondemend" purge type

  @note This function do real work only if it is "ondemend" purge type
    and translog_purge() was called at least once and last time without
    errors

  @retval 0 OK
  @retval 1 Error
*/

my_bool translog_purge_at_flush()
{
  uint32 i, min_file;
  int rc= 0;
  DBUG_ENTER("translog_purge_at_flush");
unknown's avatar
unknown committed
7523 7524 7525 7526 7527 7528 7529 7530
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);

  if (unlikely(translog_status == TRANSLOG_READONLY))
  {
    DBUG_PRINT("info", ("The log is read onlyu => exit"));
    DBUG_RETURN(0);
  }
7531 7532 7533 7534 7535 7536 7537

  if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
  {
    DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
    DBUG_RETURN(0);
  }

unknown's avatar
unknown committed
7538
  pthread_mutex_lock(&log_descriptor.purger_lock);
7539 7540 7541 7542

  if (unlikely(log_descriptor.min_need_file == 0))
  {
    DBUG_PRINT("info", ("No info about min need file => exit"));
unknown's avatar
unknown committed
7543
    pthread_mutex_unlock(&log_descriptor.purger_lock);
7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554
    DBUG_RETURN(0);
  }

  min_file= translog_first_file(translog_get_horizon(), 1);
  DBUG_ASSERT(min_file != 0); /* log is already started */
  for(i= min_file; i < log_descriptor.min_need_file && rc == 0; i++)
  {
    char path[FN_REFLEN], *file_name;
    DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
    file_name= translog_filename_by_fileno(i, path);
    rc= test(my_delete(file_name, MYF(MY_WME)));
7555 7556
  }

7557
  pthread_mutex_unlock(&log_descriptor.purger_lock);
7558 7559
  DBUG_RETURN(rc);
}
7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586


/**
  @brief Gets min file number

  Public wrapper around translog_first_file() for callers which do not
  hold the purger lock.

  @param horizon         the end of the log

  @retval minimum file number
  @retval 0 no files found
*/

uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
{
  uint32 first_file= translog_first_file(horizon, 0);
  return first_file;
}


/**
  @brief Gets min file number which is needed

  @retval minimum file number
  @retval 0 unknown
*/

uint32 translog_get_first_needed_file()
{
  uint32 file_no;
unknown's avatar
unknown committed
7587
  pthread_mutex_lock(&log_descriptor.purger_lock);
7588
  file_no= log_descriptor.min_need_file;
unknown's avatar
unknown committed
7589
  pthread_mutex_unlock(&log_descriptor.purger_lock);
7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615
  return file_no;
}


/**
  @brief Gets transaction log file size

  Reads log_descriptor.log_file_max_size under the log lock.

  @return transaction log file size
*/

uint32 translog_get_file_size()
{
  uint32 max_size;

  translog_lock();
  max_size= log_descriptor.log_file_max_size;
  translog_unlock();

  return max_size;
}


/**
  @brief Sets transaction log file size

  @return Returns actually set transaction log size
*/

unknown's avatar
unknown committed
7616
void translog_set_file_size(uint32 size)
7617
{
7618
  struct st_translog_buffer *old_buffer= NULL;
unknown's avatar
unknown committed
7619
  DBUG_ENTER("translog_set_file_size");
7620
  translog_lock();
unknown's avatar
unknown committed
7621 7622
  DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
  DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0 &&
unknown's avatar
unknown committed
7623
              size >= TRANSLOG_MIN_FILE_SIZE);
unknown's avatar
unknown committed
7624
  log_descriptor.log_file_max_size= size;
7625 7626 7627
  /* if current file longer then finish it*/
  if (LSN_OFFSET(log_descriptor.horizon) >=  log_descriptor.log_file_max_size)
  {
7628
    old_buffer= log_descriptor.bc.buffer;
7629 7630 7631 7632
    translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
    translog_buffer_unlock(old_buffer);
  }
  translog_unlock();
7633 7634 7635 7636 7637 7638
  if (old_buffer)
  {
    translog_buffer_lock(old_buffer);
    translog_buffer_flush(old_buffer);
    translog_buffer_unlock(old_buffer);
  }
unknown's avatar
unknown committed
7639
  DBUG_VOID_RETURN;
7640 7641
}