log.cc 85 KB
Newer Older
unknown's avatar
unknown committed
1
/* Copyright (C) 2000-2003 MySQL AB
unknown's avatar
unknown committed
2

unknown's avatar
unknown committed
3 4 5 6
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
unknown's avatar
unknown committed
7

unknown's avatar
unknown committed
8 9 10 11
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
unknown's avatar
unknown committed
12

unknown's avatar
unknown committed
13 14 15 16 17 18
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */


/* logging of commands */
19
/* TODO: Abort logging when we get an error in reading or writing log files */
unknown's avatar
unknown committed
20

21 22 23 24
#ifdef __EMX__
#include <io.h>
#endif

unknown's avatar
unknown committed
25
#include "mysql_priv.h"
unknown's avatar
unknown committed
26
#include "sql_repl.h"
unknown's avatar
unknown committed
27 28 29 30 31

#include <my_dir.h>
#include <stdarg.h>
#include <m_ctype.h>				// For test_if_number

unknown's avatar
unknown committed
32 33 34 35
#ifdef __NT__
#include "message.h"
#endif

36
MYSQL_LOG mysql_log, mysql_slow_log, mysql_bin_log;
37
ulong sync_binlog_counter= 0;
unknown's avatar
unknown committed
38 39 40

static bool test_if_number(const char *str,
			   long *res, bool allow_wildcards);
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
static int binlog_close_connection(THD *thd);
static int binlog_savepoint_set(THD *thd, void *sv);
static int binlog_savepoint_rollback(THD *thd, void *sv);
static int binlog_commit(THD *thd, bool all);
static int binlog_rollback(THD *thd, bool all);
static int binlog_prepare(THD *thd, bool all);

static handlerton binlog_hton = {
  0,
  sizeof(my_off_t),             /* savepoint size = binlog offset */
  binlog_close_connection,
  binlog_savepoint_set,
  binlog_savepoint_rollback,
  NULL,                         /* savepoint_release */
  binlog_commit,
  binlog_rollback,
  binlog_prepare,
  NULL,                         /* recover */
  NULL,                         /* commit_by_xid */
  NULL                          /* rollback_by_xid */
};

/*
  this function is mostly a placeholder.
  conceptually, binlog initialization (now mostly done in MYSQL_LOG::open)
  should be moved here.
*/

handlerton *binlog_init()
{
unknown's avatar
unknown committed
71
  return &binlog_hton;
72 73 74 75 76
}

static int binlog_close_connection(THD *thd)
{
  IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
unknown's avatar
unknown committed
77
  DBUG_ASSERT(mysql_bin_log.is_open() && !my_b_tell(trans_log));
78 79 80 81 82
  close_cached_file(trans_log);
  my_free((gptr)trans_log, MYF(0));
  return 0;
}

83
static int binlog_end_trans(THD *thd, IO_CACHE *trans_log, Log_event *end_ev)
84
{
85 86 87 88 89
  int error=0;
  DBUG_ENTER("binlog_end_trans");
  if (end_ev)
    error= mysql_bin_log.write(thd, trans_log, end_ev);

90 91 92 93 94 95 96 97
  statistic_increment(binlog_cache_use, &LOCK_status);
  if (trans_log->disk_writes != 0)
  {
    statistic_increment(binlog_cache_disk_use, &LOCK_status);
    trans_log->disk_writes= 0;
  }
  reinit_io_cache(trans_log, WRITE_CACHE, (my_off_t) 0, 0, 1); // cannot fail
  trans_log->end_of_file= max_binlog_cache_size;
98
  DBUG_RETURN(error);
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
}

static int binlog_prepare(THD *thd, bool all)
{
  /*
    do nothing.
    just pretend we can do 2pc, so that MySQL won't
    switch to 1pc.
    real work will be done in MYSQL_LOG::log()
  */
  return 0;
}

static int binlog_commit(THD *thd, bool all)
{
  IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
  DBUG_ENTER("binlog_commit");
  DBUG_ASSERT(mysql_bin_log.is_open() &&
unknown's avatar
unknown committed
117
     (all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))));
118 119 120 121 122 123

  if (!my_b_tell(trans_log))
  {
    // we're here because trans_log was flushed in MYSQL_LOG::log()
    DBUG_RETURN(0);
  }
124
  Query_log_event qev(thd, "COMMIT", 6, TRUE, FALSE);
125
  DBUG_RETURN(binlog_end_trans(thd, trans_log, &qev));
126 127 128 129 130 131 132 133
}

static int binlog_rollback(THD *thd, bool all)
{
  int error=0;
  IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
  DBUG_ENTER("binlog_rollback");
  /*
unknown's avatar
unknown committed
134 135
    First assert is guaranteed - see trans_register_ha() call below.
    The second must be true. If it is not, we're registering
136 137
    unnecessary, doing extra work. The cause should be found and eliminated
  */
unknown's avatar
unknown committed
138 139
  DBUG_ASSERT(all || !(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
  DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log));
140
  /*
141 142 143 144
    Update the binary log with a BEGIN/ROLLBACK block if we have
    cached some queries and we updated some non-transactional
    table. Such cases should be rare (updating a
    non-transactional table inside a transaction...)
145 146
  */
  if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE))
147 148
  {
    Query_log_event qev(thd, "ROLLBACK", 8, TRUE, FALSE);
149
    error= binlog_end_trans(thd, trans_log, &qev);
150
  }
151 152
  else
    error= binlog_end_trans(thd, trans_log, 0);
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
  DBUG_RETURN(error);
}

/*
  NOTE: how do we handle this (unlikely but legal) case:
  [transaction] + [update to non-trans table] + [rollback to savepoint] ?
  The problem occurs when a savepoint is before the update to the
  non-transactional table. Then when there's a rollback to the savepoint, if we
  simply truncate the binlog cache, we lose the part of the binlog cache where
  the update is. If we want to not lose it, we need to write the SAVEPOINT
  command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
  is easy: it's just write at the end of the binlog cache, but the former
  should be *inserted* to the place where the user called SAVEPOINT. The
  solution is that when the user calls SAVEPOINT, we write it to the binlog
  cache (so no need to later insert it). As transactions are never intermixed
  in the binary log (i.e. they are serialized), we won't have conflicts with
  savepoint names when using mysqlbinlog or in the slave SQL thread.
  Then when ROLLBACK TO SAVEPOINT is called, if we updated some
  non-transactional table, we don't truncate the binlog cache but instead write
  ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
  will chop the SAVEPOINT command from the binlog cache, which is good as in
  that case there is no need to have it in the binlog).
*/

static int binlog_savepoint_set(THD *thd, void *sv)
{
  IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
  DBUG_ENTER("binlog_savepoint_set");
  DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log));

  *(my_off_t *)sv= my_b_tell(trans_log);
  /* Write it to the binary log */
  Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE);
  DBUG_RETURN(mysql_bin_log.write(&qinfo));
}

static int binlog_savepoint_rollback(THD *thd, void *sv)
{
  IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
  DBUG_ENTER("binlog_savepoint_rollback");
  DBUG_ASSERT(mysql_bin_log.is_open() && my_b_tell(trans_log));

  /*
    Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
    non-transactional table. Otherwise, truncate the binlog cache starting
    from the SAVEPOINT command.
  */
  if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE))
  {
    Query_log_event qinfo(thd, thd->query, thd->query_length, TRUE, FALSE);
    DBUG_RETURN(mysql_bin_log.write(&qinfo));
  }
  reinit_io_cache(trans_log, WRITE_CACHE, *(my_off_t *)sv, 0, 0);
  DBUG_RETURN(0);
}

int check_binlog_magic(IO_CACHE* log, const char** errmsg)
{
  char magic[4];
  DBUG_ASSERT(my_b_tell(log) == 0);

  if (my_b_read(log, (byte*) magic, sizeof(magic)))
  {
    *errmsg = "I/O error reading the header from the binary log";
    sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
		    log->error);
    return 1;
  }
  if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
  {
    *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
    return 1;
  }
  return 0;
}

File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
{
  File file;
  DBUG_ENTER("open_binlog");

  if ((file = my_open(log_file_name, O_RDONLY | O_BINARY, MYF(MY_WME))) < 0)
  {
    sql_print_error("Failed to open log (file '%s', errno %d)",
                    log_file_name, my_errno);
    *errmsg = "Could not open log file";
    goto err;
  }
  if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
                    MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
  {
    sql_print_error("Failed to create a cache on log (file '%s')",
                    log_file_name);
    *errmsg = "Could not open log file";
    goto err;
  }
  if (check_binlog_magic(log,errmsg))
    goto err;
  DBUG_RETURN(file);

err:
  if (file >= 0)
  {
    my_close(file,MYF(0));
    end_io_cache(log);
  }
  DBUG_RETURN(-1);
}
unknown's avatar
unknown committed
261

unknown's avatar
unknown committed
262 263
#ifdef __NT__
static int eventSource = 0;
unknown's avatar
unknown committed
264

265
void setup_windows_event_source()
unknown's avatar
unknown committed
266
{
267
  HKEY    hRegKey= NULL;
unknown's avatar
unknown committed
268 269 270
  DWORD   dwError= 0;
  TCHAR   szPath[MAX_PATH];
  DWORD dwTypes;
271

unknown's avatar
unknown committed
272 273 274
  if (eventSource)               // Ensure that we are only called once
    return;
  eventSource= 1;
unknown's avatar
unknown committed
275

unknown's avatar
unknown committed
276
  // Create the event source registry key
277
  dwError= RegCreateKey(HKEY_LOCAL_MACHINE,
unknown's avatar
unknown committed
278 279
                          "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MySQL", 
                          &hRegKey);
unknown's avatar
unknown committed
280

unknown's avatar
unknown committed
281 282
  /* Name of the PE module that contains the message resource */
  GetModuleFileName(NULL, szPath, MAX_PATH);
unknown's avatar
unknown committed
283

unknown's avatar
unknown committed
284
  /* Register EventMessageFile */
285
  dwError = RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ,
unknown's avatar
unknown committed
286
                          (PBYTE) szPath, strlen(szPath)+1);
unknown's avatar
unknown committed
287

unknown's avatar
unknown committed
288 289 290 291 292
  /* Register supported event types */
  dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE |
            EVENTLOG_INFORMATION_TYPE);
  dwError= RegSetValueEx(hRegKey, "TypesSupported", 0, REG_DWORD,
                         (LPBYTE) &dwTypes, sizeof dwTypes);
unknown's avatar
unknown committed
293

unknown's avatar
unknown committed
294
  RegCloseKey(hRegKey);
unknown's avatar
unknown committed
295 296
}

unknown's avatar
unknown committed
297
#endif /* __NT__ */
unknown's avatar
unknown committed
298 299


unknown's avatar
unknown committed
300 301 302 303 304 305 306 307
/****************************************************************************
** Find a uniq filename for 'filename.#'.
** Set # to a number as low as possible
** returns != 0 if not possible to get uniq filename
****************************************************************************/

static int find_uniq_filename(char *name)
{
308 309 310 311
  long                  number;
  uint                  i;
  char                  buff[FN_REFLEN];
  struct st_my_dir     *dir_info;
unknown's avatar
unknown committed
312
  reg1 struct fileinfo *file_info;
313 314
  ulong                 max_found=0;

unknown's avatar
unknown committed
315 316
  DBUG_ENTER("find_uniq_filename");

317 318 319 320
  uint  length = dirname_part(buff,name);
  char *start  = name + length;
  char *end    = strend(start);

unknown's avatar
unknown committed
321
  *end='.';
unknown's avatar
unknown committed
322
  length= (uint) (end-start+1);
unknown's avatar
unknown committed
323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340

  if (!(dir_info = my_dir(buff,MYF(MY_DONT_SORT))))
  {						// This shouldn't happen
    strmov(end,".1");				// use name+1
    DBUG_RETURN(0);
  }
  file_info= dir_info->dir_entry;
  for (i=dir_info->number_off_files ; i-- ; file_info++)
  {
    if (bcmp(file_info->name,start,length) == 0 &&
	test_if_number(file_info->name+length, &number,0))
    {
      set_if_bigger(max_found,(ulong) number);
    }
  }
  my_dirend(dir_info);

  *end++='.';
341
  sprintf(end,"%06ld",max_found+1);
unknown's avatar
unknown committed
342 343 344
  DBUG_RETURN(0);
}

345

unknown's avatar
unknown committed
346
MYSQL_LOG::MYSQL_LOG()
347
  :bytes_written(0), last_time(0), query_start(0), name(0),
unknown's avatar
unknown committed
348
   file_id(1), open_count(1), log_type(LOG_CLOSED), write_error(0), inited(0),
349
   need_start_event(1), prepared_xids(0), description_event_for_exec(0),
350
   description_event_for_queue(0)
unknown's avatar
unknown committed
351 352
{
  /*
unknown's avatar
unknown committed
353 354
    We don't want to initialize LOCK_Log here as such initialization depends on
    safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
unknown's avatar
unknown committed
355
    called only in main(). Doing initialization here would make it happen
356
    before main().
unknown's avatar
unknown committed
357
  */
358 359
  index_file_name[0] = 0;
  bzero((char*) &log_file,sizeof(log_file));
360
  bzero((char*) &index_file, sizeof(index_file));
unknown's avatar
unknown committed
361 362
}

363

unknown's avatar
unknown committed
364
MYSQL_LOG::~MYSQL_LOG()
unknown's avatar
unknown committed
365 366 367 368
{
  cleanup();
}

unknown's avatar
unknown committed
369
/* this is called only once */
unknown's avatar
unknown committed
370

unknown's avatar
unknown committed
371
void MYSQL_LOG::cleanup()
unknown's avatar
unknown committed
372
{
373
  DBUG_ENTER("cleanup");
unknown's avatar
unknown committed
374
  if (inited)
375
  {
unknown's avatar
unknown committed
376
    inited= 0;
377
    close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
378 379
    delete description_event_for_queue;
    delete description_event_for_exec;
380 381
    (void) pthread_mutex_destroy(&LOCK_log);
    (void) pthread_mutex_destroy(&LOCK_index);
unknown's avatar
unknown committed
382
    (void) pthread_cond_destroy(&update_cond);
383
  }
384
  DBUG_VOID_RETURN;
unknown's avatar
unknown committed
385 386
}

387

unknown's avatar
unknown committed
388
int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name)
389
{
390 391
  fn_format(new_name,log_name,mysql_data_home,"",4);
  if (log_type != LOG_NORMAL)
unknown's avatar
unknown committed
392 393 394 395 396 397 398 399 400 401 402 403 404
  {
    if (!fn_ext(log_name)[0])
    {
      if (find_uniq_filename(new_name))
      {
	sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
	return 1;
      }
    }
  }
  return 0;
}

405

406
void MYSQL_LOG::init(enum_log_type log_type_arg,
407
		     enum cache_type io_cache_type_arg,
408 409
		     bool no_auto_events_arg,
                     ulong max_size_arg)
410
{
411
  DBUG_ENTER("MYSQL_LOG::init");
412
  log_type = log_type_arg;
413
  io_cache_type = io_cache_type_arg;
414
  no_auto_events = no_auto_events_arg;
415
  max_size=max_size_arg;
unknown's avatar
unknown committed
416
  DBUG_PRINT("info",("log_type: %d max_size: %lu", log_type, max_size));
417
  DBUG_VOID_RETURN;
418 419
}

unknown's avatar
unknown committed
420

unknown's avatar
unknown committed
421 422
void MYSQL_LOG::init_pthread_objects()
{
unknown's avatar
unknown committed
423 424
  DBUG_ASSERT(inited == 0);
  inited= 1;
unknown's avatar
unknown committed
425 426 427
  (void) pthread_mutex_init(&LOCK_log,MY_MUTEX_INIT_SLOW);
  (void) pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW);
  (void) pthread_cond_init(&update_cond, 0);
428 429
}

430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
const char *MYSQL_LOG::generate_name(const char *log_name,
                                     const char *suffix,
                                     bool strip_ext, char *buff)
{
  if (!log_name || !log_name[0])
  {
    /*
      TODO: The following should be using fn_format();  We just need to
      first change fn_format() to cut the file name if it's too long.
    */
    strmake(buff,glob_hostname,FN_REFLEN-5);
    strmov(fn_ext(buff),suffix);
    return (const char *)buff;
  }
  // get rid of extension if the log is binary to avoid problems
  if (strip_ext)
  {
    char *p = fn_ext(log_name);
    uint length=(uint) (p-log_name);
    strmake(buff,log_name,min(length,FN_REFLEN));
    return (const char*)buff;
  }
  return log_name;
}

bool MYSQL_LOG::open_index_file(const char *index_file_name_arg,
                                const char *log_name)
{
  File index_file_nr= -1;
  DBUG_ASSERT(!my_b_inited(&index_file));

  /*
    First open of this class instance
    Create an index file that will hold all file names uses for logging.
    Add new entries to the end of it.
  */
  myf opt= MY_UNPACK_FILENAME;
  if (!index_file_name_arg)
  {
    index_file_name_arg= log_name;    // Use same basename for index file
    opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
  }
  fn_format(index_file_name, index_file_name_arg, mysql_data_home,
            ".index", opt);
  if ((index_file_nr= my_open(index_file_name,
                              O_RDWR | O_CREAT | O_BINARY ,
                              MYF(MY_WME))) < 0 ||
       my_sync(index_file_nr, MYF(MY_WME)) ||
       init_io_cache(&index_file, index_file_nr,
                     IO_SIZE, WRITE_CACHE,
                     my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
unknown's avatar
unknown committed
481
			0, MYF(MY_WME | MY_WAIT_IF_FULL)))
482 483 484 485 486 487 488 489
  {
    if (index_file_nr >= 0)
      my_close(index_file_nr,MYF(0));
    return TRUE;
  }
  return FALSE;
}

unknown's avatar
unknown committed
490

491 492 493 494 495 496 497 498 499 500 501 502 503 504
/*
  Open a (new) log file.

  DESCRIPTION
  - If binary logs, also open the index file and register the new
    file name in it
  - When calling this when the file is in use, you must have a locks
    on LOCK_log and LOCK_index.

  RETURN VALUES
    0	ok
    1	error
*/

505 506 507 508 509
bool MYSQL_LOG::open(const char *log_name,
                     enum_log_type log_type_arg,
                     const char *new_name,
                     enum cache_type io_cache_type_arg,
                     bool no_auto_events_arg,
510 511
                     ulong max_size_arg,
                     bool null_created_arg)
unknown's avatar
unknown committed
512
{
513 514
  char buff[FN_REFLEN];
  File file= -1;
515
  int open_flags = O_CREAT | O_BINARY;
unknown's avatar
unknown committed
516
  DBUG_ENTER("MYSQL_LOG::open");
517
  DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
518 519 520

  last_time=query_start=0;
  write_error=0;
unknown's avatar
unknown committed
521

522
  init(log_type_arg,io_cache_type_arg,no_auto_events_arg,max_size_arg);
523

524
  if (!(name=my_strdup(log_name,MYF(MY_WME))))
525 526
  {
    name= (char *)log_name; // for the error message
527
    goto err;
528
  }
unknown's avatar
unknown committed
529 530 531
  if (new_name)
    strmov(log_file_name,new_name);
  else if (generate_new_name(log_file_name, name))
532
    goto err;
533

534
  if (io_cache_type == SEQ_READ_APPEND)
535
    open_flags |= O_RDWR | O_APPEND;
536
  else
537
    open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
538

unknown's avatar
unknown committed
539
  db[0]=0;
unknown's avatar
unknown committed
540
  open_count++;
541
  if ((file=my_open(log_file_name,open_flags,
542
		    MYF(MY_WME | ME_WAITTANG))) < 0 ||
543
      init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
544 545 546
		    my_tell(file,MYF(MY_WME)), 0, 
                    MYF(MY_WME | MY_NABP |
                        ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
547
    goto err;
unknown's avatar
unknown committed
548

549 550
  switch (log_type) {
  case LOG_NORMAL:
unknown's avatar
unknown committed
551
  {
552
    char *end;
553
    int len=my_snprintf(buff, sizeof(buff), "%s, Version: %s. "
unknown's avatar
SCRUM  
unknown committed
554
#ifdef EMBEDDED_LIBRARY
555
		        "embedded library\n", my_progname, server_version
unknown's avatar
SCRUM  
unknown committed
556
#elif __NT__
557 558
			"started with:\nTCP Port: %d, Named Pipe: %s\n",
			my_progname, server_version, mysqld_port, mysqld_unix_port
unknown's avatar
unknown committed
559
#else
560 561
			"started with:\nTcp port: %d  Unix socket: %s\n",
			my_progname,server_version,mysqld_port,mysqld_unix_port
unknown's avatar
unknown committed
562
#endif
unknown's avatar
unknown committed
563
                       );
564 565
    end=strnmov(buff+len,"Time                 Id Command    Argument\n",
                sizeof(buff)-len);
unknown's avatar
unknown committed
566
    if (my_b_write(&log_file, (byte*) buff,(uint) (end-buff)) ||
567 568
	flush_io_cache(&log_file))
      goto err;
569
    break;
unknown's avatar
unknown committed
570
  }
571
  case LOG_NEW:
unknown's avatar
unknown committed
572
  {
573
    uint len;
unknown's avatar
unknown committed
574 575
    time_t skr=time(NULL);
    struct tm tm_tmp;
576

unknown's avatar
unknown committed
577
    localtime_r(&skr,&tm_tmp);
578 579 580 581 582 583 584 585 586 587
    len= my_snprintf(buff,sizeof(buff),
		     "# %s, Version: %s at %02d%02d%02d %2d:%02d:%02d\n",
		     my_progname,server_version,
		     tm_tmp.tm_year % 100,
		     tm_tmp.tm_mon+1,
		     tm_tmp.tm_mday,
		     tm_tmp.tm_hour,
		     tm_tmp.tm_min,
		     tm_tmp.tm_sec);
    if (my_b_write(&log_file, (byte*) buff, len) ||
588 589
	flush_io_cache(&log_file))
      goto err;
590
    break;
unknown's avatar
unknown committed
591
  }
592
  case LOG_BIN:
unknown's avatar
unknown committed
593
  {
594 595 596 597 598 599 600 601 602 603
    bool write_file_name_to_index_file=0;

    if (!my_b_filelength(&log_file))
    {
      /*
	The binary log file was empty (probably newly created)
	This is the normal case and happens when the user doesn't specify
	an extension for the binary log files.
	In this case we write a standard header to it.
      */
unknown's avatar
unknown committed
604 605
      if (my_b_safe_write(&log_file, (byte*) BINLOG_MAGIC,
			  BIN_LOG_HEADER_SIZE))
unknown's avatar
unknown committed
606
        goto err;
607 608
      bytes_written+= BIN_LOG_HEADER_SIZE;
      write_file_name_to_index_file= 1;
unknown's avatar
unknown committed
609
    }
unknown's avatar
unknown committed
610

611
    DBUG_ASSERT(my_b_inited(&index_file) != 0);
612 613
    reinit_io_cache(&index_file, WRITE_CACHE,
                    my_b_filelength(&index_file), 0, 0);
614
    if (need_start_event && !no_auto_events)
615
    {
616 617 618 619 620
      /*
        In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
        even if this is not the very first binlog.
      */
      Format_description_log_event s(BINLOG_VERSION);
621
      s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640
      if (!s.is_valid())
        goto err;
      if (null_created_arg)
        s.created= 0;
      if (s.write(&log_file))
        goto err;
      bytes_written+= s.data_written;
    }
    if (description_event_for_queue &&
        description_event_for_queue->binlog_version>=4)
    {
      /*
        This is a relay log written to by the I/O slave thread.
        Write the event so that others can later know the format of this relay
        log.
        Note that this event is very close to the original event from the
        master (it has binlog version of the master, event types of the
        master), so this is suitable to parse the next relay log's event. It
        has been produced by
unknown's avatar
unknown committed
641
        Format_description_log_event::Format_description_log_event(char* buf,).
642 643 644 645 646 647 648 649 650 651 652 653 654 655
        Why don't we want to write the description_event_for_queue if this
        event is for format<4 (3.23 or 4.x): this is because in that case, the
        description_event_for_queue describes the data received from the
        master, but not the data written to the relay log (*conversion*),
        which is in format 4 (slave's).
      */
      /*
        Set 'created' to 0, so that in next relay logs this event does not
        trigger cleaning actions on the slave in
        Format_description_log_event::exec_event().
      */
      description_event_for_queue->created= 0;
      /* Don't set log_pos in event header */
      description_event_for_queue->artificial_event=1;
656

657 658 659
      if (description_event_for_queue->write(&log_file))
        goto err;
      bytes_written+= description_event_for_queue->data_written;
660
    }
unknown's avatar
unknown committed
661 662
    if (flush_io_cache(&log_file) ||
        my_sync(log_file.file, MYF(MY_WME)))
unknown's avatar
unknown committed
663
      goto err;
664 665 666

    if (write_file_name_to_index_file)
    {
unknown's avatar
unknown committed
667 668 669 670
      /*
        As this is a new log file, we write the file name to the index
        file. As every time we write to the index file, we sync it.
      */
671 672 673
      if (my_b_write(&index_file, (byte*) log_file_name,
		     strlen(log_file_name)) ||
	  my_b_write(&index_file, (byte*) "\n", 1) ||
unknown's avatar
unknown committed
674 675
	  flush_io_cache(&index_file) ||
          my_sync(index_file.file, MYF(MY_WME)))
676
	goto err;
unknown's avatar
unknown committed
677
    }
678
    break;
unknown's avatar
unknown committed
679
  }
680
  case LOG_CLOSED:				// Impossible
681
  case LOG_TO_BE_OPENED:
682 683 684 685
    DBUG_ASSERT(1);
    break;
  }
  DBUG_RETURN(0);
686 687

err:
688 689 690
  sql_print_error("Could not use %s for logging (error %d). \
Turning logging off for the whole duration of the MySQL server process. \
To turn it on again: fix the cause, \
691
shutdown the MySQL server and restart it.", name, errno);
692 693 694
  if (file >= 0)
    my_close(file,MYF(0));
  end_io_cache(&log_file);
695 696
  end_io_cache(&index_file);
  safeFree(name);
697
  log_type= LOG_CLOSED;
698
  DBUG_RETURN(1);
unknown's avatar
unknown committed
699 700
}

701

unknown's avatar
unknown committed
702 703 704
int MYSQL_LOG::get_current_log(LOG_INFO* linfo)
{
  pthread_mutex_lock(&LOCK_log);
705 706
  strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
  linfo->pos = my_b_tell(&log_file);
unknown's avatar
unknown committed
707 708 709 710
  pthread_mutex_unlock(&LOCK_log);
  return 0;
}

unknown's avatar
unknown committed
711

712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753
/*
  Move all data up in a file in an filename index file

  SYNOPSIS
    copy_up_file_and_fill()
    index_file			File to move
    offset			Move everything from here to beginning

  NOTE
    File will be truncated to be 'offset' shorter or filled up with
    newlines

  IMPLEMENTATION
    We do the copy outside of the IO_CACHE as the cache buffers would just
    make things slower and more complicated.
    In most cases the copy loop should only do one read.

  RETURN VALUES
    0	ok
*/

static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
{
  int bytes_read;
  my_off_t init_offset= offset;
  File file= index_file->file;
  byte io_buf[IO_SIZE*2];
  DBUG_ENTER("copy_up_file_and_fill");

  for (;; offset+= bytes_read)
  {
    (void) my_seek(file, offset, MY_SEEK_SET, MYF(0));
    if ((bytes_read= (int) my_read(file, io_buf, sizeof(io_buf), MYF(MY_WME)))
	< 0)
      goto err;
    if (!bytes_read)
      break;					// end of file
    (void) my_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
    if (my_write(file, (byte*) io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
      goto err;
  }
  /* The following will either truncate the file or fill the end with \n' */
unknown's avatar
unknown committed
754 755
  if (my_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
      my_sync(file, MYF(MY_WME)))
756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775
    goto err;

  /* Reset data in old index cache */
  reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
  DBUG_RETURN(0);

err:
  DBUG_RETURN(1);
}


/*
  Find the position in the log-index-file for the given log name

  SYNOPSIS
    find_log_pos()
    linfo		Store here the found log file name and position to
			the NEXT log file name in the index file.
    log_name		Filename to find in the index file.
			Is a null pointer if we want to read the first entry
unknown's avatar
unknown committed
776
    need_lock		Set this to 1 if the parent doesn't already have a
777 778 779
			lock on LOCK_index

  NOTE
unknown's avatar
unknown committed
780 781
    On systems without the truncate function the file will end with one or
    more empty lines.  These will be ignored when reading the file.
782 783 784 785 786 787 788 789 790

  RETURN VALUES
    0			ok
    LOG_INFO_EOF	End of log-index-file found
    LOG_INFO_IO		Got IO error while reading file
*/

int MYSQL_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
			    bool need_lock)
unknown's avatar
unknown committed
791
{
792 793 794 795 796
  int error= 0;
  char *fname= linfo->log_file_name;
  uint log_name_len= log_name ? (uint) strlen(log_name) : 0;
  DBUG_ENTER("find_log_pos");
  DBUG_PRINT("enter",("log_name: %s", log_name ? log_name : "NULL"));
unknown's avatar
unknown committed
797

unknown's avatar
unknown committed
798
  /*
799 800
    Mutex needed because we need to make sure the file pointer does not
    move from under our feet
unknown's avatar
unknown committed
801
  */
802
  if (need_lock)
803
    pthread_mutex_lock(&LOCK_index);
804 805 806 807 808
  safe_mutex_assert_owner(&LOCK_index);

  /* As the file is flushed, we can't get an error here */
  (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);

unknown's avatar
unknown committed
809
  for (;;)
810
  {
811
    uint length;
812 813 814 815
    my_off_t offset= my_b_tell(&index_file);
    /* If we get 0 or 1 characters, this is the end of the file */

    if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
unknown's avatar
unknown committed
816
    {
817 818 819
      /* Did not find the given entry; Return not found or error */
      error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
      break;
unknown's avatar
unknown committed
820 821
    }

822 823
    // if the log entry matches, null string matching anything
    if (!log_name ||
unknown's avatar
unknown committed
824
	(log_name_len == length-1 && fname[log_name_len] == '\n' &&
825
	 !memcmp(fname, log_name, log_name_len)))
unknown's avatar
unknown committed
826
    {
827
      DBUG_PRINT("info",("Found log file entry"));
828
      fname[length-1]=0;			// remove last \n
829 830
      linfo->index_file_start_offset= offset;
      linfo->index_file_offset = my_b_tell(&index_file);
831
      break;
unknown's avatar
unknown committed
832
    }
833
  }
834

835
  if (need_lock)
836
    pthread_mutex_unlock(&LOCK_index);
837
  DBUG_RETURN(error);
unknown's avatar
unknown committed
838
}
unknown's avatar
unknown committed
839

840

841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
/*
  Find the position in the log-index-file for the given log name

  SYNOPSIS
    find_next_log()
    linfo		Store here the next log file name and position to
			the file name after that.
    need_lock		Set this to 1 if the parent doesn't already have a
			lock on LOCK_index

  NOTE
    - Before calling this function, one has to call find_log_pos()
      to set up 'linfo'
    - Mutex needed because we need to make sure the file pointer does not move
      from under our feet

  RETURN VALUES
    0			ok
    LOG_INFO_EOF	End of log-index-file found
    LOG_INFO_IO		Got IO error while reading file
*/

863
int MYSQL_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
unknown's avatar
unknown committed
864
{
865
  int error= 0;
866
  uint length;
867 868
  char *fname= linfo->log_file_name;

869 870
  if (need_lock)
    pthread_mutex_lock(&LOCK_index);
871 872 873 874 875 876 877 878
  safe_mutex_assert_owner(&LOCK_index);

  /* As the file is flushed, we can't get an error here */
  (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
			 0);

  linfo->index_file_start_offset= linfo->index_file_offset;
  if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
879
  {
880
    error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
881 882
    goto err;
  }
883
  fname[length-1]=0;				// kill \n
884
  linfo->index_file_offset = my_b_tell(&index_file);
885

unknown's avatar
unknown committed
886
err:
887 888
  if (need_lock)
    pthread_mutex_unlock(&LOCK_index);
unknown's avatar
unknown committed
889 890 891
  return error;
}

unknown's avatar
unknown committed
892

893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911
/*
  Delete all logs refered to in the index file
  Start writing to a new log file.  The new index file will only contain
  this file.

  SYNOPSIS
     reset_logs()
     thd		Thread

  NOTE
    If not called from slave thread, write start event to new log


  RETURN VALUES
    0	ok
    1   error
*/

bool MYSQL_LOG::reset_logs(THD* thd)
912 913
{
  LOG_INFO linfo;
914
  bool error=0;
915 916
  const char* save_name;
  enum_log_type save_log_type;
917
  DBUG_ENTER("reset_logs");
918

919 920 921 922
  /*
    We need to get both locks to be sure that no one is trying to
    write to the index log file.
  */
923
  pthread_mutex_lock(&LOCK_log);
924
  pthread_mutex_lock(&LOCK_index);
925 926 927 928 929 930 931
  /*
    The following mutex is needed to ensure that no threads call
    'delete thd' as we would then risk missing a 'rollback' from this
    thread. If the transaction involved MyISAM tables, it should go
    into binlog even on rollback.
  */
  (void) pthread_mutex_lock(&LOCK_thread_count);
932 933 934 935 936

  /* Save variables so that we can reopen the log */
  save_name=name;
  name=0;					// Protect against free
  save_log_type=log_type;
937
  close(LOG_CLOSE_TO_BE_OPENED);
938 939 940 941

  /* First delete all old log files */

  if (find_log_pos(&linfo, NullS, 0))
942 943 944 945
  {
    error=1;
    goto err;
  }
946

unknown's avatar
unknown committed
947
  for (;;)
948 949
  {
    my_delete(linfo.log_file_name, MYF(MY_WME));
950
    if (find_next_log(&linfo, 0))
951 952
      break;
  }
953 954

  /* Start logging with a new file */
unknown's avatar
unknown committed
955
  close(LOG_CLOSE_INDEX);
956 957
  my_delete(index_file_name, MYF(MY_WME));	// Reset (open will update)
  if (!thd->slave_thread)
958
    need_start_event=1;
959 960
  open_index_file(index_file_name, 0);
  open(save_name, save_log_type, 0,
961
       io_cache_type, no_auto_events, max_size, 0);
962
  my_free((gptr) save_name, MYF(0));
963

964
err:
965
  (void) pthread_mutex_unlock(&LOCK_thread_count);
966
  pthread_mutex_unlock(&LOCK_index);
967
  pthread_mutex_unlock(&LOCK_log);
968
  DBUG_RETURN(error);
969 970
}

unknown's avatar
unknown committed
971

972
/*
973 974 975
  Delete relay log files prior to rli->group_relay_log_name
  (i.e. all logs which are not involved in a non-finished group
  (transaction)), remove them from the index file and start on next relay log.
976 977 978

  SYNOPSIS
    purge_first_log()
979 980 981 982 983
    rli		 Relay log information
    included     If false, all relay logs that are strictly before
                 rli->group_relay_log_name are deleted ; if true, the latter is
                 deleted too (i.e. all relay logs
                 read by the SQL slave thread are deleted).
984

985 986
  NOTE
    - This is only called from the slave-execute thread when it has read
987 988 989 990 991
      all commands from a relay log and want to switch to a new relay log.
    - When this happens, we can be in an active transaction as
      a transaction can span over two relay logs
      (although it is always written as a single block to the master's binary 
      log, hence cannot span over two master's binary logs).
992 993 994

  IMPLEMENTATION
    - Protects index file with LOCK_index
995 996
    - Delete relevant relay log files
    - Copy all file names after these ones to the front of the index file
997
    - If the OS has truncate, truncate the file, else fill it with \n'
998
    - Read the next file name from the index file and store in rli->linfo
999 1000

  RETURN VALUES
1001 1002 1003 1004
    0			ok
    LOG_INFO_EOF	End of log-index-file found
    LOG_INFO_SEEK	Could not allocate IO cache
    LOG_INFO_IO		Got IO error while reading file
1005 1006
*/

unknown's avatar
SCRUM  
unknown committed
1007
#ifdef HAVE_REPLICATION
1008

1009
int MYSQL_LOG::purge_first_log(struct st_relay_log_info* rli, bool included) 
1010
{
1011
  int error;
1012 1013
  DBUG_ENTER("purge_first_log");

1014 1015
  DBUG_ASSERT(is_open());
  DBUG_ASSERT(rli->slave_running == 1);
1016
  DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
1017

1018
  pthread_mutex_lock(&LOCK_index);
1019 1020 1021 1022 1023 1024
  pthread_mutex_lock(&rli->log_space_lock);
  rli->relay_log.purge_logs(rli->group_relay_log_name, included,
                            0, 0, &rli->log_space_total);
  // Tell the I/O thread to take the relay_log_space_limit into account
  rli->ignore_log_space_limit= 0;
  pthread_mutex_unlock(&rli->log_space_lock);
unknown's avatar
unknown committed
1025 1026

  /*
1027 1028 1029
    Ok to broadcast after the critical region as there is no risk of
    the mutex being destroyed by this thread later - this helps save
    context switches
unknown's avatar
unknown committed
1030
  */
1031 1032 1033 1034 1035
  pthread_cond_broadcast(&rli->log_space_cond);
  
  /*
    Read the next log file name from the index file and pass it back to
    the caller
1036 1037
    If included is true, we want the first relay log;
    otherwise we want the one after event_relay_log_name.
1038
  */
1039 1040 1041 1042
  if ((included && (error=find_log_pos(&rli->linfo, NullS, 0))) ||
      (!included &&
       ((error=find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)) ||
        (error=find_next_log(&rli->linfo, 0)))))
1043 1044
  {
    char buff[22];
1045 1046 1047 1048 1049
    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
                    error,
                    llstr(rli->linfo.index_file_offset,buff),
                    rli->group_relay_log_name,
                    included);
1050 1051
    goto err;
  }
1052

1053
  /*
1054
    Reset rli's coordinates to the current log.
1055
  */
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069
  rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
  strmake(rli->event_relay_log_name,rli->linfo.log_file_name,
	  sizeof(rli->event_relay_log_name)-1);

  /*
    If we removed the rli->group_relay_log_name file,
    we must update the rli->group* coordinates, otherwise do not touch it as the
    group's execution is not finished (e.g. COMMIT not executed)
  */
  if (included)
  {
    rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
    strmake(rli->group_relay_log_name,rli->linfo.log_file_name,
            sizeof(rli->group_relay_log_name)-1);
1070
    rli->notify_group_relay_log_name_update();
1071
  }
1072 1073 1074

  /* Store where we are in the new file for the execution thread */
  flush_relay_log_info(rli);
unknown's avatar
unknown committed
1075

1076
err:
1077
  pthread_mutex_unlock(&LOCK_index);
1078
  DBUG_RETURN(error);
1079 1080
}

unknown's avatar
unknown committed
1081 1082 1083 1084
/*
  Update log index_file
*/

1085
int MYSQL_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
unknown's avatar
unknown committed
1086 1087 1088 1089 1090
{
  if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
    return LOG_INFO_IO;

  // now update offsets in index file for running threads
1091 1092
  if (need_update_threads)
    adjust_linfo_offsets(log_info->index_file_start_offset);
unknown's avatar
unknown committed
1093 1094
  return 0;
}
unknown's avatar
unknown committed
1095

1096 1097 1098 1099 1100
/*
  Remove all logs before the given log from disk and from the index file.

  SYNOPSIS
    purge_logs()
1101 1102 1103 1104 1105 1106 1107
    to_log	        Delete all log file name before this file. 
    included            If true, to_log is deleted too.
    need_mutex
    need_update_threads If we want to update the log coordinates of
                        all threads. False for relay logs, true otherwise.
    freed_log_space     If not null, decrement this variable of
                        the amount of log space freed
1108 1109 1110 1111 1112 1113 1114 1115 1116 1117

  NOTES
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  RETURN VALUES
    0				ok
    LOG_INFO_EOF		to_log not found
*/

1118 1119 1120 1121 1122
int MYSQL_LOG::purge_logs(const char *to_log, 
                          bool included,
                          bool need_mutex, 
                          bool need_update_threads, 
                          ulonglong *decrease_log_space)
unknown's avatar
unknown committed
1123 1124
{
  int error;
1125
  bool exit_loop= 0;
1126 1127
  LOG_INFO log_info;
  DBUG_ENTER("purge_logs");
1128
  DBUG_PRINT("info",("to_log= %s",to_log));
1129

1130 1131
  if (need_mutex)
    pthread_mutex_lock(&LOCK_index);
1132
  if ((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/)))
1133 1134
    goto err;

1135
  /*
1136
    File name exists in index file; delete until we find this file
1137 1138 1139
    or a file that is used.
  */
  if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
1140
    goto err;
1141 1142
  while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
         !log_in_use(log_info.log_file_name))
1143
  {
1144
    ulong file_size= 0;
1145 1146 1147
    if (decrease_log_space) //stat the file we want to delete
    {
      MY_STAT s;
1148 1149 1150 1151 1152 1153

      /* 
         If we could not stat, we can't know the amount
         of space that deletion will free. In most cases,
         deletion won't work either, so it's not a problem.
      */
1154
      if (my_stat(log_info.log_file_name,&s,MYF(0)))
1155
        file_size= s.st_size;
1156 1157 1158 1159 1160 1161 1162
    }
    /*
      It's not fatal if we can't delete a log file ;
      if we could delete it, take its size into account
    */
    DBUG_PRINT("info",("purging %s",log_info.log_file_name));
    if (!my_delete(log_info.log_file_name, MYF(0)) && decrease_log_space)
1163
      *decrease_log_space-= file_size;
1164
    if (find_next_log(&log_info, 0) || exit_loop)
1165
      break;
1166
  }
1167

unknown's avatar
unknown committed
1168 1169 1170 1171
  /*
    If we get killed -9 here, the sysadmin would have to edit
    the log index file after restart - otherwise, this should be safe
  */
1172
  error= update_log_index(&log_info, need_update_threads);
1173

unknown's avatar
unknown committed
1174
err:
1175 1176
  if (need_mutex)
    pthread_mutex_unlock(&LOCK_index);
1177
  DBUG_RETURN(error);
unknown's avatar
unknown committed
1178 1179
}

1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197
/*
  Remove all logs before the given file date from disk and from the
  index file.

  SYNOPSIS
    purge_logs_before_date()
    thd		Thread pointer
    before_date	Delete all log files before given date.

  NOTES
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  RETURN VALUES
    0				ok
    LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
*/

1198
int MYSQL_LOG::purge_logs_before_date(time_t purge_time)
1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
{
  int error;
  LOG_INFO log_info;
  MY_STAT stat_area;

  DBUG_ENTER("purge_logs_before_date");

  pthread_mutex_lock(&LOCK_index);

  /*
    Delete until we find curren file
    or a file that is used or a file
    that is older than purge_time.
  */
  if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
unknown's avatar
unknown committed
1214
    goto err;
1215 1216 1217 1218 1219

  while (strcmp(log_file_name, log_info.log_file_name) &&
	 !log_in_use(log_info.log_file_name))
  {
    /* It's not fatal even if we can't delete a log file */
unknown's avatar
unknown committed
1220 1221
    if (!my_stat(log_info.log_file_name, &stat_area, MYF(0)) ||
	stat_area.st_mtime >= purge_time)
1222
      break;
unknown's avatar
unknown committed
1223
    my_delete(log_info.log_file_name, MYF(0));
1224 1225
    if (find_next_log(&log_info, 0))
      break;
unknown's avatar
unknown committed
1226
  }
1227

1228 1229 1230 1231
  /*
    If we get killed -9 here, the sysadmin would have to edit
    the log index file after restart - otherwise, this should be safe
  */
1232
  error= update_log_index(&log_info, 1);
1233

unknown's avatar
unknown committed
1234 1235
err:
  pthread_mutex_unlock(&LOCK_index);
1236
  DBUG_RETURN(error);
unknown's avatar
unknown committed
1237 1238
}

1239

unknown's avatar
SCRUM  
unknown committed
1240
#endif /* HAVE_REPLICATION */
1241

1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253

/*
  Create a new log file name

  SYNOPSIS
    make_log_name()
    buf			buf of at least FN_REFLEN where new name is stored

  NOTE
    If file name will be longer then FN_REFLEN it will be truncated
*/

unknown's avatar
unknown committed
1254 1255
void MYSQL_LOG::make_log_name(char* buf, const char* log_ident)
{
unknown's avatar
unknown committed
1256 1257 1258 1259 1260
  uint dir_len = dirname_length(log_file_name); 
  if (dir_len > FN_REFLEN)
    dir_len=FN_REFLEN-1;
  strnmov(buf, log_file_name, dir_len);
  strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len);
unknown's avatar
unknown committed
1261 1262
}

1263 1264 1265 1266 1267 1268

/*
  Check if we are writing/reading to the given log file
*/

bool MYSQL_LOG::is_active(const char *log_file_name_arg)
unknown's avatar
unknown committed
1269
{
unknown's avatar
unknown committed
1270
  return !strcmp(log_file_name, log_file_name_arg);
unknown's avatar
unknown committed
1271 1272
}

1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286

/*
  Start writing to a new log file or reopen the old file

  SYNOPSIS
    new_file()
    need_lock		Set to 1 (default) if caller has not locked
			LOCK_log and LOCK_index

  NOTE
    The new file name is stored last in the index file
*/

void MYSQL_LOG::new_file(bool need_lock)
unknown's avatar
unknown committed
1287
{
1288 1289 1290
  char new_name[FN_REFLEN], *new_name_ptr, *old_name;
  enum_log_type save_log_type;

1291
  DBUG_ENTER("MYSQL_LOG::new_file");
1292
  if (!is_open())
1293 1294 1295 1296
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_VOID_RETURN;
  }
1297 1298 1299

  if (need_lock)
    pthread_mutex_lock(&LOCK_log);
1300 1301
  pthread_mutex_lock(&LOCK_index);

1302 1303 1304
  safe_mutex_assert_owner(&LOCK_log);
  safe_mutex_assert_owner(&LOCK_index);

1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322
  /*
    if binlog is used as tc log, be sure all xids are "unlogged",
    so that on recover we only need to scan one - latest - binlog file
    for prepared xids. As this is expected to be a rare event,
    simple wait strategy is enough. We're locking LOCK_log to be sure no
    new Xid_log_event's are added to the log (and prepared_xids is not
    increased), and waiting on COND_prep_xids for late threads to
    catch up.
  */
  if (prepared_xids)
  {
    tc_log_page_waits++;
    pthread_mutex_lock(&LOCK_prep_xids);
    while (prepared_xids)
      pthread_cond_wait(&COND_prep_xids, &LOCK_prep_xids);
    pthread_mutex_unlock(&LOCK_prep_xids);
  }

unknown's avatar
unknown committed
1323
  /* Reuse old name if not binlog and not update log */
1324
  new_name_ptr= name;
unknown's avatar
unknown committed
1325

1326
  /*
1327 1328 1329
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
1330
  */
1331 1332 1333
  if (generate_new_name(new_name, name))
    goto end;
  new_name_ptr=new_name;
1334

1335
  if (log_type == LOG_BIN)
1336
  {
1337
    if (!no_auto_events)
unknown's avatar
unknown committed
1338
    {
1339
      /*
1340 1341
        We log the whole file name for log file as the user may decide
        to change base names at some point.
1342
      */
unknown's avatar
unknown committed
1343
      THD *thd = current_thd; /* may be 0 if we are reacting to SIGHUP */
1344 1345
      Rotate_log_event r(thd,new_name+dirname_length(new_name));
      r.write(&log_file);
1346
      bytes_written += r.data_written;
unknown's avatar
unknown committed
1347
    }
1348 1349 1350 1351 1352
    /*
      Update needs to be signalled even if there is no rotate event
      log rotation should give the waiting thread a signal to
      discover EOF and move on to the next log.
    */
1353
    signal_update();
1354 1355 1356 1357
  }
  old_name=name;
  save_log_type=log_type;
  name=0;				// Don't free name
1358
  close(LOG_CLOSE_TO_BE_OPENED);
unknown's avatar
unknown committed
1359

1360
  /*
unknown's avatar
unknown committed
1361
     Note that at this point, log_type != LOG_CLOSED (important for is_open()).
unknown's avatar
unknown committed
1362
  */
unknown's avatar
unknown committed
1363

1364
  /*
1365
     new_file() is only used for rotation (in FLUSH LOGS or because size >
1366
     max_binlog_size or max_relay_log_size).
1367 1368 1369 1370
     If this is a binary log, the Format_description_log_event at the beginning of
     the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
1371
  */
1372

1373 1374
  open(old_name, save_log_type, new_name_ptr,
       io_cache_type, no_auto_events, max_size, 1);
1375 1376
  my_free(old_name,MYF(0));

1377
end:
1378 1379
  if (need_lock)
    pthread_mutex_unlock(&LOCK_log);
1380 1381
  pthread_mutex_unlock(&LOCK_index);

1382
  DBUG_VOID_RETURN;
unknown's avatar
unknown committed
1383 1384
}

1385

1386 1387 1388 1389
bool MYSQL_LOG::append(Log_event* ev)
{
  bool error = 0;
  pthread_mutex_lock(&LOCK_log);
1390 1391
  DBUG_ENTER("MYSQL_LOG::append");

1392
  DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
1393 1394 1395 1396
  /*
    Log_event::write() is smart enough to use my_b_write() or
    my_b_append() depending on the kind of cache we have.
  */
1397 1398 1399 1400 1401
  if (ev->write(&log_file))
  {
    error=1;
    goto err;
  }
1402
  bytes_written+= ev->data_written;
unknown's avatar
unknown committed
1403
  DBUG_PRINT("info",("max_size: %lu",max_size));
1404
  if ((uint) my_b_append_tell(&log_file) > max_size)
1405
    new_file(0);
1406

1407
err:
1408
  pthread_mutex_unlock(&LOCK_log);
1409
  signal_update();				// Safe as we don't call close
1410
  DBUG_RETURN(error);
1411 1412
}

1413

1414 1415
bool MYSQL_LOG::appendv(const char* buf, uint len,...)
{
1416
  bool error= 0;
1417
  DBUG_ENTER("MYSQL_LOG::appendv");
1418 1419
  va_list(args);
  va_start(args,len);
1420

1421
  DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
1422

1423 1424 1425
  pthread_mutex_lock(&LOCK_log);
  do
  {
unknown's avatar
unknown committed
1426
    if (my_b_append(&log_file,(byte*) buf,len))
1427
    {
1428 1429
      error= 1;
      goto err;
1430
    }
unknown's avatar
unknown committed
1431
    bytes_written += len;
1432
  } while ((buf=va_arg(args,const char*)) && (len=va_arg(args,uint)));
unknown's avatar
unknown committed
1433
  DBUG_PRINT("info",("max_size: %lu",max_size));
1434
  if ((uint) my_b_append_tell(&log_file) > max_size)
1435 1436 1437 1438
    new_file(0);

err:
  pthread_mutex_unlock(&LOCK_log);
1439 1440
  if (!error)
    signal_update();
1441
  DBUG_RETURN(error);
1442
}
unknown's avatar
unknown committed
1443

unknown's avatar
unknown committed
1444

1445 1446
/*
  Write to normal (not rotable) log
1447
  This is the format for the 'normal' log.
1448 1449
*/

1450
bool MYSQL_LOG::write(THD *thd,enum enum_server_command command,
unknown's avatar
unknown committed
1451 1452
		      const char *format,...)
{
1453
  if (is_open() && (what_to_log & (1L << (uint) command)))
unknown's avatar
unknown committed
1454
  {
1455 1456
    uint length;
    int error= 0;
unknown's avatar
unknown committed
1457
    VOID(pthread_mutex_lock(&LOCK_log));
1458

unknown's avatar
unknown committed
1459 1460
    /* Test if someone closed between the is_open test and lock */
    if (is_open())
unknown's avatar
unknown committed
1461 1462 1463
    {
      time_t skr;
      ulong id;
1464 1465 1466 1467
      va_list args;
      va_start(args,format);
      char buff[32];

unknown's avatar
unknown committed
1468 1469
      if (thd)
      {						// Normal thread
unknown's avatar
unknown committed
1470 1471 1472 1473 1474
	if ((thd->options & OPTION_LOG_OFF)
#ifndef NO_EMBEDDED_ACCESS_CHECKS
	    && (thd->master_access & SUPER_ACL)
#endif
)
unknown's avatar
unknown committed
1475 1476
	{
	  VOID(pthread_mutex_unlock(&LOCK_log));
1477
	  return 0;				// No logging
unknown's avatar
unknown committed
1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494
	}
	id=thd->thread_id;
	if (thd->user_time || !(skr=thd->query_start()))
	  skr=time(NULL);			// Connected
      }
      else
      {						// Log from connect handler
	skr=time(NULL);
	id=0;
      }
      if (skr != last_time)
      {
	last_time=skr;
	struct tm tm_tmp;
	struct tm *start;
	localtime_r(&skr,&tm_tmp);
	start=&tm_tmp;
1495 1496 1497 1498 1499 1500 1501 1502
	/* Note that my_b_write() assumes it knows the length for this */
	sprintf(buff,"%02d%02d%02d %2d:%02d:%02d\t",
		start->tm_year % 100,
		start->tm_mon+1,
		start->tm_mday,
		start->tm_hour,
		start->tm_min,
		start->tm_sec);
unknown's avatar
unknown committed
1503
	if (my_b_write(&log_file, (byte*) buff,16))
unknown's avatar
unknown committed
1504 1505
	  error=errno;
      }
unknown's avatar
unknown committed
1506
      else if (my_b_write(&log_file, (byte*) "\t\t",2) < 0)
unknown's avatar
unknown committed
1507
	error=errno;
1508 1509 1510 1511
      length=my_sprintf(buff,
			(buff, "%7ld %-11.11s", id,
			 command_name[(uint) command]));
      if (my_b_write(&log_file, (byte*) buff,length))
unknown's avatar
unknown committed
1512 1513 1514
	error=errno;
      if (format)
      {
unknown's avatar
unknown committed
1515
	if (my_b_write(&log_file, (byte*) " ",1) ||
1516
	    my_b_vprintf(&log_file,format,args) == (uint) -1)
unknown's avatar
unknown committed
1517 1518
	  error=errno;
      }
unknown's avatar
unknown committed
1519
      if (my_b_write(&log_file, (byte*) "\n",1) ||
1520
	  flush_io_cache(&log_file))
unknown's avatar
unknown committed
1521 1522 1523 1524 1525 1526
	error=errno;
      if (error && ! write_error)
      {
	write_error=1;
	sql_print_error(ER(ER_ERROR_ON_WRITE),name,error);
      }
1527
      va_end(args);
unknown's avatar
unknown committed
1528 1529
    }
    VOID(pthread_mutex_unlock(&LOCK_log));
1530
    return error != 0;
unknown's avatar
unknown committed
1531
  }
1532
  return 0;
unknown's avatar
unknown committed
1533 1534 1535
}


1536 1537
inline bool sync_binlog(IO_CACHE *cache)
{
1538 1539 1540 1541 1542 1543
  if (sync_binlog_period == ++sync_binlog_counter && sync_binlog_period)
  {
    sync_binlog_counter= 0;
    return my_sync(cache->file, MYF(MY_WME));
  }
  return 0;
1544 1545
}

1546

1547 1548 1549 1550
/*
  Write an event to the binary log
*/

1551
bool MYSQL_LOG::write(Log_event *event_info)
unknown's avatar
unknown committed
1552
{
1553 1554 1555 1556
  THD *thd= event_info->thd;
  bool error= 1;
  DBUG_ENTER("MYSQL_LOG::write(Log_event *)");

1557 1558
  pthread_mutex_lock(&LOCK_log);

1559
  /*
unknown's avatar
unknown committed
1560 1561 1562 1563
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
1564
  if (likely(is_open()))
unknown's avatar
unknown committed
1565
  {
unknown's avatar
unknown committed
1566 1567
    const char *local_db= event_info->get_db();
    IO_CACHE *file= &log_file;
unknown's avatar
SCRUM  
unknown committed
1568
#ifdef HAVE_REPLICATION
1569
    /*
unknown's avatar
unknown committed
1570 1571 1572 1573
       In the future we need to add to the following if tests like
       "do the involved tables match (to be implemented)
        binlog_[wild_]{do|ignore}_table?" (WL#1049)"
    */
1574
    if ((thd && !(thd->options & OPTION_BIN_LOG)) ||
1575
	(!db_ok(local_db, binlog_do_db, binlog_ignore_db)))
unknown's avatar
unknown committed
1576
    {
1577
      VOID(pthread_mutex_unlock(&LOCK_log));
1578
      DBUG_PRINT("error",("!db_ok('%s')", local_db));
1579
      DBUG_RETURN(0);
1580
    }
unknown's avatar
SCRUM  
unknown committed
1581
#endif /* HAVE_REPLICATION */
1582

1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602
#ifdef USING_TRANSACTIONS
    /*
      Should we write to the binlog cache or to the binlog on disk?
      Write to the binlog cache if:
      - it is already not empty (meaning we're in a transaction; note that the
     present event could be about a non-transactional table, but still we need
     to write to the binlog cache in that case to handle updates to mixed
     trans/non-trans table types the best possible in binlogging)
      - or if the event asks for it (cache_stmt == true).
    */
    if (opt_using_transactions && thd)
    {
      IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];

      if (event_info->get_cache_stmt())
      {
        if (!trans_log)
        {
          thd->ha_data[binlog_hton.slot]= trans_log= (IO_CACHE *)
            my_malloc(sizeof(IO_CACHE), MYF(MY_ZEROFILL));
1603 1604
          if (!trans_log || open_cached_file(trans_log, mysql_tmpdir,
                                             LOG_PREFIX,
1605 1606 1607 1608 1609 1610 1611
                                             binlog_cache_size, MYF(MY_WME)))
          {
            my_free((gptr)trans_log, MYF(MY_ALLOW_ZERO_PTR));
            thd->ha_data[binlog_hton.slot]= trans_log= 0;
            goto err;
          }
          trans_log->end_of_file= max_binlog_cache_size;
unknown's avatar
unknown committed
1612
          trans_register_ha(thd,
1613 1614 1615
                            thd->options & (OPTION_NOT_AUTOCOMMIT |
                                            OPTION_BEGIN),
                            &binlog_hton);
1616 1617
        }
        else if (!my_b_tell(trans_log))
unknown's avatar
unknown committed
1618
          trans_register_ha(thd,
1619 1620 1621
                            thd->options & (OPTION_NOT_AUTOCOMMIT |
                                            OPTION_BEGIN),
                            &binlog_hton);
1622 1623 1624 1625 1626 1627 1628 1629
        file= trans_log;
      }
      else if (trans_log && my_b_tell(trans_log))
        file= trans_log;
    }
#endif
    DBUG_PRINT("info",("event type=%d",event_info->get_type_code()));

unknown's avatar
unknown committed
1630 1631 1632 1633
    /*
      No check for auto events flag here - this write method should
      never be called if auto-events are enabled
    */
unknown's avatar
unknown committed
1634 1635

    /*
1636 1637
      1. Write first log events which describe the 'run environment'
      of the SQL command
unknown's avatar
unknown committed
1638 1639
    */

1640
    if (thd)
1641
    {
1642
#if MYSQL_VERSION_ID < 50003
1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660
      /*
        To make replication of charsets working in 4.1 we are writing values
        of charset related variables before every statement in the binlog,
        if values of those variables differ from global server-wide defaults.
        We are using SET ONE_SHOT command so that the charset vars get reset
        to default after the first non-SET statement.
        In the next 5.0 this won't be needed as we will use the new binlog
        format to store charset info.
      */
      if ((thd->variables.character_set_client->number !=
           global_system_variables.collation_server->number) ||
          (thd->variables.character_set_client->number !=
           thd->variables.collation_connection->number) ||
          (thd->variables.collation_server->number !=
           thd->variables.collation_connection->number))
      {
	char buf[200];
        int written= my_snprintf(buf, sizeof(buf)-1,
1661
                                 "SET ONE_SHOT CHARACTER_SET_CLIENT=%u,\
unknown's avatar
unknown committed
1662
COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u",
1663 1664 1665 1666
                                 (uint) thd->variables.character_set_client->number,
                                 (uint) thd->variables.collation_connection->number,
                                 (uint) thd->variables.collation_database->number,
                                 (uint) thd->variables.collation_server->number);
1667
	Query_log_event e(thd, buf, written, 0, FALSE);
1668 1669 1670
	if (e.write(file))
	  goto err;
      }
1671
#endif
1672 1673 1674
      /*
        We use the same ONE_SHOT trick for making replication of time zones 
        working in 4.1. Again in 5.0 we have better means for doing this.
1675 1676 1677 1678 1679

        TODO: we should do like we now do with charsets (no more ONE_SHOT;
        logging in each event in a compact format). Dmitri says we can do:
        if (time_zone_used) write the timezone to binlog (in a format to be
        defined).
1680 1681 1682 1683 1684 1685 1686 1687
      */
      if (thd->time_zone_used &&
          thd->variables.time_zone != global_system_variables.time_zone)
      {
        char buf[MAX_TIME_ZONE_NAME_LENGTH + 26];
        char *buf_end= strxmov(buf, "SET ONE_SHOT TIME_ZONE='", 
                               thd->variables.time_zone->get_name()->ptr(),
                               "'", NullS);
1688
        Query_log_event e(thd, buf, buf_end - buf, 0, FALSE);
1689 1690 1691
        if (e.write(file))
          goto err;
      }
1692

1693 1694 1695
      if (thd->last_insert_id_used)
      {
	Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
1696
			   thd->current_insert_id);
1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711
	if (e.write(file))
	  goto err;
      }
      if (thd->insert_id_used)
      {
	Intvar_log_event e(thd,(uchar) INSERT_ID_EVENT,thd->last_insert_id);
	if (e.write(file))
	  goto err;
      }
      if (thd->rand_used)
      {
	Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
	if (e.write(file))
	  goto err;
      }
unknown's avatar
unknown committed
1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727
      if (thd->user_var_events.elements)
      {
	for (uint i= 0; i < thd->user_var_events.elements; i++)
	{
	  BINLOG_USER_VAR_EVENT *user_var_event;
	  get_dynamic(&thd->user_var_events,(gptr) &user_var_event, i);
          User_var_log_event e(thd, user_var_event->user_var_event->name.str,
                               user_var_event->user_var_event->name.length,
                               user_var_event->value,
                               user_var_event->length,
                               user_var_event->type,
			       user_var_event->charset_number);
	  if (e.write(file))
	    goto err;
	}
      }
1728
    }
unknown's avatar
unknown committed
1729

1730 1731 1732
    /* 
       Write the SQL command 
     */
unknown's avatar
unknown committed
1733

unknown's avatar
unknown committed
1734
    if (event_info->write(file))
1735
      goto err;
unknown's avatar
unknown committed
1736

1737
    if (file == &log_file) // we are writing to the real log (disk)
unknown's avatar
unknown committed
1738
    {
1739
      if (flush_io_cache(file) || sync_binlog(file))
unknown's avatar
unknown committed
1740
	goto err;
1741
    }
unknown's avatar
unknown committed
1742
    error=0;
1743

1744 1745 1746 1747
err:
    if (error)
    {
      if (my_errno == EFBIG)
unknown's avatar
unknown committed
1748
	my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
1749 1750 1751 1752 1753
      else
	my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
      write_error=1;
    }
    if (file == &log_file)
1754
    {
1755 1756
      signal_update();
      rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
1757
    }
1758
  }
1759 1760

  pthread_mutex_unlock(&LOCK_log);
unknown's avatar
unknown committed
1761

1762
  DBUG_RETURN(error);
1763 1764
}

1765 1766
void MYSQL_LOG::rotate_and_purge(uint flags)
{
1767 1768
  if ((flags & RP_FORCE_ROTATE) ||
      (my_b_tell(&log_file) >= (my_off_t) max_size))
1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781
  {
    new_file(!(flags & RP_LOCK_LOG_IS_ALREADY_LOCKED));
#ifdef HAVE_REPLICATION
    // QQ why do we need #ifdef here ???
    if (expire_logs_days)
    {
      long purge_time= time(0) - expire_logs_days*24*60*60;
      if (purge_time >= 0)
        purge_logs_before_date(purge_time);
    }
#endif
  }
}
1782

unknown's avatar
unknown committed
1783 1784 1785 1786 1787 1788 1789 1790 1791
uint MYSQL_LOG::next_file_id()
{
  uint res;
  pthread_mutex_lock(&LOCK_log);
  res = file_id++;
  pthread_mutex_unlock(&LOCK_log);
  return res;
}

1792

1793 1794
/*
  Write a cached log entry to the binary log
1795

unknown's avatar
unknown committed
1796 1797
  SYNOPSIS
    write()
1798
    thd
unknown's avatar
unknown committed
1799 1800
    cache		The cache to copy to the binlog

1801 1802
  NOTE
    - We only come here if there is something in the cache.
unknown's avatar
unknown committed
1803
    - The thing in the cache is always a complete transaction
1804 1805 1806 1807
    - 'cache' needs to be reinitialized after this functions returns.

  IMPLEMENTATION
    - To support transaction over replication, we wrap the transaction
unknown's avatar
unknown committed
1808
      with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
1809 1810 1811
      We want to write a BEGIN/ROLLBACK block when a non-transactional table
      was updated in a transaction which was rolled back. This is to ensure
      that the same updates are run on the slave.
1812 1813
*/

1814
bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
1815
{
1816
  DBUG_ENTER("MYSQL_LOG::write(THD *, IO_CACHE *, Log_event *)");
1817
  VOID(pthread_mutex_lock(&LOCK_log));
1818

1819
  if (likely(is_open()))                       // Should always be true
1820
  {
1821 1822
    uint length;

unknown's avatar
unknown committed
1823
    /*
1824 1825
      Log "BEGIN" at the beginning of the transaction.
      which may contain more than 1 SQL statement.
1826
    */
1827
    if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
1828
    {
1829
      Query_log_event qinfo(thd, "BEGIN", 5, TRUE, FALSE);
1830 1831 1832 1833 1834 1835 1836 1837
      /*
        Imagine this is rollback due to net timeout, after all statements of
        the transaction succeeded. Then we want a zero-error code in BEGIN.
        In other words, if there was a really serious error code it's already
        in the statement's events.
        This is safer than thd->clear_error() against kills at shutdown.
      */
      qinfo.error_code= 0;
1838 1839 1840
      /*
        Now this Query_log_event has artificial log_pos 0. It must be adjusted
        to reflect the real position in the log. Not doing it would confuse the
1841 1842 1843
	slave: it would prevent this one from knowing where he is in the
	master's binlog, which would result in wrong positions being shown to
	the user, MASTER_POS_WAIT undue waiting etc.
1844
      */
1845 1846 1847
      if (qinfo.write(&log_file))
	goto err;
    }
unknown's avatar
unknown committed
1848
    /* Read from the file used to cache the queries .*/
unknown's avatar
unknown committed
1849
    if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
1850
      goto err;
1851
    length=my_b_bytes_in_cache(cache);
1852
    DBUG_EXECUTE_IF("half_binlogged_transaction", length-=100;);
1853
    do
1854
    {
unknown's avatar
unknown committed
1855
      /* Write data to the binary log file */
unknown's avatar
unknown committed
1856
      if (my_b_write(&log_file, cache->read_pos, length))
1857
	goto err;
unknown's avatar
unknown committed
1858
      cache->read_pos=cache->read_end;		// Mark buffer used up
1859
      DBUG_EXECUTE_IF("half_binlogged_transaction", goto DBUG_skip_commit;);
1860
    } while ((length=my_b_fill(cache)));
1861

1862 1863 1864
    if (commit_event->write(&log_file))
      goto err;
DBUG_skip_commit:
1865
    if (flush_io_cache(&log_file) || sync_binlog(&log_file))
1866
      goto err;
1867
    DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););
1868 1869
    if (cache->error)				// Error on read
    {
unknown's avatar
unknown committed
1870
      sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
1871
      write_error=1;				// Don't give more errors
1872
      goto err;
unknown's avatar
unknown committed
1873
    }
1874
    signal_update();
1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886
    /*
      if commit_event is Xid_log_event, increase the number of
      prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
      if there're prepared xids in it - see the comment in new_file() for
      an explanation.
      If the commit_event is not Xid_log_event (then it's a Query_log_event)
      rotate binlog, if necessary.
    */
    if (commit_event->get_type_code() == XID_EVENT)
      thread_safe_increment(prepared_xids, &LOCK_prep_xids);
    else
      rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
unknown's avatar
unknown committed
1887
  }
1888
  VOID(pthread_mutex_unlock(&LOCK_log));
unknown's avatar
unknown committed
1889

1890
  DBUG_RETURN(0);
1891 1892

err:
1893 1894 1895 1896 1897
  if (!write_error)
  {
    write_error= 1;
    sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
  }
1898
  VOID(pthread_mutex_unlock(&LOCK_log));
1899
  DBUG_RETURN(1);
unknown's avatar
unknown committed
1900 1901
}

1902

1903
/*
1904
  Write to the slow query log.
1905
*/
unknown's avatar
unknown committed
1906

1907
bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
1908
		      time_t query_start_arg)
unknown's avatar
unknown committed
1909
{
1910
  bool error=0;
unknown's avatar
unknown committed
1911
  time_t current_time;
unknown's avatar
unknown committed
1912 1913
  if (!is_open())
    return 0;
unknown's avatar
unknown committed
1914 1915
  DBUG_ENTER("MYSQL_LOG::write");

unknown's avatar
unknown committed
1916
  VOID(pthread_mutex_lock(&LOCK_log));
1917
  if (is_open())
unknown's avatar
unknown committed
1918 1919 1920 1921
  {						// Safety agains reopen
    int tmp_errno=0;
    char buff[80],*end;
    end=buff;
1922
    if (!(thd->options & OPTION_UPDATE_LOG))
unknown's avatar
unknown committed
1923 1924
    {
      VOID(pthread_mutex_unlock(&LOCK_log));
unknown's avatar
unknown committed
1925
      DBUG_RETURN(0);
unknown's avatar
unknown committed
1926
    }
1927
    if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT) || query_start_arg)
unknown's avatar
unknown committed
1928 1929 1930
    {
      current_time=time(NULL);
      if (current_time != last_time)
unknown's avatar
unknown committed
1931
      {
unknown's avatar
unknown committed
1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946
        last_time=current_time;
        struct tm tm_tmp;
        struct tm *start;
        localtime_r(&current_time,&tm_tmp);
        start=&tm_tmp;
        /* Note that my_b_write() assumes it knows the length for this */
        sprintf(buff,"# Time: %02d%02d%02d %2d:%02d:%02d\n",
                start->tm_year % 100,
                start->tm_mon+1,
                start->tm_mday,
                start->tm_hour,
                start->tm_min,
                start->tm_sec);
        if (my_b_write(&log_file, (byte*) buff,24))
          tmp_errno=errno;
unknown's avatar
unknown committed
1947
      }
unknown's avatar
unknown committed
1948
      if (my_b_printf(&log_file, "# User@Host: %s[%s] @ %s [%s]\n",
unknown's avatar
unknown committed
1949 1950
                      thd->priv_user ? thd->priv_user : "",
                      thd->user ? thd->user : "",
unknown's avatar
unknown committed
1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979
                      thd->host ? thd->host : "",
                      thd->ip ? thd->ip : "") == (uint) -1)
        tmp_errno=errno;
    }
    if (query_start_arg)
    {
      /* For slow query log */
      if (my_b_printf(&log_file,
                      "# Query_time: %lu  Lock_time: %lu  Rows_sent: %lu  Rows_examined: %lu\n",
                      (ulong) (current_time - query_start_arg),
                      (ulong) (thd->time_after_lock - query_start_arg),
                      (ulong) thd->sent_row_count,
                      (ulong) thd->examined_row_count) == (uint) -1)
        tmp_errno=errno;
    }
    if (thd->db && strcmp(thd->db,db))
    {						// Database changed
      if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1)
        tmp_errno=errno;
      strmov(db,thd->db);
    }
    if (thd->last_insert_id_used)
    {
      end=strmov(end,",last_insert_id=");
      end=longlong10_to_str((longlong) thd->current_insert_id,end,-10);
    }
    // Save value if we do an insert.
    if (thd->insert_id_used)
    {
1980
      if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
unknown's avatar
unknown committed
1981
      {
unknown's avatar
unknown committed
1982 1983
        end=strmov(end,",insert_id=");
        end=longlong10_to_str((longlong) thd->last_insert_id,end,-10);
unknown's avatar
unknown committed
1984
      }
unknown's avatar
unknown committed
1985 1986 1987 1988
    }
    if (thd->query_start_used)
    {
      if (query_start_arg != thd->query_start())
unknown's avatar
unknown committed
1989
      {
unknown's avatar
unknown committed
1990 1991 1992
        query_start_arg=thd->query_start();
        end=strmov(end,",timestamp=");
        end=int10_to_str((long) query_start_arg,end,10);
unknown's avatar
unknown committed
1993
      }
unknown's avatar
unknown committed
1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
    }
    if (end != buff)
    {
      *end++=';';
      *end='\n';
      if (my_b_write(&log_file, (byte*) "SET ",4) ||
          my_b_write(&log_file, (byte*) buff+1,(uint) (end-buff)))
        tmp_errno=errno;
    }
    if (!query)
    {
      end=strxmov(buff, "# administrator command: ",
                  command_name[thd->command], NullS);
      query_length=(ulong) (end-buff);
      query=buff;
    }
    if (my_b_write(&log_file, (byte*) query,query_length) ||
        my_b_write(&log_file, (byte*) ";\n",2) ||
        flush_io_cache(&log_file))
      tmp_errno=errno;
    if (tmp_errno)
    {
      error=1;
      if (! write_error)
unknown's avatar
unknown committed
2018
      {
unknown's avatar
unknown committed
2019 2020
        write_error=1;
        sql_print_error(ER(ER_ERROR_ON_WRITE),name,error);
unknown's avatar
unknown committed
2021 2022 2023
      }
    }
  }
unknown's avatar
unknown committed
2024
  VOID(pthread_mutex_unlock(&LOCK_log));
unknown's avatar
unknown committed
2025
  DBUG_RETURN(error);
unknown's avatar
unknown committed
2026 2027
}

unknown's avatar
unknown committed
2028

2029 2030 2031 2032 2033 2034
/*
  Wait until we get a signal that the binary log has been updated

  SYNOPSIS
    wait_for_update()
    thd			Thread variable
2035 2036 2037
    master_or_slave     If 0, the caller is the Binlog_dump thread from master;
                        if 1, the caller is the SQL thread from the slave. This
                        influences only thd->proc_info.
2038 2039 2040

  NOTES
    One must have a lock on LOCK_log before calling this function.
unknown's avatar
unknown committed
2041 2042
    This lock will be freed before return! That's required by
    THD::enter_cond() (see NOTES in sql_class.h).
2043 2044
*/

unknown's avatar
unknown committed
2045
void MYSQL_LOG::wait_for_update(THD* thd, bool master_or_slave)
2046
{
unknown's avatar
unknown committed
2047 2048 2049 2050 2051 2052 2053 2054
  const char *old_msg;
  DBUG_ENTER("wait_for_update");
  old_msg= thd->enter_cond(&update_cond, &LOCK_log,
                           master_or_slave ?
                           "Has read all relay log; waiting for the slave I/O "
                           "thread to update it" : 
                           "Has sent all binlog to slave; waiting for binlog "
                           "to be updated"); 
2055 2056
  pthread_cond_wait(&update_cond, &LOCK_log);
  thd->exit_cond(old_msg);
unknown's avatar
unknown committed
2057
  DBUG_VOID_RETURN;
2058
}
unknown's avatar
unknown committed
2059

unknown's avatar
unknown committed
2060

2061 2062 2063 2064 2065
/*
  Close the log file

  SYNOPSIS
    close()
2066 2067 2068 2069 2070
    exiting     Bitmask for one or more of the following bits:
                LOG_CLOSE_INDEX if we should close the index file
                LOG_CLOSE_TO_BE_OPENED if we intend to call open
                at once after close.
                LOG_CLOSE_STOP_EVENT write a 'stop' event to the log
unknown's avatar
unknown committed
2071 2072 2073 2074

  NOTES
    One can do an open on the object at once after doing a close.
    The internal structures are not freed until cleanup() is called
2075 2076
*/

2077
void MYSQL_LOG::close(uint exiting)
unknown's avatar
unknown committed
2078
{					// One can't set log_type here!
2079 2080
  DBUG_ENTER("MYSQL_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));
unknown's avatar
unknown committed
2081
  if (log_type != LOG_CLOSED && log_type != LOG_TO_BE_OPENED)
unknown's avatar
unknown committed
2082
  {
unknown's avatar
SCRUM  
unknown committed
2083
#ifdef HAVE_REPLICATION
2084 2085
    if (log_type == LOG_BIN && !no_auto_events &&
	(exiting & LOG_CLOSE_STOP_EVENT))
unknown's avatar
unknown committed
2086 2087
    {
      Stop_log_event s;
2088
      s.write(&log_file);
2089
      bytes_written+= s.data_written;
2090
      signal_update();
unknown's avatar
unknown committed
2091
    }
unknown's avatar
SCRUM  
unknown committed
2092
#endif /* HAVE_REPLICATION */
2093
    end_io_cache(&log_file);
2094 2095

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
2096
    if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
2097 2098
    {
      my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
unknown's avatar
unknown committed
2099
      byte flags=0; // clearing LOG_EVENT_BINLOG_IN_USE_F
unknown's avatar
unknown committed
2100
      my_pwrite(log_file.file, &flags, 1, offset, MYF(0));
2101 2102
    }

2103
    if (my_close(log_file.file,MYF(0)) < 0 && ! write_error)
unknown's avatar
unknown committed
2104 2105
    {
      write_error=1;
2106
      sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
unknown's avatar
unknown committed
2107 2108
    }
  }
2109 2110 2111 2112 2113 2114

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

2115
  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
2116
  {
2117 2118
    end_io_cache(&index_file);
    if (my_close(index_file.file, MYF(0)) < 0 && ! write_error)
unknown's avatar
unknown committed
2119
    {
2120 2121
      write_error= 1;
      sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name, errno);
unknown's avatar
unknown committed
2122
    }
2123
  }
2124
  log_type= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2125
  safeFree(name);
2126
  DBUG_VOID_RETURN;
unknown's avatar
unknown committed
2127 2128 2129
}


2130 2131 2132 2133 2134 2135 2136 2137 2138
void MYSQL_LOG::set_max_size(ulong max_size_arg)
{
  /*
    We need to take locks, otherwise this may happen:
    new_file() is called, calls open(old_max_size), then before open() starts,
    set_max_size() sets max_size to max_size_arg, then open() starts and
    uses the old_max_size argument, so max_size_arg has been overwritten and
    it's like if the SET command was never run.
  */
unknown's avatar
unknown committed
2139 2140
  DBUG_ENTER("MYSQL_LOG::set_max_size");
  pthread_mutex_lock(&LOCK_log);
2141 2142
  if (is_open())
    max_size= max_size_arg;
unknown's avatar
unknown committed
2143 2144
  pthread_mutex_unlock(&LOCK_log);
  DBUG_VOID_RETURN;
2145
}
unknown's avatar
unknown committed
2146

unknown's avatar
unknown committed
2147

2148 2149
/*
  Check if a string is a valid number
2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163

  SYNOPSIS
    test_if_number()
    str			String to test
    res			Store value here
    allow_wildcards	Set to 1 if we should ignore '%' and '_'

  NOTE
    For the moment the allow_wildcards argument is not used
    Should be move to some other file.

  RETURN VALUES
    1	String is a number
    0	Error
2164
*/
unknown's avatar
unknown committed
2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176

static bool test_if_number(register const char *str,
			   long *res, bool allow_wildcards)
{
  reg2 int flag;
  const char *start;
  DBUG_ENTER("test_if_number");

  flag=0; start=str;
  while (*str++ == ' ') ;
  if (*--str == '-' || *str == '+')
    str++;
2177 2178
  while (my_isdigit(files_charset_info,*str) ||
	 (allow_wildcards && (*str == wild_many || *str == wild_one)))
unknown's avatar
unknown committed
2179 2180 2181 2182 2183 2184 2185
  {
    flag=1;
    str++;
  }
  if (*str == '.')
  {
    for (str++ ;
2186
	 my_isdigit(files_charset_info,*str) ||
unknown's avatar
unknown committed
2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197
	   (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
	 str++, flag=1) ;
  }
  if (*str != 0 || flag == 0)
    DBUG_RETURN(0);
  if (res)
    *res=atol(start);
  DBUG_RETURN(1);			/* Number ok */
} /* test_if_number */


unknown's avatar
unknown committed
2198
void print_buffer_to_file(enum loglevel level, const char *buffer)
unknown's avatar
unknown committed
2199 2200 2201 2202
{
  time_t skr;
  struct tm tm_tmp;
  struct tm *start;
unknown's avatar
unknown committed
2203 2204
  DBUG_ENTER("print_buffer_to_file");
  DBUG_PRINT("enter",("buffer: %s", buffer));
unknown's avatar
unknown committed
2205 2206

  VOID(pthread_mutex_lock(&LOCK_error_log));
unknown's avatar
unknown committed
2207

unknown's avatar
unknown committed
2208
  skr=time(NULL);
2209
  localtime_r(&skr, &tm_tmp);
unknown's avatar
unknown committed
2210
  start=&tm_tmp;
2211
  fprintf(stderr, "%02d%02d%02d %2d:%02d:%02d [%s] %s\n",
2212 2213 2214 2215 2216 2217
          start->tm_year % 100,
          start->tm_mon+1,
          start->tm_mday,
          start->tm_hour,
          start->tm_min,
          start->tm_sec,
unknown's avatar
unknown committed
2218
          (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
2219
           "Warning" : "Note"),
unknown's avatar
unknown committed
2220
          buffer);
unknown's avatar
unknown committed
2221

unknown's avatar
unknown committed
2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236
  fflush(stderr);

  VOID(pthread_mutex_unlock(&LOCK_error_log));
  DBUG_VOID_RETURN;
}


void sql_perror(const char *message)
{
#ifdef HAVE_STRERROR
  sql_print_error("%s: %s",message, strerror(errno));
#else
  perror(message);
#endif
}
2237

unknown's avatar
unknown committed
2238

unknown's avatar
unknown committed
2239 2240 2241
bool flush_error_log()
{
  bool result=0;
2242
  if (opt_error_log)
unknown's avatar
unknown committed
2243 2244 2245 2246
  {
    char err_renamed[FN_REFLEN], *end;
    end= strmake(err_renamed,log_error_file,FN_REFLEN-4);
    strmov(end, "-old");
unknown's avatar
unknown committed
2247
    VOID(pthread_mutex_lock(&LOCK_error_log));
unknown's avatar
unknown committed
2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267
#ifdef __WIN__
    char err_temp[FN_REFLEN+4];
    /*
     On Windows is necessary a temporary file for to rename
     the current error file.
    */
    strmov(strmov(err_temp, err_renamed),"-tmp");
    (void) my_delete(err_temp, MYF(0)); 
    if (freopen(err_temp,"a+",stdout))
    {
      freopen(err_temp,"a+",stderr);
      (void) my_delete(err_renamed, MYF(0));
      my_rename(log_error_file,err_renamed,MYF(0));
      if (freopen(log_error_file,"a+",stdout))
        freopen(log_error_file,"a+",stderr);
      int fd, bytes;
      char buf[IO_SIZE];
      if ((fd = my_open(err_temp, O_RDONLY, MYF(0))) >= 0)
      {
        while ((bytes = (int) my_read(fd, (byte*) buf, IO_SIZE, MYF(0))) > 0)
unknown's avatar
unknown committed
2268
             my_fwrite(stderr, (byte*) buf, bytes, MYF(0));
unknown's avatar
unknown committed
2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281
        my_close(fd, MYF(0));
      }
      (void) my_delete(err_temp, MYF(0)); 
    }
    else
     result= 1;
#else
   my_rename(log_error_file,err_renamed,MYF(0));
   if (freopen(log_error_file,"a+",stdout))
     freopen(log_error_file,"a+",stderr);
   else
     result= 1;
#endif
unknown's avatar
unknown committed
2282
    VOID(pthread_mutex_unlock(&LOCK_error_log));
unknown's avatar
unknown committed
2283 2284 2285
  }
   return result;
}
unknown's avatar
unknown committed
2286

unknown's avatar
unknown committed
2287 2288 2289 2290 2291 2292 2293 2294
void MYSQL_LOG::signal_update()
{
  DBUG_ENTER("MYSQL_LOG::signal_update");
  pthread_cond_broadcast(&update_cond);
  DBUG_VOID_RETURN;
}


2295
#ifdef __NT__
unknown's avatar
unknown committed
2296 2297
void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
                                 uint length, int buffLen)
2298 2299 2300 2301
{
  HANDLE event;
  char   *buffptr;
  LPCSTR *buffmsgptr;
unknown's avatar
unknown committed
2302
  DBUG_ENTER("print_buffer_to_nt_eventlog");
2303

unknown's avatar
unknown committed
2304
  buffptr= buff;
2305
  if (length > (uint)(buffLen-5))
2306
  {
2307
    char *newBuff= new char[length + 5];
unknown's avatar
unknown committed
2308
    strcpy(newBuff, buff);
unknown's avatar
unknown committed
2309
    buffptr= newBuff;
2310
  }
unknown's avatar
unknown committed
2311 2312
  strmov(buffptr+length, "\r\n\r\n");
  buffmsgptr= (LPCSTR*) &buffptr;               // Keep windows happy
2313

unknown's avatar
unknown committed
2314 2315
  setup_windows_event_source();
  if ((event= RegisterEventSource(NULL,"MySQL")))
2316
  {
unknown's avatar
unknown committed
2317
    switch (level) {
2318
      case ERROR_LEVEL:
unknown's avatar
unknown committed
2319 2320
        ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
                    buffmsgptr, NULL);
2321 2322
        break;
      case WARNING_LEVEL:
unknown's avatar
unknown committed
2323 2324
        ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
                    buffmsgptr, NULL);
2325 2326
        break;
      case INFORMATION_LEVEL:
unknown's avatar
unknown committed
2327 2328
        ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
                    0, buffmsgptr, NULL);
2329 2330 2331 2332 2333
        break;
    }
    DeregisterEventSource(event);
  }

unknown's avatar
unknown committed
2334
  /* if we created a string buffer, then delete it */
unknown's avatar
unknown committed
2335
  if (buffptr != buff)
2336 2337 2338 2339
    delete[] buffptr;

  DBUG_VOID_RETURN;
}
unknown's avatar
unknown committed
2340 2341
#endif /* __NT__ */

2342 2343

/*
unknown's avatar
unknown committed
2344 2345
  Prints a printf style message to the error log and, under NT, to the
  Windows event log.
2346 2347 2348

  SYNOPSIS
    vprint_msg_to_log()
unknown's avatar
unknown committed
2349 2350
    event_type             Type of event to write (Error, Warning, or Info)
    format                 Printf style format of message
unknown's avatar
unknown committed
2351
    args                   va_list list of arguments for the message
2352 2353 2354 2355

  NOTE

  IMPLEMENTATION
unknown's avatar
unknown committed
2356 2357
    This function prints the message into a buffer and then sends that buffer
    to other functions to write that message to other logging sources.
2358 2359 2360

  RETURN VALUES
    void
unknown's avatar
unknown committed
2361
*/
unknown's avatar
unknown committed
2362

2363
void vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
unknown's avatar
unknown committed
2364 2365
{
  char   buff[1024];
unknown's avatar
unknown committed
2366
  uint length;
2367
  DBUG_ENTER("vprint_msg_to_log");
unknown's avatar
unknown committed
2368

unknown's avatar
unknown committed
2369
  length= my_vsnprintf(buff, sizeof(buff)-5, format, args);
unknown's avatar
unknown committed
2370
  print_buffer_to_file(level, buff);
unknown's avatar
unknown committed
2371 2372

#ifdef __NT__
unknown's avatar
unknown committed
2373
  print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff));
unknown's avatar
unknown committed
2374
#endif
2375

unknown's avatar
unknown committed
2376 2377 2378
  DBUG_VOID_RETURN;
}

2379

unknown's avatar
unknown committed
2380
void sql_print_error(const char *format, ...) 
unknown's avatar
unknown committed
2381 2382
{
  va_list args;
unknown's avatar
unknown committed
2383
  DBUG_ENTER("sql_print_error");
unknown's avatar
unknown committed
2384

unknown's avatar
unknown committed
2385 2386 2387
  va_start(args, format);
  vprint_msg_to_log(ERROR_LEVEL, format, args);
  va_end(args);
unknown's avatar
unknown committed
2388 2389 2390 2391 2392

  DBUG_VOID_RETURN;
}


unknown's avatar
unknown committed
2393
void sql_print_warning(const char *format, ...) 
unknown's avatar
unknown committed
2394 2395
{
  va_list args;
unknown's avatar
unknown committed
2396
  DBUG_ENTER("sql_print_warning");
unknown's avatar
unknown committed
2397

unknown's avatar
unknown committed
2398 2399 2400
  va_start(args, format);
  vprint_msg_to_log(WARNING_LEVEL, format, args);
  va_end(args);
unknown's avatar
unknown committed
2401 2402 2403 2404 2405

  DBUG_VOID_RETURN;
}


unknown's avatar
unknown committed
2406
void sql_print_information(const char *format, ...) 
unknown's avatar
unknown committed
2407 2408
{
  va_list args;
unknown's avatar
unknown committed
2409
  DBUG_ENTER("sql_print_information");
unknown's avatar
unknown committed
2410

unknown's avatar
unknown committed
2411 2412 2413
  va_start(args, format);
  vprint_msg_to_log(INFORMATION_LEVEL, format, args);
  va_end(args);
unknown's avatar
unknown committed
2414

unknown's avatar
unknown committed
2415 2416
  DBUG_VOID_RETURN;
}
2417

unknown's avatar
unknown committed
2418
#ifdef HAVE_MMAP
2419 2420 2421 2422 2423 2424 2425 2426
/********* transaction coordinator log for 2pc - mmap() based solution *******/

/*
  the log consists of a file, mmapped to a memory.
  file is divided on pages of tc_log_page_size size.
  (usable size of the first page is smaller because of log header)
  there's PAGE control structure for each page
  each page (or rather PAGE control structure) can be in one of three
unknown's avatar
unknown committed
2427 2428
  states - active, syncing, pool.
  there could be only one page in active or syncing states,
2429
  but many in pool - pool is fifo queue.
unknown's avatar
unknown committed
2430
  usual lifecycle of a page is pool->active->syncing->pool
2431
  "active" page - is a page where new xid's are logged.
unknown's avatar
unknown committed
2432 2433
  the page stays active as long as syncing slot is taken.
  "syncing" page is being synced to disk. no new xid can be added to it.
2434
  when the sync is done the page is moved to a pool and an active page
unknown's avatar
unknown committed
2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447
  becomes "syncing".

  the result of such an architecture is a natural "commit grouping" -
  If commits are coming faster than the system can sync, they do not
  stall. Instead, all commit that came since the last sync are
  logged to the same page, and they all are synced with the next -
  one - sync. Thus, thought individual commits are delayed, throughput
  is not decreasing.

  when a xid is added to an active page, the thread of this xid waits
  for a page's condition until the page is synced. when syncing slot
  becomes vacant one of these waiters is awaken to take care of syncing.
  it syncs the page and signals all waiters that the page is synced.
2448 2449 2450 2451
  PAGE::waiters is used to count these waiters, and a page may never
  become active again until waiters==0 (that is all waiters from the
  previous sync have noticed the sync was completed)

unknown's avatar
unknown committed
2452 2453
  note, that the page becomes "dirty" and has to be synced only when a
  new xid is added into it. Removing a xid from a page does not make it
2454 2455 2456 2457
  dirty - we don't sync removals to disk.
*/
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

2458
static const char tc_log_magic[]={(char) 254, 0x23, 0x05, 0x74};
2459 2460

uint opt_tc_log_size=TC_LOG_MIN_SIZE;
2461 2462
ulong tc_log_max_pages_used=0, tc_log_page_size=0,
      tc_log_page_waits=0, tc_log_cur_pages_used=0;
2463 2464 2465 2466 2467 2468 2469

int TC_LOG_MMAP::open(const char *opt_name)
{
  uint i;
  bool crashed=FALSE;
  PAGE *pg;

2470
  DBUG_ASSERT(total_ha_2pc > 1);
2471 2472
  DBUG_ASSERT(opt_name && opt_name[0]);

unknown's avatar
unknown committed
2473
  tc_log_page_size= my_getpagesize();
2474 2475 2476
  DBUG_ASSERT(TC_LOG_PAGE_SIZE % tc_log_page_size == 0);

  fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
2477
  if ((fd= my_open(logname, O_RDWR, MYF(0))) < 0)
2478
  {
unknown's avatar
unknown committed
2479 2480
    if (my_errno != ENOENT)
      goto err;
2481 2482
    if (using_heuristic_recover())
      return 1;
2483
    if ((fd= my_create(logname, O_RDWR, 0, MYF(MY_WME))) < 0)
2484 2485
      goto err;
    inited=1;
unknown's avatar
unknown committed
2486
    file_length= opt_tc_log_size;
2487 2488 2489 2490 2491
    if (my_chsize(fd, file_length, 0, MYF(MY_WME)))
      goto err;
  }
  else
  {
unknown's avatar
unknown committed
2492 2493 2494
    inited= 1;
    crashed= TRUE;
    sql_print_information("Recovering after a crash");
2495 2496 2497 2498 2499 2500
    if (tc_heuristic_recover)
    {
      sql_print_error("Cannot perform automatic crash recovery when "
                      "--tc-heuristic-recover is used");
      goto err;
    }
unknown's avatar
unknown committed
2501
    file_length= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
2502 2503 2504 2505
    if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
      goto err;
  }

unknown's avatar
unknown committed
2506
  data= (uchar *)my_mmap(0, file_length, PROT_READ|PROT_WRITE,
2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532
                        MAP_NOSYNC|MAP_SHARED, fd, 0);
  if (data == MAP_FAILED)
  {
    my_errno=errno;
    goto err;
  }
  inited=2;

  npages=file_length/tc_log_page_size;
  DBUG_ASSERT(npages >= 3);             // to guarantee non-empty pool
  if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
    goto err;
  inited=3;
  for (pg=pages, i=0; i < npages; i++, pg++)
  {
    pg->next=pg+1;
    pg->waiters=0;
    pg->state=POOL;
    pthread_mutex_init(&pg->lock, MY_MUTEX_INIT_FAST);
    pthread_cond_init (&pg->cond, 0);
    pg->start=(my_xid *)(data + i*tc_log_page_size);
    pg->end=(my_xid *)(pg->start + tc_log_page_size);
    pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
  }
  pages[0].size=pages[0].free=
                (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
unknown's avatar
unknown committed
2533
  pages[0].start=pages[0].end-pages[0].size;
2534 2535 2536
  pages[npages-1].next=0;
  inited=4;

unknown's avatar
unknown committed
2537
  if (crashed && recover())
2538
      goto err;
unknown's avatar
unknown committed
2539

2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552
  memcpy(data, tc_log_magic, sizeof(tc_log_magic));
  data[sizeof(tc_log_magic)]= total_ha_2pc;
  my_msync(fd, data, tc_log_page_size, MS_SYNC);
  inited=5;

  pthread_mutex_init(&LOCK_sync,    MY_MUTEX_INIT_FAST);
  pthread_mutex_init(&LOCK_active,  MY_MUTEX_INIT_FAST);
  pthread_mutex_init(&LOCK_pool,    MY_MUTEX_INIT_FAST);
  pthread_cond_init(&COND_active, 0);
  pthread_cond_init(&COND_pool, 0);

  inited=6;

unknown's avatar
unknown committed
2553
  syncing= 0;
2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565
  active=pages;
  pool=pages+1;
  pool_last=pages+npages-1;

  return 0;

err:
  close();
  return 1;
}

/*
unknown's avatar
unknown committed
2566 2567
  there is no active page, let's got one from the pool

2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579
  two strategies here:
  1. take the first from the pool
  2. if there're waiters - take the one with the most free space

  TODO page merging. try to allocate adjacent page first,
  so that they can be flushed both in one sync
*/
void TC_LOG_MMAP::get_active_from_pool()
{
  PAGE **p, **best_p=0;
  int best_free;

unknown's avatar
unknown committed
2580 2581 2582
  if (syncing)
    pthread_mutex_lock(&LOCK_pool);

2583 2584 2585
  do
  {
    best_p= p= &pool;
unknown's avatar
unknown committed
2586 2587
    if ((*p)->waiters == 0) // can the first page be used ?
      break;                // yes - take it.
2588

unknown's avatar
unknown committed
2589
    best_free=0;            // no - trying second strategy
2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601
    for (p=&(*p)->next; *p; p=&(*p)->next)
    {
      if ((*p)->waiters == 0 && (*p)->free > best_free)
      {
        best_free=(*p)->free;
        best_p=p;
      }
    }
  }
  while ((*best_p == 0 || best_free == 0) && overflow());

  active=*best_p;
unknown's avatar
unknown committed
2602
  if (active->free == active->size) // we've chosen an empty page
2603 2604 2605 2606 2607
  {
    tc_log_cur_pages_used++;
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
  }

unknown's avatar
unknown committed
2608
  if ((*best_p)->next)              // unlink the page from the pool
2609 2610 2611
    *best_p=(*best_p)->next;
  else
    pool_last=*best_p;
unknown's avatar
unknown committed
2612 2613 2614

  if (syncing)
    pthread_mutex_unlock(&LOCK_pool);
2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625
}

int TC_LOG_MMAP::overflow()
{
  /*
    simple overflow handling - just wait
    TODO perhaps, increase log size ?
    let's check the behaviour of tc_log_page_waits first
  */
  tc_log_page_waits++;
  pthread_cond_wait(&COND_pool, &LOCK_pool);
unknown's avatar
unknown committed
2626
  return 1; // always return 1
2627 2628 2629
}

/*
unknown's avatar
unknown committed
2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641
  all access to active page is serialized but it's not a problem, as
  we're assuming that fsync() will be a main bottleneck.
  That is, parallelizing writes to log pages we'll decrease number of
  threads waiting for a page, but then all these threads will be waiting
  for a fsync() anyway

  RETURN
         0  - error
  otherwise - "cookie", a number that will be passed as an argument
              to unlog() call. tc_log can define it any way it wants,
              and use for whatever purposes. TC_LOG_MMAP sets it
              to the position in memory where xid was logged to.
2642 2643 2644 2645 2646 2647 2648 2649 2650 2651
*/

int TC_LOG_MMAP::log(THD *thd, my_xid xid)
{
  int err;
  PAGE *p;
  ulong cookie;

  pthread_mutex_lock(&LOCK_active);

unknown's avatar
unknown committed
2652 2653 2654 2655 2656 2657 2658
  /*
    if active page is full - just wait...
    frankly speaking, active->free here accessed outside of mutex
    protection, but it's safe, because it only means we may miss an
    unlog() for the active page, and we're not waiting for it here -
    unlog() does not signal COND_active.
  */
2659 2660 2661
  while (unlikely(active && active->free == 0))
    pthread_cond_wait(&COND_active, &LOCK_active);

unknown's avatar
unknown committed
2662
  /* no active page ? take one from the pool */
2663 2664 2665 2666 2667 2668
  if (active == 0)
    get_active_from_pool();

  p=active;
  pthread_mutex_lock(&p->lock);

unknown's avatar
unknown committed
2669
  /* searching for an empty slot */
2670 2671 2672 2673 2674 2675
  while (*p->ptr)
  {
    p->ptr++;
    DBUG_ASSERT(p->ptr < p->end);               // because p->free > 0
  }

unknown's avatar
unknown committed
2676 2677
  /* found! store xid there and mark the page dirty */
  cookie= (ulong)((uchar *)p->ptr - data);      // can never be zero
2678 2679 2680 2681 2682
  *p->ptr++= xid;
  p->free--;
  p->state= DIRTY;

  /* to sync or not to sync - this is the question */
unknown's avatar
unknown committed
2683
  pthread_mutex_unlock(&LOCK_active);
2684 2685 2686
  pthread_mutex_lock(&LOCK_sync);
  pthread_mutex_unlock(&p->lock);

unknown's avatar
unknown committed
2687
  if (syncing)
2688 2689
  {                                          // somebody's syncing. let's wait
    p->waiters++;
unknown's avatar
unknown committed
2690 2691 2692 2693 2694
    /*
      note - it must be while(), not do ... while() here
      as p->state may be not DIRTY when we come here
    */
    while (p->state == DIRTY && syncing)
2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705
      pthread_cond_wait(&p->cond, &LOCK_sync);
    p->waiters--;
    err= p->state == ERROR;
    if (p->state != DIRTY)                   // page was synced
    {
      if (p->waiters == 0)
        pthread_cond_signal(&COND_pool);     // in case somebody's waiting
      pthread_mutex_unlock(&LOCK_sync);
      goto done;                             // we're done
    }
  }                                          // page was not synced! do it now
unknown's avatar
unknown committed
2706 2707 2708 2709
  DBUG_ASSERT(active == p && syncing == 0);
  pthread_mutex_lock(&LOCK_active);
  syncing=p;                                 // place is vacant - take it
  active=0;                                  // page is not active anymore
2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722
  pthread_cond_broadcast(&COND_active);      // in case somebody's waiting
  pthread_mutex_unlock(&LOCK_active);
  pthread_mutex_unlock(&LOCK_sync);
  err= sync();

done:
  return err ? 0 : cookie;
}

int TC_LOG_MMAP::sync()
{
  int err;

unknown's avatar
unknown committed
2723
  DBUG_ASSERT(syncing != active);
2724 2725 2726 2727 2728

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */
unknown's avatar
unknown committed
2729
  err= my_msync(fd, syncing->start, 1, MS_SYNC);
2730

unknown's avatar
unknown committed
2731
  /* page is synced. let's move it to the pool */
2732
  pthread_mutex_lock(&LOCK_pool);
unknown's avatar
unknown committed
2733 2734 2735 2736 2737
  pool_last->next=syncing;
  pool_last=syncing;
  syncing->next=0;
  syncing->state= err ? ERROR : POOL;
  pthread_cond_broadcast(&syncing->cond);    // signal "sync done"
2738 2739 2740
  pthread_cond_signal(&COND_pool);           // in case somebody's waiting
  pthread_mutex_unlock(&LOCK_pool);

unknown's avatar
unknown committed
2741
  /* marking 'syncing' slot free */
2742
  pthread_mutex_lock(&LOCK_sync);
unknown's avatar
unknown committed
2743
  syncing=0;
2744 2745 2746 2747 2748
  pthread_cond_signal(&active->cond);        // wake up a new syncer
  pthread_mutex_unlock(&LOCK_sync);
  return err;
}

unknown's avatar
unknown committed
2749 2750 2751 2752
/*
  erase xid from the page, update page free space counters/pointers.
  cookie points directly to the memory where xid was logged
*/
2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765
void TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
{
  PAGE *p=pages+(cookie/tc_log_page_size);
  my_xid *x=(my_xid *)(data+cookie);

  DBUG_ASSERT(*x == xid);
  DBUG_ASSERT(x >= p->start && x < p->end);
  *x=0;

  pthread_mutex_lock(&p->lock);
  p->free++;
  DBUG_ASSERT(p->free <= p->size);
  set_if_smaller(p->ptr, x);
unknown's avatar
unknown committed
2766
  if (p->free == p->size)               // the page is completely empty
2767 2768
    statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
  if (p->waiters == 0)                 // the page is in pool and ready to rock
unknown's avatar
unknown committed
2769
    pthread_cond_signal(&COND_pool);   // ping ... for overflow()
2770 2771 2772 2773 2774
  pthread_mutex_unlock(&p->lock);
}

void TC_LOG_MMAP::close()
{
unknown's avatar
unknown committed
2775
  uint i;
2776 2777 2778 2779 2780 2781 2782 2783 2784
  switch (inited) {
  case 6:
    pthread_mutex_destroy(&LOCK_sync);
    pthread_mutex_destroy(&LOCK_active);
    pthread_mutex_destroy(&LOCK_pool);
    pthread_cond_destroy(&COND_pool);
  case 5:
    data[0]='A'; // garble the first (signature) byte, in case my_delete fails
  case 4:
unknown's avatar
unknown committed
2785
    for (i=0; i < npages; i++)
2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820
    {
      if (pages[i].ptr == 0)
        break;
      pthread_mutex_destroy(&pages[i].lock);
      pthread_cond_destroy(&pages[i].cond);
    }
  case 3:
    my_free((gptr)pages, MYF(0));
  case 2:
    my_munmap(data, file_length);
  case 1:
    my_close(fd, MYF(0));
  }
  if (inited>=5) // cannot do in the switch because of Windows
    my_delete(logname, MYF(MY_WME));
  inited=0;
}

int TC_LOG_MMAP::recover()
{
  HASH xids;
  PAGE *p=pages, *end_p=pages+npages;

  if (memcmp(data, tc_log_magic, sizeof(tc_log_magic)))
  {
    sql_print_error("Bad magic header in tc log");
    goto err1;
  }

  /*
    the first byte after magic signature is set to current
    number of storage engines on startup
  */
  if (data[sizeof(tc_log_magic)] != total_ha_2pc)
  {
2821
    sql_print_error("Recovery failed! You must enable "
unknown's avatar
unknown committed
2822 2823 2824
                    "exactly %d storage engines that support "
                    "two-phase commit protocol",
                    data[sizeof(tc_log_magic)]);
2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854
    goto err1;
  }

  if (hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
            sizeof(my_xid), 0, 0, MYF(0)))
    goto err1;

  for ( ; p < end_p ; p++)
  {
    for (my_xid *x=p->start; x < p->end; x++)
      if (*x && my_hash_insert(&xids, (byte *)x))
        goto err2; // OOM
  }

  if (ha_recover(&xids))
    goto err2;

  hash_free(&xids);
  bzero(data, file_length);
  return 0;

err2:
  hash_free(&xids);
err1:
  sql_print_error("Crash recovery failed. Either correct the problem "
                  "(if it's, for example, out of memory error) and restart, "
                  "or delete tc log and start mysqld with "
                  "--tc-heuristic-recover={commit|rollback}");
  return 1;
}
unknown's avatar
unknown committed
2855 2856 2857 2858 2859
#endif

TC_LOG *tc_log;
TC_LOG_DUMMY tc_log_dummy;
TC_LOG_MMAP  tc_log_mmap;
2860

2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872
/*
  Perform heuristic recovery, if --tc-heuristic-recover was used

  RETURN VALUE
    0	no heuristic recovery was requested
    1   heuristic recovery was performed

  NOTE
    no matter whether heuristic recovery was successful or not
    mysqld must exit. So, return value is the same in both cases.
*/

2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885
int TC_LOG::using_heuristic_recover()
{
  if (!tc_heuristic_recover)
    return 0;

  sql_print_information("Heuristic crash recovery mode");
  if (ha_recover(0))
    sql_print_error("Heuristic crash recovery failed");
  sql_print_information("Please restart mysqld without --tc-heuristic-recover");
  return 1;
}

/****** transaction coordinator log for 2pc - binlog() based solution ******/
unknown's avatar
unknown committed
2886
#define TC_LOG_BINLOG MYSQL_LOG
2887 2888 2889 2890 2891 2892 2893 2894

/*
  TODO keep in-memory list of prepared transactions
  (add to list in log(), remove on unlog())
  and copy it to the new binlog if rotated
  but let's check the behaviour of tc_log_page_waits first!
*/

unknown's avatar
unknown committed
2895
int TC_LOG_BINLOG::open(const char *opt_name)
2896
{
2897 2898
  LOG_INFO log_info;
  int      error= 1;
2899 2900

  DBUG_ASSERT(total_ha_2pc > 1);
2901
  DBUG_ASSERT(opt_name && opt_name[0]);
2902 2903 2904 2905 2906

  pthread_mutex_init(&LOCK_prep_xids, MY_MUTEX_INIT_FAST);
  pthread_cond_init (&COND_prep_xids, 0);

  if (using_heuristic_recover())
2907 2908 2909 2910
  {
    /* generate a new binlog to mask a corrupted one */
    open(opt_name, LOG_BIN, 0, WRITE_CACHE, 0, max_binlog_size, 0);
    cleanup();
2911
    return 1;
2912
  }
2913

2914
  if ((error= find_log_pos(&log_info, NullS, 1)))
unknown's avatar
unknown committed
2915
  {
2916 2917 2918 2919 2920
    if (error != LOG_INFO_EOF)
      sql_print_error("find_log_pos() failed (error: %d)", error);
    else
      error= 0;
    goto err;
unknown's avatar
unknown committed
2921
  }
2922 2923 2924 2925 2926

  {
    const char *errmsg;
    IO_CACHE    log;
    File        file;
unknown's avatar
unknown committed
2927 2928
    Log_event  *ev=0;
    Format_description_log_event fdle(BINLOG_VERSION);
2929
    char        log_name[FN_REFLEN];
2930 2931 2932 2933

    if (! fdle.is_valid())
      goto err;

2934
    do
2935
    {
2936 2937 2938 2939 2940 2941 2942
      strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
    } while (!(error= find_next_log(&log_info, 1)));

    if (error !=  LOG_INFO_EOF)
    {
      sql_print_error("find_log_pos() failed (error: %d)", error);
      goto err;
2943
    }
2944

2945
    if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
2946 2947 2948 2949 2950
    {
      sql_print_error("%s", errmsg);
      goto err;
    }

2951 2952 2953
    if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
        ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
        ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
2954 2955 2956
      error= recover(&log, (Format_description_log_event *)ev);
    else
      error=0;
2957 2958 2959 2960 2961 2962 2963 2964 2965 2966

    delete ev;
    end_io_cache(&log);
    my_close(file, MYF(MY_WME));

    if (error)
      goto err;
  }

err:
2967
  return error;
2968 2969
}

unknown's avatar
unknown committed
2970 2971
/* this is called on shutdown, after ha_panic */
void TC_LOG_BINLOG::close()
2972 2973 2974 2975 2976 2977
{
  DBUG_ASSERT(prepared_xids==0);
  pthread_mutex_destroy(&LOCK_prep_xids);
  pthread_cond_destroy (&COND_prep_xids);
}

unknown's avatar
unknown committed
2978 2979 2980 2981 2982 2983 2984 2985
/*
  TODO group commit

  RETURN
         0  - error
         1  - success
*/
int TC_LOG_BINLOG::log(THD *thd, my_xid xid)
2986 2987
{
  Xid_log_event xle(thd, xid);
2988
  IO_CACHE *trans_log= (IO_CACHE*)thd->ha_data[binlog_hton.slot];
2989
  return !binlog_end_trans(thd, trans_log, &xle);  // invert return value
2990 2991
}

unknown's avatar
unknown committed
2992
void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
2993 2994 2995
{
  if (thread_safe_dec_and_test(prepared_xids, &LOCK_prep_xids))
    pthread_cond_signal(&COND_prep_xids);
2996
  rotate_and_purge(0);     // as ::write() did not rotate
2997 2998
}

unknown's avatar
unknown committed
2999
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
3000 3001 3002 3003 3004 3005
{
  Log_event  *ev;
  HASH xids;
  MEM_ROOT mem_root;

  if (! fdle->is_valid() ||
3006
      hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
3007 3008 3009
            sizeof(my_xid), 0, 0, MYF(0)))
    goto err1;

3010
  init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
3011

3012 3013
  fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error

3014 3015 3016 3017 3018
  while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
  {
    if (ev->get_type_code() == XID_EVENT)
    {
      Xid_log_event *xev=(Xid_log_event *)ev;
unknown's avatar
unknown committed
3019 3020
      byte *x=(byte *)memdup_root(&mem_root, (char *)& xev->xid,
                                  sizeof(xev->xid));
3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045
      if (! x)
        goto err2;
      my_hash_insert(&xids, x);
    }
    delete ev;
  }

  if (ha_recover(&xids))
    goto err2;

  free_root(&mem_root, MYF(0));
  hash_free(&xids);
  return 0;

err2:
  free_root(&mem_root, MYF(0));
  hash_free(&xids);
err1:
  sql_print_error("Crash recovery failed. Either correct the problem "
                  "(if it's, for example, out of memory error) and restart, "
                  "or delete (or rename) binary log and start mysqld with "
                  "--tc-heuristic-recover={commit|rollback}");
  return 1;
}