Commit 338dde5c authored by unknown's avatar unknown

Forward port of HugeTLB, InnoDB doublewrite and checksums patches to 5.0


BitKeeper/etc/logging_ok:
  Logging to logging@openlogging.org accepted
parent 167a5348
...@@ -96,6 +96,7 @@ joerg@mysql.com ...@@ -96,6 +96,7 @@ joerg@mysql.com
joreland@mysql.com joreland@mysql.com
jorge@linux.jorge.mysql.com jorge@linux.jorge.mysql.com
jplindst@t41.(none) jplindst@t41.(none)
kaa@polly.local
kaj@work.mysql.com kaj@work.mysql.com
kent@mysql.com kent@mysql.com
konstantin@mysql.com konstantin@mysql.com
......
...@@ -748,7 +748,7 @@ AC_CHECK_HEADERS(fcntl.h float.h floatingpoint.h ieeefp.h limits.h \ ...@@ -748,7 +748,7 @@ AC_CHECK_HEADERS(fcntl.h float.h floatingpoint.h ieeefp.h limits.h \
strings.h string.h synch.h sys/mman.h sys/socket.h netinet/in.h arpa/inet.h \ strings.h string.h synch.h sys/mman.h sys/socket.h netinet/in.h arpa/inet.h \
sys/timeb.h sys/types.h sys/un.h sys/vadvise.h sys/wait.h term.h \ sys/timeb.h sys/types.h sys/un.h sys/vadvise.h sys/wait.h term.h \
unistd.h utime.h sys/utime.h termio.h termios.h sched.h crypt.h alloca.h \ unistd.h utime.h sys/utime.h termio.h termios.h sched.h crypt.h alloca.h \
sys/ioctl.h malloc.h sys/malloc.h linux/config.h) sys/ioctl.h malloc.h sys/malloc.h sys/ipc.h sys/shm.h linux/config.h)
#-------------------------------------------------------------------- #--------------------------------------------------------------------
# Check for system libraries. Adds the library to $LIBS # Check for system libraries. Adds the library to $LIBS
...@@ -775,6 +775,22 @@ AC_CHECK_FUNC(crypt, AC_DEFINE([HAVE_CRYPT], [1], [crypt])) ...@@ -775,6 +775,22 @@ AC_CHECK_FUNC(crypt, AC_DEFINE([HAVE_CRYPT], [1], [crypt]))
AC_CHECK_FUNC(sem_init, , AC_CHECK_LIB(posix4, sem_init)) AC_CHECK_FUNC(sem_init, , AC_CHECK_LIB(posix4, sem_init))
MYSQL_CHECK_ZLIB_WITH_COMPRESS MYSQL_CHECK_ZLIB_WITH_COMPRESS
# For large pages support
if test "$IS_LINUX" = "true"
then
# For SHM_HUGETLB on Linux
AC_CHECK_DECLS(SHM_HUGETLB,
AC_DEFINE([HAVE_LARGE_PAGES], [1],
[Define if you have large pages support])
AC_DEFINE([HUGETLB_USE_PROC_MEMINFO], [1],
[Define if /proc/meminfo shows the huge page size (Linux only)])
, ,
[
#include <sys/shm.h>
]
)
fi
#-------------------------------------------------------------------- #--------------------------------------------------------------------
# Check for TCP wrapper support # Check for TCP wrapper support
#-------------------------------------------------------------------- #--------------------------------------------------------------------
......
...@@ -168,6 +168,16 @@ extern char *my_strdup_with_length(const byte *from, uint length, ...@@ -168,6 +168,16 @@ extern char *my_strdup_with_length(const byte *from, uint length,
#define TRASH(A,B) /* nothing */ #define TRASH(A,B) /* nothing */
#endif #endif
/*
  Large pages allocation interface.
  With HAVE_LARGE_PAGES the three entry points are real functions;
  without it they collapse to macros, so callers need no #ifdef.
*/
#ifdef HAVE_LARGE_PAGES
extern uint my_get_large_page_size(void);
extern gptr my_large_malloc(uint size, myf my_flags);
extern void my_large_free(gptr ptr, myf my_flags);
#else
/* No large pages support: the page size is reported as 0 */
#define my_get_large_page_size() (0)
/* No large pages support: allocate and free with the my_*_lock() calls */
#define my_large_malloc(A,B) my_malloc_lock((A),(B))
#define my_large_free(A,B) my_free_lock((A),(B))
#endif /* HAVE_LARGE_PAGES */
#ifdef HAVE_ALLOCA #ifdef HAVE_ALLOCA
#if defined(_AIX) && !defined(__GNUC__) && !defined(_AIX43) #if defined(_AIX) && !defined(__GNUC__) && !defined(_AIX43)
#pragma alloca #pragma alloca
...@@ -213,6 +223,11 @@ extern int (*fatal_error_handler_hook)(uint my_err, const char *str, ...@@ -213,6 +223,11 @@ extern int (*fatal_error_handler_hook)(uint my_err, const char *str,
myf MyFlags); myf MyFlags);
extern uint my_file_limit; extern uint my_file_limit;
#ifdef HAVE_LARGE_PAGES
extern my_bool my_use_large_pages;
extern uint my_large_page_size;
#endif
/* charsets */ /* charsets */
extern CHARSET_INFO *default_charset_info; extern CHARSET_INFO *default_charset_info;
extern CHARSET_INFO *all_charsets[256]; extern CHARSET_INFO *all_charsets[256];
......
...@@ -331,33 +331,43 @@ buf_page_is_corrupted( ...@@ -331,33 +331,43 @@ buf_page_is_corrupted(
} }
} }
#endif #endif
old_checksum = buf_calc_page_old_checksum(read_buf);
/* If we use checksums validation, make additional check before returning
old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE TRUE to ensure that the checksum is not equal to BUF_NO_CHECKSUM_MAGIC which
might be stored by InnoDB with checksums disabled.
Otherwise, skip checksum calculation and return FALSE */
if (srv_use_checksums) {
old_checksum = buf_calc_page_old_checksum(read_buf);
old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM); - FIL_PAGE_END_LSN_OLD_CHKSUM);
/* There are 2 valid formulas for old_checksum_field: /* There are 2 valid formulas for old_checksum_field:
1. Very old versions of InnoDB only stored 8 byte lsn to the start 1. Very old versions of InnoDB only stored 8 byte lsn to the start
and the end of the page. and the end of the page.
2. Newer InnoDB versions store the old formula checksum there. */ 2. Newer InnoDB versions store the old formula checksum there. */
if (old_checksum_field != mach_read_from_4(read_buf + FIL_PAGE_LSN) if (old_checksum_field != mach_read_from_4(read_buf + FIL_PAGE_LSN)
&& old_checksum_field != old_checksum) { && old_checksum_field != old_checksum
&& old_checksum_field != BUF_NO_CHECKSUM_MAGIC) {
return(TRUE); return(TRUE);
} }
checksum = buf_calc_page_new_checksum(read_buf); checksum = buf_calc_page_new_checksum(read_buf);
checksum_field = mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM); checksum_field = mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
(always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */ (always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
if (checksum_field != 0 && checksum_field != checksum) { if (checksum_field != 0 && checksum_field != checksum
&& checksum_field != BUF_NO_CHECKSUM_MAGIC) {
return(TRUE);
}
return(TRUE);
}
}
return(FALSE); return(FALSE);
} }
...@@ -379,8 +389,10 @@ buf_page_print( ...@@ -379,8 +389,10 @@ buf_page_print(
ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE); ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE);
fputs("InnoDB: End of page dump\n", stderr); fputs("InnoDB: End of page dump\n", stderr);
checksum = buf_calc_page_new_checksum(read_buf); checksum = srv_use_checksums ?
old_checksum = buf_calc_page_old_checksum(read_buf); buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
old_checksum = srv_use_checksums ?
buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
ut_print_timestamp(stderr); ut_print_timestamp(stderr);
fprintf(stderr, fprintf(stderr,
...@@ -548,7 +560,7 @@ buf_pool_init( ...@@ -548,7 +560,7 @@ buf_pool_init(
} }
/*----------------------------------------*/ /*----------------------------------------*/
} else { } else {
buf_pool->frame_mem = ut_malloc_low( buf_pool->frame_mem = os_mem_alloc_large(
UNIV_PAGE_SIZE * (n_frames + 1), UNIV_PAGE_SIZE * (n_frames + 1),
TRUE, FALSE); TRUE, FALSE);
} }
......
...@@ -448,7 +448,8 @@ buf_flush_init_for_writing( ...@@ -448,7 +448,8 @@ buf_flush_init_for_writing(
/* Store the new formula checksum */ /* Store the new formula checksum */
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
buf_calc_page_new_checksum(page)); srv_use_checksums ?
buf_calc_page_new_checksum(page) : BUF_NO_CHECKSUM_MAGIC);
/* We overwrite the first 4 bytes of the end lsn field to store /* We overwrite the first 4 bytes of the end lsn field to store
the old formula checksum. Since it depends also on the field the old formula checksum. Since it depends also on the field
...@@ -456,7 +457,8 @@ buf_flush_init_for_writing( ...@@ -456,7 +457,8 @@ buf_flush_init_for_writing(
new formula checksum. */ new formula checksum. */
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
buf_calc_page_old_checksum(page)); srv_use_checksums ?
buf_calc_page_old_checksum(page) : BUF_NO_CHECKSUM_MAGIC);
} }
/************************************************************************ /************************************************************************
......
...@@ -52,6 +52,8 @@ Created 11/5/1995 Heikki Tuuri ...@@ -52,6 +52,8 @@ Created 11/5/1995 Heikki Tuuri
/* Modes for buf_page_get_known_nowait */ /* Modes for buf_page_get_known_nowait */
#define BUF_MAKE_YOUNG 51 #define BUF_MAKE_YOUNG 51
#define BUF_KEEP_OLD 52 #define BUF_KEEP_OLD 52
/* Magic value to use instead of checksums when they are disabled */
#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
extern buf_pool_t* buf_pool; /* The buffer pool of the database */ extern buf_pool_t* buf_pool; /* The buffer pool of the database */
extern ibool buf_debug_prints;/* If this is set TRUE, the program extern ibool buf_debug_prints;/* If this is set TRUE, the program
......
...@@ -12,6 +12,11 @@ Created 9/30/1995 Heikki Tuuri ...@@ -12,6 +12,11 @@ Created 9/30/1995 Heikki Tuuri
#include "univ.i" #include "univ.i"
#ifdef UNIV_LINUX
#include <sys/ipc.h>
#include <sys/shm.h>
#endif
typedef void* os_process_t; typedef void* os_process_t;
typedef unsigned long int os_process_id_t; typedef unsigned long int os_process_id_t;
...@@ -27,6 +32,10 @@ page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB ...@@ -27,6 +32,10 @@ page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
pages. */ pages. */
#define OS_AWE_X86_PAGE_SIZE 4096 #define OS_AWE_X86_PAGE_SIZE 4096
extern ibool os_use_large_pages;
/* Large page size. This may be a boot-time option on some platforms */
extern ulint os_large_page_size;
/******************************************************************** /********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked the current process so that the current process can allocate memory-locked
...@@ -103,6 +112,25 @@ os_mem_alloc_nocache( ...@@ -103,6 +112,25 @@ os_mem_alloc_nocache(
/* out: allocated memory */ /* out: allocated memory */
ulint n); /* in: number of bytes */ ulint n); /* in: number of bytes */
/********************************************************************
Allocates large pages memory. The definition added by this change tries a
SHM_HUGETLB shared memory segment on Linux when os_use_large_pages and
os_large_page_size are both set; otherwise, or when that attempt fails, it
allocates with ut_malloc_low(). */
void*
os_mem_alloc_large(
/*=================*/
/* out: allocated memory */
ulint n, /* in: number of bytes */
ibool set_to_zero, /* in: TRUE if allocated memory should be set
to zero if UNIV_SET_MEM_TO_ZERO is defined */
ibool assert_on_error); /* in: if TRUE, we crash mysqld if the memory
cannot be allocated */
/********************************************************************
Frees large pages memory. */
void
os_mem_free_large(
/*=================*/
void *ptr); /* in: pointer to the memory block to free */
/********************************************************************
Sets the priority boost for threads released from waiting within the current Sets the priority boost for threads released from waiting within the current
process. */ process. */
......
...@@ -107,6 +107,7 @@ extern ibool srv_very_fast_shutdown; /* if this TRUE, do not flush the ...@@ -107,6 +107,7 @@ extern ibool srv_very_fast_shutdown; /* if this TRUE, do not flush the
extern ibool srv_innodb_status; extern ibool srv_innodb_status;
extern ibool srv_use_doublewrite_buf; extern ibool srv_use_doublewrite_buf;
extern ibool srv_use_checksums;
extern ibool srv_set_thread_priorities; extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority; extern int srv_query_thread_priority;
......
...@@ -69,6 +69,10 @@ byte* os_awe_window; ...@@ -69,6 +69,10 @@ byte* os_awe_window;
ulint os_awe_window_size; ulint os_awe_window_size;
#endif #endif
ibool os_use_large_pages;
/* Large page size. This may be a boot-time option on some platforms */
ulint os_large_page_size;
/******************************************************************** /********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked the current process so that the current process can allocate memory-locked
...@@ -515,6 +519,83 @@ os_mem_alloc_nocache( ...@@ -515,6 +519,83 @@ os_mem_alloc_nocache(
#endif #endif
} }
/********************************************************************
Allocates large pages memory. On Linux, when os_use_large_pages is set and
os_large_page_size is nonzero, the memory is requested as a SHM_HUGETLB
shared memory segment. In every other case, and whenever that request
fails, the memory is allocated with ut_malloc_low(). */

void*
os_mem_alloc_large(
/*=================*/
					/* out: allocated memory */
	ulint	n,			/* in: number of bytes */
	ibool	set_to_zero,		/* in: TRUE if allocated memory
					should be set to zero if
					UNIV_SET_MEM_TO_ZERO is defined */
	ibool	assert_on_error)	/* in: if TRUE, we crash mysqld if
					the memory cannot be allocated */
{
#ifdef UNIV_LINUX
	ulint		size;
	int		shmid;
	void*		ptr = NULL;
	struct shmid_ds	buf;

	if (!os_use_large_pages || !os_large_page_size) {
		goto skip;
	}

	/* Align block size to os_large_page_size.
	NOTE(review): the mask arithmetic assumes os_large_page_size is a
	power of two - confirm against whoever sets it */
	size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size;

	shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
	if (shmid < 0) {
		fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate %lu bytes. "
			"errno %d\n", n, errno);
	} else {
		ptr = shmat(shmid, NULL, 0);
		if (ptr == (void *)-1) {
			fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to attach shared memory "
				"segment, errno %d\n", errno);

			/* shmat() signals failure with (void*) -1, which is
			not NULL: reset the pointer so that the failed attach
			is not returned to the caller as usable memory */
			ptr = NULL;
		}

		/*
		Remove the shared memory segment so that it will be
		automatically freed after memory is detached or process exits
		*/
		shmctl(shmid, IPC_RMID, &buf);
	}

	if (ptr) {
		if (set_to_zero) {
#ifdef UNIV_SET_MEM_TO_ZERO
			memset(ptr, '\0', size);
#endif
		}

		return(ptr);
	}

	fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional memory pool\n");

	/* The label lives inside the #ifdef because only the Linux code
	jumps to it */
skip:
#endif /* UNIV_LINUX */

	return(ut_malloc_low(n, set_to_zero, assert_on_error));
}
/********************************************************************
Releases a memory block obtained from os_mem_alloc_large(). */

void
os_mem_free_large(
/*==============*/
	void	*ptr)	/* in: pointer to the memory block to free */
{
#ifdef UNIV_LINUX
	if (os_use_large_pages && os_large_page_size) {
		/* Try to detach the block as a shared memory segment. If
		shmdt() does not return 0 the block is handed to ut_free()
		below instead */
		if (shmdt(ptr) == 0) {

			return;
		}
	}
#endif

	ut_free(ptr);
}
/******************************************************************** /********************************************************************
Sets the priority boost for threads released from waiting within the current Sets the priority boost for threads released from waiting within the current
process. */ process. */
......
...@@ -313,6 +313,7 @@ ibool srv_very_fast_shutdown = FALSE; /* if this TRUE, do not flush the ...@@ -313,6 +313,7 @@ ibool srv_very_fast_shutdown = FALSE; /* if this TRUE, do not flush the
ibool srv_innodb_status = FALSE; ibool srv_innodb_status = FALSE;
ibool srv_use_doublewrite_buf = TRUE; ibool srv_use_doublewrite_buf = TRUE;
ibool srv_use_checksums = TRUE;
ibool srv_set_thread_priorities = TRUE; ibool srv_set_thread_priorities = TRUE;
int srv_query_thread_priority = 0; int srv_query_thread_priority = 0;
......
...@@ -124,6 +124,22 @@ trx_doublewrite_init( ...@@ -124,6 +124,22 @@ trx_doublewrite_init(
* sizeof(void*)); * sizeof(void*));
} }
/********************************************************************
Frees the doublewrite buffer: releases the mutex, the block array and the
unaligned write buffer held by the global trx_doublewrite struct, then the
struct itself, and finally resets the global pointer to NULL. */
static
void
trx_doublewrite_free(void)
/*======================*/
{
/* Release the members first; they are reached through trx_doublewrite */
mutex_free(&(trx_doublewrite->mutex));
mem_free(trx_doublewrite->buf_block_arr);
ut_free(trx_doublewrite->write_buf_unaligned);
/* Only now release the struct that held them */
mem_free(trx_doublewrite);
/* Leave no dangling pointer behind in the global */
trx_doublewrite = NULL;
}
/******************************************************************** /********************************************************************
Marks the trx sys header when we have successfully upgraded to the >= 4.1.x Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
multiple tablespace format. */ multiple tablespace format. */
...@@ -512,6 +528,9 @@ trx_sys_doublewrite_init_or_restore_pages( ...@@ -512,6 +528,9 @@ trx_sys_doublewrite_init_or_restore_pages(
fil_flush_file_spaces(FIL_TABLESPACE); fil_flush_file_spaces(FIL_TABLESPACE);
if (!srv_use_doublewrite_buf)
trx_doublewrite_free();
leave_func: leave_func:
ut_free(unaligned_read_buf); ut_free(unaligned_read_buf);
} }
......
...@@ -53,7 +53,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ ...@@ -53,7 +53,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
my_net.c my_semaphore.c my_port.c my_sleep.c \ my_net.c my_semaphore.c my_port.c my_sleep.c \
charset.c charset-def.c my_bitmap.c my_bit.c md5.c \ charset.c charset-def.c my_bitmap.c my_bit.c md5.c \
my_gethostbyname.c rijndael.c my_aes.c sha1.c \ my_gethostbyname.c rijndael.c my_aes.c sha1.c \
my_handler.c my_netware.c my_handler.c my_netware.c my_largepage.c
EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
thr_mutex.c thr_rwlock.c thr_mutex.c thr_rwlock.c
libmysys_a_LIBADD = @THREAD_LOBJECTS@ libmysys_a_LIBADD = @THREAD_LOBJECTS@
......
...@@ -341,8 +341,8 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, ...@@ -341,8 +341,8 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
blocks--; blocks--;
/* Allocate memory for cache page buffers */ /* Allocate memory for cache page buffers */
if ((keycache->block_mem= if ((keycache->block_mem=
my_malloc_lock((ulong) blocks * keycache->key_cache_block_size, my_large_malloc((ulong) blocks * keycache->key_cache_block_size,
MYF(0)))) MYF(MY_WME))))
{ {
/* /*
Allocate memory for blocks, hash_links and hash entries; Allocate memory for blocks, hash_links and hash entries;
...@@ -351,7 +351,7 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, ...@@ -351,7 +351,7 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
if ((keycache->block_root= (BLOCK_LINK*) my_malloc((uint) length, if ((keycache->block_root= (BLOCK_LINK*) my_malloc((uint) length,
MYF(0)))) MYF(0))))
break; break;
my_free_lock(keycache->block_mem, MYF(0)); my_large_free(keycache->block_mem, MYF(0));
} }
if (blocks < 8) if (blocks < 8)
{ {
...@@ -421,7 +421,7 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size, ...@@ -421,7 +421,7 @@ int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
keycache->blocks= 0; keycache->blocks= 0;
if (keycache->block_mem) if (keycache->block_mem)
{ {
my_free_lock((gptr) keycache->block_mem, MYF(0)); my_large_free((gptr) keycache->block_mem, MYF(0));
keycache->block_mem= NULL; keycache->block_mem= NULL;
} }
if (keycache->block_root) if (keycache->block_root)
...@@ -605,7 +605,7 @@ void end_key_cache(KEY_CACHE *keycache, my_bool cleanup) ...@@ -605,7 +605,7 @@ void end_key_cache(KEY_CACHE *keycache, my_bool cleanup)
{ {
if (keycache->block_mem) if (keycache->block_mem)
{ {
my_free_lock((gptr) keycache->block_mem, MYF(0)); my_large_free((gptr) keycache->block_mem, MYF(0));
keycache->block_mem= NULL; keycache->block_mem= NULL;
my_free((gptr) keycache->block_root, MYF(0)); my_free((gptr) keycache->block_root, MYF(0));
keycache->block_root= NULL; keycache->block_root= NULL;
......
/* Copyright (C) 2004 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "mysys_priv.h"
#ifdef HAVE_LARGE_PAGES
#ifdef HAVE_SYS_IPC_H
#include <sys/ipc.h>
#endif
#ifdef HAVE_SYS_SHM_H
#include <sys/shm.h>
#endif
static uint my_get_large_page_size_int(void);
static gptr my_large_malloc_int(uint size, myf my_flags);
static my_bool my_large_free_int(gptr ptr, myf my_flags);
/*
  Asks the platform-specific helper for the large page size.

  RETURN
    The value reported by my_get_large_page_size_int(); when that value
    is 0 a warning is written to stderr and 0 is returned.
*/

uint my_get_large_page_size(void)
{
  uint page_size;
  DBUG_ENTER("my_get_large_page_size");

  page_size= my_get_large_page_size_int();
  if (page_size == 0)
  {
    fprintf(stderr, "Warning: Failed to determine large page size\n");
  }

  DBUG_RETURN(page_size);
}
/*
  General large pages allocator.

  When large pages are enabled (my_use_large_pages set and
  my_large_page_size nonzero) the block is first requested from
  my_large_malloc_int(). If large pages are disabled, or that request
  returns NULL, the block comes from my_malloc_lock() instead; with MY_WME
  in my_flags the fallback after a failed request is reported on stderr.
*/

gptr my_large_malloc(uint size, myf my_flags)
{
  gptr block;
  DBUG_ENTER("my_large_malloc");

  if (!my_use_large_pages || !my_large_page_size)
  {
    DBUG_RETURN(my_malloc_lock(size, my_flags));
  }

  block= my_large_malloc_int(size, my_flags);
  if (block != NULL)
  {
    DBUG_RETURN(block);
  }

  if (my_flags & MY_WME)
  {
    fprintf(stderr, "Warning: Using conventional memory pool\n");
  }

  DBUG_RETURN(my_malloc_lock(size, my_flags));
}
/*
  General large pages deallocator.

  With large pages enabled the block is first passed to
  my_large_free_int(). That call is expected to fail only for blocks that
  did not come from my_large_malloc_int(), i.e. blocks obtained through
  the my_malloc_lock() fallback, so on failure (or with large pages
  disabled) the block is released with my_free_lock().
*/

void my_large_free(gptr ptr, myf my_flags __attribute__((unused)))
{
  DBUG_ENTER("my_large_free");

  if (my_use_large_pages && my_large_page_size &&
      my_large_free_int(ptr, my_flags))
  {
    /* Released as a large pages block; nothing more to do */
    DBUG_VOID_RETURN;
  }

  my_free_lock(ptr, my_flags);

  DBUG_VOID_RETURN;
}
#ifdef HUGETLB_USE_PROC_MEMINFO
/*
  Linux-specific function to determine the size of large pages.

  Scans /proc/meminfo line by line for "Hugepagesize: <n> kB".

  RETURN
    <n> * 1024, or 0 when the file cannot be opened or no line yields
    a numeric value.
*/

uint my_get_large_page_size_int(void)
{
  FILE *f;
  uint size = 0;
  char buf[256];
  DBUG_ENTER("my_get_large_page_size_int");

  if (!(f = my_fopen("/proc/meminfo", O_RDONLY, MYF(MY_WME))))
    goto finish;

  while (fgets(buf, sizeof(buf), f))
  {
    /*
      Require exactly one converted item. sscanf() returns EOF, which is
      nonzero, when the input runs out before the first conversion, so a
      plain truth test would stop the scan without a value having been read.
    */
    if (sscanf(buf, "Hugepagesize: %u kB", &size) == 1)
      break;
  }

  my_fclose(f, MYF(MY_WME));

finish:
  DBUG_RETURN(size * 1024);
}
#endif /* HUGETLB_USE_PROC_MEMINFO */
#if HAVE_DECL_SHM_HUGETLB
/*
  Linux-specific large pages allocator.

  Rounds size up to a multiple of my_large_page_size, creates a private
  SHM_HUGETLB shared memory segment of that size and attaches it.
  With MY_WME in my_flags, failures are reported on stderr.

  RETURN
    Address of the attached segment, or NULL if shmget() or shmat() fails.
*/

gptr my_large_malloc_int(uint size, myf my_flags)
{
  int shmid;
  gptr ptr;
  struct shmid_ds buf;
  DBUG_ENTER("my_large_malloc_int");

  /*
    Align block size to my_large_page_size.
    NOTE(review): the mask arithmetic assumes my_large_page_size is a
    power of two - confirm against my_get_large_page_size_int().
  */
  size = ((size - 1) & ~(my_large_page_size - 1)) + my_large_page_size;

  shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
  if (shmid < 0)
  {
    /* size is unsigned, so it is printed with %u rather than %d */
    if (my_flags & MY_WME)
      fprintf(stderr,
              "Warning: Failed to allocate %u bytes from HugeTLB memory."
              " errno %d\n", size, errno);

    DBUG_RETURN(NULL);
  }

  ptr = shmat(shmid, NULL, 0);
  if (ptr == (void *)-1)
  {
    if (my_flags& MY_WME)
      fprintf(stderr, "Warning: Failed to attach shared memory segment,"
              " errno %d\n", errno);
    /* The segment was created but cannot be used: remove it again */
    shmctl(shmid, IPC_RMID, &buf);

    DBUG_RETURN(NULL);
  }

  /*
    Remove the shared memory segment so that it will be automatically freed
    after memory is detached or process exits
  */
  shmctl(shmid, IPC_RMID, &buf);

  DBUG_RETURN(ptr);
}
/*
  Linux-specific large pages deallocator.

  Detaches ptr with shmdt().

  RETURN
    1 when shmdt() returns 0, otherwise 0.
*/

my_bool my_large_free_int(byte *ptr, myf my_flags __attribute__((unused)))
{
  DBUG_ENTER("my_large_free_int");
  DBUG_RETURN(!shmdt(ptr));
}
#endif /* HAVE_DECL_SHM_HUGETLB */
#endif /* HAVE_LARGE_PAGES */
...@@ -61,6 +61,12 @@ const char *soundex_map= "01230120022455012623010202"; ...@@ -61,6 +61,12 @@ const char *soundex_map= "01230120022455012623010202";
USED_MEM* my_once_root_block=0; /* pointer to first block */ USED_MEM* my_once_root_block=0; /* pointer to first block */
uint my_once_extra=ONCE_ALLOC_INIT; /* Memory to alloc / block */ uint my_once_extra=ONCE_ALLOC_INIT; /* Memory to alloc / block */
/* from my_largepage.c */
#ifdef HAVE_LARGE_PAGES
my_bool my_use_large_pages= 0;
uint my_large_page_size= 0;
#endif
/* from my_tempnam */ /* from my_tempnam */
#if !defined(HAVE_TEMPNAM) || defined(HPUX11) #if !defined(HAVE_TEMPNAM) || defined(HPUX11)
int _my_tempnam_used=0; int _my_tempnam_used=0;
......
...@@ -88,6 +88,7 @@ extern "C" { ...@@ -88,6 +88,7 @@ extern "C" {
uint innobase_init_flags = 0; uint innobase_init_flags = 0;
ulong innobase_cache_size = 0; ulong innobase_cache_size = 0;
ulong innobase_large_page_size = 0;
/* The default values for the following, type long, start-up parameters /* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */ are declared in mysqld.cc: */
...@@ -116,6 +117,9 @@ values */ ...@@ -116,6 +117,9 @@ values */
uint innobase_flush_log_at_trx_commit = 1; uint innobase_flush_log_at_trx_commit = 1;
my_bool innobase_log_archive = FALSE;/* unused */ my_bool innobase_log_archive = FALSE;/* unused */
my_bool innobase_use_doublewrite = TRUE;
my_bool innobase_use_checksums = TRUE;
my_bool innobase_use_large_pages = FALSE;
my_bool innobase_use_native_aio = FALSE; my_bool innobase_use_native_aio = FALSE;
my_bool innobase_fast_shutdown = TRUE; my_bool innobase_fast_shutdown = TRUE;
my_bool innobase_very_fast_shutdown = FALSE; /* this can be set to my_bool innobase_very_fast_shutdown = FALSE; /* this can be set to
...@@ -1123,6 +1127,12 @@ innobase_init(void) ...@@ -1123,6 +1127,12 @@ innobase_init(void)
srv_fast_shutdown = (ibool) innobase_fast_shutdown; srv_fast_shutdown = (ibool) innobase_fast_shutdown;
srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
srv_use_checksums = (ibool) innobase_use_checksums;
os_use_large_pages = (ibool) innobase_use_large_pages;
os_large_page_size = (ulint) innobase_large_page_size;
srv_file_per_table = (ibool) innobase_file_per_table; srv_file_per_table = (ibool) innobase_file_per_table;
srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog; srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
......
...@@ -181,6 +181,7 @@ extern struct show_var_st innodb_status_variables[]; ...@@ -181,6 +181,7 @@ extern struct show_var_st innodb_status_variables[];
extern uint innobase_init_flags, innobase_lock_type; extern uint innobase_init_flags, innobase_lock_type;
extern uint innobase_flush_log_at_trx_commit; extern uint innobase_flush_log_at_trx_commit;
extern ulong innobase_cache_size; extern ulong innobase_cache_size;
extern ulong innobase_large_page_size;
extern char *innobase_home, *innobase_tmpdir, *innobase_logdir; extern char *innobase_home, *innobase_tmpdir, *innobase_logdir;
extern long innobase_lock_scan_time; extern long innobase_lock_scan_time;
extern long innobase_mirrored_log_groups, innobase_log_files_in_group; extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
...@@ -195,6 +196,9 @@ extern char *innobase_log_group_home_dir, *innobase_log_arch_dir; ...@@ -195,6 +196,9 @@ extern char *innobase_log_group_home_dir, *innobase_log_arch_dir;
extern char *innobase_unix_file_flush_method; extern char *innobase_unix_file_flush_method;
/* The following variables have to be my_bool for SHOW VARIABLES to work */ /* The following variables have to be my_bool for SHOW VARIABLES to work */
extern my_bool innobase_log_archive, extern my_bool innobase_log_archive,
innobase_use_doublewrite,
innobase_use_checksums,
innobase_use_large_pages,
innobase_use_native_aio, innobase_fast_shutdown, innobase_use_native_aio, innobase_fast_shutdown,
innobase_file_per_table, innobase_locks_unsafe_for_binlog, innobase_file_per_table, innobase_locks_unsafe_for_binlog,
innobase_create_status_file; innobase_create_status_file;
......
...@@ -1027,6 +1027,8 @@ extern uint opt_crash_binlog_innodb; ...@@ -1027,6 +1027,8 @@ extern uint opt_crash_binlog_innodb;
extern char *shared_memory_base_name, *mysqld_unix_port; extern char *shared_memory_base_name, *mysqld_unix_port;
extern bool opt_enable_shared_memory; extern bool opt_enable_shared_memory;
extern char *default_tz_name; extern char *default_tz_name;
extern my_bool opt_large_pages;
extern uint opt_large_page_size;
extern MYSQL_LOG mysql_log,mysql_slow_log,mysql_bin_log; extern MYSQL_LOG mysql_log,mysql_slow_log,mysql_bin_log;
extern FILE *bootstrap_file; extern FILE *bootstrap_file;
......
...@@ -299,6 +299,8 @@ my_bool opt_short_log_format= 0; ...@@ -299,6 +299,8 @@ my_bool opt_short_log_format= 0;
my_bool opt_log_queries_not_using_indexes= 0; my_bool opt_log_queries_not_using_indexes= 0;
my_bool lower_case_file_system= 0; my_bool lower_case_file_system= 0;
my_bool opt_innodb_safe_binlog= 0; my_bool opt_innodb_safe_binlog= 0;
my_bool opt_large_pages= 0;
uint opt_large_page_size= 0;
volatile bool mqh_used = 0; volatile bool mqh_used = 0;
uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options; uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options;
...@@ -2423,6 +2425,19 @@ static int init_common_variables(const char *conf_file_name, int argc, ...@@ -2423,6 +2425,19 @@ static int init_common_variables(const char *conf_file_name, int argc,
DBUG_PRINT("info",("%s Ver %s for %s on %s\n",my_progname, DBUG_PRINT("info",("%s Ver %s for %s on %s\n",my_progname,
server_version, SYSTEM_TYPE,MACHINE_TYPE)); server_version, SYSTEM_TYPE,MACHINE_TYPE));
#ifdef HAVE_LARGE_PAGES
/* Initialize large page size */
if (opt_large_pages && (opt_large_page_size= my_get_large_page_size()))
{
my_use_large_pages= 1;
my_large_page_size= opt_large_page_size;
#ifdef HAVE_INNOBASE_DB
innobase_use_large_pages= 1;
innobase_large_page_size= opt_large_page_size;
#endif
}
#endif /* HAVE_LARGE_PAGES */
/* connections and databases needs lots of files */ /* connections and databases needs lots of files */
{ {
uint files, wanted_files; uint files, wanted_files;
...@@ -4086,6 +4101,8 @@ enum options_mysqld ...@@ -4086,6 +4101,8 @@ enum options_mysqld
OPT_INNODB_LOG_ARCHIVE, OPT_INNODB_LOG_ARCHIVE,
OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT, OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT,
OPT_INNODB_FLUSH_METHOD, OPT_INNODB_FLUSH_METHOD,
OPT_INNODB_DOUBLEWRITE,
OPT_INNODB_CHECKSUMS,
OPT_INNODB_FAST_SHUTDOWN, OPT_INNODB_FAST_SHUTDOWN,
OPT_INNODB_FILE_PER_TABLE, OPT_CRASH_BINLOG_INNODB, OPT_INNODB_FILE_PER_TABLE, OPT_CRASH_BINLOG_INNODB,
OPT_INNODB_LOCKS_UNSAFE_FOR_BINLOG, OPT_INNODB_LOCKS_UNSAFE_FOR_BINLOG,
...@@ -4184,7 +4201,8 @@ enum options_mysqld ...@@ -4184,7 +4201,8 @@ enum options_mysqld
OPT_OPTIMIZER_SEARCH_DEPTH, OPT_OPTIMIZER_SEARCH_DEPTH,
OPT_OPTIMIZER_PRUNE_LEVEL, OPT_OPTIMIZER_PRUNE_LEVEL,
OPT_UPDATABLE_VIEWS_WITH_LIMIT, OPT_UPDATABLE_VIEWS_WITH_LIMIT,
OPT_AUTO_INCREMENT, OPT_AUTO_INCREMENT_OFFSET OPT_AUTO_INCREMENT, OPT_AUTO_INCREMENT_OFFSET,
OPT_ENABLE_LARGE_PAGES
}; };
...@@ -4343,6 +4361,12 @@ Disable with --skip-bdb (will save memory).", ...@@ -4343,6 +4361,12 @@ Disable with --skip-bdb (will save memory).",
"Set up signals usable for debugging", "Set up signals usable for debugging",
(gptr*) &opt_debugging, (gptr*) &opt_debugging, (gptr*) &opt_debugging, (gptr*) &opt_debugging,
0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
#ifdef HAVE_LARGE_PAGES
{"large-pages", OPT_ENABLE_LARGE_PAGES, "Enable support for large pages. \
Disable with --skip-large-pages.",
(gptr*) &opt_large_pages, (gptr*) &opt_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0,
0, 0, 0},
#endif
{"init-connect", OPT_INIT_CONNECT, "Command(s) that are executed for each new connection", {"init-connect", OPT_INIT_CONNECT, "Command(s) that are executed for each new connection",
(gptr*) &opt_init_connect, (gptr*) &opt_init_connect, 0, GET_STR_ALLOC, (gptr*) &opt_init_connect, (gptr*) &opt_init_connect, 0, GET_STR_ALLOC,
REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
...@@ -4366,6 +4390,12 @@ Disable with --skip-innodb (will save memory).", ...@@ -4366,6 +4390,12 @@ Disable with --skip-innodb (will save memory).",
"The common part for InnoDB table spaces.", (gptr*) &innobase_data_home_dir, "The common part for InnoDB table spaces.", (gptr*) &innobase_data_home_dir,
(gptr*) &innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, (gptr*) &innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0,
0}, 0},
{"innodb_doublewrite", OPT_INNODB_DOUBLEWRITE, "Enable InnoDB doublewrite buffer (enabled by default). \
Disable with --skip-innodb-doublewrite.", (gptr*) &innobase_use_doublewrite,
(gptr*) &innobase_use_doublewrite, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
{"innodb_checksums", OPT_INNODB_CHECKSUMS, "Enable InnoDB checksums validation (enabled by default). \
Disable with --skip-innodb-checksums.", (gptr*) &innobase_use_checksums,
(gptr*) &innobase_use_checksums, 0, GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
{"innodb_fast_shutdown", OPT_INNODB_FAST_SHUTDOWN, {"innodb_fast_shutdown", OPT_INNODB_FAST_SHUTDOWN,
"Speeds up server shutdown process.", (gptr*) &innobase_fast_shutdown, "Speeds up server shutdown process.", (gptr*) &innobase_fast_shutdown,
(gptr*) &innobase_fast_shutdown, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0}, (gptr*) &innobase_fast_shutdown, 0, GET_BOOL, OPT_ARG, 1, 0, 0, 0, 0, 0},
...@@ -5687,7 +5717,8 @@ static void mysql_init_variables(void) ...@@ -5687,7 +5717,8 @@ static void mysql_init_variables(void)
mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS; mysqld_unix_port= opt_mysql_tmpdir= my_bind_addr_str= NullS;
bzero((gptr) &mysql_tmpdir_list, sizeof(mysql_tmpdir_list)); bzero((gptr) &mysql_tmpdir_list, sizeof(mysql_tmpdir_list));
bzero((char *) &global_status_var, sizeof(global_status_var)); bzero((char *) &global_status_var, sizeof(global_status_var));
opt_large_pages= 0;
/* Character sets */ /* Character sets */
system_charset_info= &my_charset_utf8_general_ci; system_charset_info= &my_charset_utf8_general_ci;
files_charset_info= &my_charset_utf8_general_ci; files_charset_info= &my_charset_utf8_general_ci;
......
...@@ -735,6 +735,8 @@ struct show_var_st init_vars[]= { ...@@ -735,6 +735,8 @@ struct show_var_st init_vars[]= {
{"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG }, {"innodb_buffer_pool_size", (char*) &innobase_buffer_pool_size, SHOW_LONG },
{"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR}, {"innodb_data_file_path", (char*) &innobase_data_file_path, SHOW_CHAR_PTR},
{"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR}, {"innodb_data_home_dir", (char*) &innobase_data_home_dir, SHOW_CHAR_PTR},
{"innodb_doublewrite", (char*) &innobase_use_doublewrite, SHOW_MY_BOOL},
{"innodb_checksums", (char*) &innobase_use_checksums, SHOW_MY_BOOL},
{"innodb_fast_shutdown", (char*) &innobase_fast_shutdown, SHOW_MY_BOOL}, {"innodb_fast_shutdown", (char*) &innobase_fast_shutdown, SHOW_MY_BOOL},
{"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG }, {"innodb_file_io_threads", (char*) &innobase_file_io_threads, SHOW_LONG },
{"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL}, {"innodb_file_per_table", (char*) &innobase_file_per_table, SHOW_MY_BOOL},
...@@ -768,6 +770,8 @@ struct show_var_st init_vars[]= { ...@@ -768,6 +770,8 @@ struct show_var_st init_vars[]= {
SHOW_SYS}, SHOW_SYS},
{"language", language, SHOW_CHAR}, {"language", language, SHOW_CHAR},
{"large_files_support", (char*) &opt_large_files, SHOW_BOOL}, {"large_files_support", (char*) &opt_large_files, SHOW_BOOL},
{"large_pages", (char*) &opt_large_pages, SHOW_MY_BOOL},
{"large_page_size", (char*) &opt_large_page_size, SHOW_INT},
{sys_license.name, (char*) &sys_license, SHOW_SYS}, {sys_license.name, (char*) &sys_license, SHOW_SYS},
{sys_local_infile.name, (char*) &sys_local_infile, SHOW_SYS}, {sys_local_infile.name, (char*) &sys_local_infile, SHOW_SYS},
#ifdef HAVE_MLOCKALL #ifdef HAVE_MLOCKALL
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment