Commit 38d926a8 authored by jyang's avatar jyang

branches/zip: This is a patch from Inaam that uses a red-black tree

to speed up insertions into the flush_list and thus the recovery
process. The patch has been tested by Nokia.
parent d2b4c4f5
...@@ -78,7 +78,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c ...@@ -78,7 +78,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
usr/usr0sess.c usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c) ut/ut0list.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default. # Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details. # See bug#52102 for details.
......
...@@ -217,6 +217,7 @@ noinst_HEADERS= \ ...@@ -217,6 +217,7 @@ noinst_HEADERS= \
include/ut0lst.h \ include/ut0lst.h \
include/ut0mem.h \ include/ut0mem.h \
include/ut0mem.ic \ include/ut0mem.ic \
include/ut0rbt.h \
include/ut0rnd.h \ include/ut0rnd.h \
include/ut0rnd.ic \ include/ut0rnd.ic \
include/ut0sort.h \ include/ut0sort.h \
...@@ -318,6 +319,7 @@ libinnobase_a_SOURCES= \ ...@@ -318,6 +319,7 @@ libinnobase_a_SOURCES= \
ut/ut0dbg.c \ ut/ut0dbg.c \
ut/ut0list.c \ ut/ut0list.c \
ut/ut0mem.c \ ut/ut0mem.c \
ut/ut0rbt.c \
ut/ut0rnd.c \ ut/ut0rnd.c \
ut/ut0ut.c \ ut/ut0ut.c \
ut/ut0vec.c \ ut/ut0vec.c \
......
...@@ -391,6 +391,8 @@ buf_buddy_relocate_block( ...@@ -391,6 +391,8 @@ buf_buddy_relocate_block(
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage); UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
} }
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&buf_pool_zip_mutex); mutex_exit(&buf_pool_zip_mutex);
return(TRUE); return(TRUE);
} }
......
...@@ -1191,8 +1191,6 @@ buf_relocate( ...@@ -1191,8 +1191,6 @@ buf_relocate(
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage); HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage); HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
} }
/********************************************************************//** /********************************************************************//**
...@@ -2224,22 +2222,8 @@ buf_page_get_gen( ...@@ -2224,22 +2222,8 @@ buf_page_get_gen(
ut_ad(!block->page.in_flush_list); ut_ad(!block->page.in_flush_list);
} else { } else {
/* Relocate buf_pool->flush_list. */ /* Relocate buf_pool->flush_list. */
buf_page_t* b; buf_flush_relocate_on_flush_list(bpage,
b = UT_LIST_GET_PREV(list, &block->page);
ut_ad(block->page.in_flush_list);
UT_LIST_REMOVE(list, buf_pool->flush_list,
&block->page);
if (b) {
UT_LIST_INSERT_AFTER(
list, buf_pool->flush_list, b,
&block->page); &block->page);
} else {
UT_LIST_ADD_FIRST(
list, buf_pool->flush_list,
&block->page);
}
} }
/* Buffer-fix, I/O-fix, and X-latch the block /* Buffer-fix, I/O-fix, and X-latch the block
...@@ -2253,6 +2237,9 @@ buf_page_get_gen( ...@@ -2253,6 +2237,9 @@ buf_page_get_gen(
block->page.buf_fix_count = 1; block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ); buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock(&block->lock); rw_lock_x_lock(&block->lock);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&block->mutex); mutex_exit(&block->mutex);
mutex_exit(&buf_pool_zip_mutex); mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++; buf_pool->n_pend_unzip++;
......
...@@ -87,6 +87,138 @@ buf_flush_validate_low(void); ...@@ -87,6 +87,138 @@ buf_flush_validate_low(void);
/*========================*/ /*========================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/********************************************************************//**
Adds a control block to buf_pool->flush_rbt and reports which block
precedes it in tree order. The tree is keyed on
<oldest_modification, space, offset>.
The caller must hold the buffer pool mutex (asserted in debug builds).
@return the block stored in the node preceding the new one, or NULL
when the new node has no predecessor */
static
buf_page_t*
buf_flush_insert_in_flush_rbt(
/*==========================*/
	buf_page_t*	bpage)	/*!< in: bpage to be inserted. */
{
	const ib_rbt_node_t*	inserted;
	const ib_rbt_node_t*	before;
	buf_page_t*		pred;

	ut_ad(buf_pool_mutex_own());

	/* The tree stores the pointer itself, so the pointer serves
	both as the ordering key and as the value copied into the node. */
	inserted = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
	ut_a(inserted != NULL);

	before = rbt_prev(buf_pool->flush_rbt, inserted);

	if (before == NULL) {
		/* Nothing precedes the new node. */
		return(NULL);
	}

	pred = *rbt_value(buf_page_t*, before);
	ut_a(pred != NULL);

	return(pred);
}
/********************************************************************//**
Removes the node holding bpage from buf_pool->flush_rbt.
The caller must hold the buffer pool mutex. Debug builds assert that
the block was actually found in the tree. */
static
void
buf_flush_delete_from_flush_rbt(
/*============================*/
	buf_page_t*	bpage)	/*!< in: bpage to be removed. */
{
	ibool	deleted;

	ut_ad(buf_pool_mutex_own());

	/* The key is the pointer value, hence the address of bpage. */
	deleted = rbt_delete(buf_pool->flush_rbt, &bpage);

	ut_ad(deleted);
}
/********************************************************************//**
Compare two modified blocks in the buffer pool. The key for comparison
is:
key = <oldest_modification, space, offset>
This comparison is used to maintain ordering of blocks in the
buf_pool->flush_rbt.
Note that for the purpose of flush_rbt, we only need to order blocks
on the oldest_modification. The other two fields are used to uniquely
identify the blocks.
Both arguments point to a buf_page_t* (the tree stores pointers), not
to a buf_page_t.
@return -1 if b2 < b1, 0 if b2 == b1, 1 if b2 > b1 */
static
int
buf_flush_block_cmp(
/*================*/
	const void*	p1,		/*!< in: block1 */
	const void*	p2)		/*!< in: block2 */
{
	const buf_page_t*	b1;
	const buf_page_t*	b2;

	ut_ad(p1 != NULL);
	ut_ad(p2 != NULL);

	b1 = *(const buf_page_t* const*) p1;
	b2 = *(const buf_page_t* const*) p2;

	ut_ad(b1 != NULL);
	ut_ad(b2 != NULL);

	ut_ad(b1->in_flush_list);
	ut_ad(b2->in_flush_list);

	if (b2->oldest_modification > b1->oldest_modification) {
		return(1);
	}

	if (b2->oldest_modification < b1->oldest_modification) {
		return(-1);
	}

	/* If oldest_modification is same then decide on the space.
	The fields are compared directly instead of casting their
	difference to int: the difference of two unsigned values
	wraps around, so its sign is wrong whenever the values are
	2^31 or more apart, which would make the ordering
	inconsistent. */
	if (b2->space != b1->space) {
		return(b2->space > b1->space ? 1 : -1);
	}

	/* Or else decide ordering on the offset field. */
	if (b2->offset != b1->offset) {
		return(b2->offset > b1->offset ? 1 : -1);
	}

	return(0);
}
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void)
/*==========================*/
{
buf_pool_mutex_enter();
/* Create red black tree for speedy insertions in flush list. */
buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
buf_flush_block_cmp);
buf_pool_mutex_exit();
}
/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void)
/*==========================*/
{
buf_pool_mutex_enter();
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
rbt_free(buf_pool->flush_rbt);
buf_pool->flush_rbt = NULL;
buf_pool_mutex_exit();
}
/********************************************************************//** /********************************************************************//**
Inserts a modified block into the flush list. */ Inserts a modified block into the flush list. */
UNIV_INTERN UNIV_INTERN
...@@ -100,6 +232,13 @@ buf_flush_insert_into_flush_list( ...@@ -100,6 +232,13 @@ buf_flush_insert_into_flush_list(
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
<= block->page.oldest_modification)); <= block->page.oldest_modification));
/* If we are in the recovery then we need to update the flush
red-black tree as well. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_insert_sorted_into_flush_list(block);
return;
}
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.in_LRU_list); ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash); ut_ad(block->page.in_page_hash);
...@@ -136,13 +275,28 @@ buf_flush_insert_sorted_into_flush_list( ...@@ -136,13 +275,28 @@ buf_flush_insert_sorted_into_flush_list(
ut_d(block->page.in_flush_list = TRUE); ut_d(block->page.in_flush_list = TRUE);
prev_b = NULL; prev_b = NULL;
/* For the most part when this function is called the flush_rbt
should not be NULL. In a very rare boundary case it is possible
that the flush_rbt has already been freed by the recovery thread
before the last page was hooked up in the flush_list by the
io-handler thread. In that case we'll just do a simple
linear search in the else block. */
if (buf_pool->flush_rbt) {
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
} else {
b = UT_LIST_GET_FIRST(buf_pool->flush_list); b = UT_LIST_GET_FIRST(buf_pool->flush_list);
while (b && b->oldest_modification > block->page.oldest_modification) { while (b && b->oldest_modification
> block->page.oldest_modification) {
ut_ad(b->in_flush_list); ut_ad(b->in_flush_list);
prev_b = b; prev_b = b;
b = UT_LIST_GET_NEXT(list, b); b = UT_LIST_GET_NEXT(list, b);
} }
}
if (prev_b == NULL) { if (prev_b == NULL) {
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page); UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
...@@ -237,7 +391,6 @@ buf_flush_remove( ...@@ -237,7 +391,6 @@ buf_flush_remove(
ut_ad(buf_pool_mutex_own()); ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(bpage->in_flush_list); ut_ad(bpage->in_flush_list);
ut_d(bpage->in_flush_list = FALSE);
switch (buf_page_get_state(bpage)) { switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_PAGE:
...@@ -259,12 +412,78 @@ buf_flush_remove( ...@@ -259,12 +412,78 @@ buf_flush_remove(
break; break;
} }
/* If the flush_rbt is active then delete from it as well. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
}
/* Must be done after we have removed it from the flush_rbt
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
bpage->oldest_modification = 0; bpage->oldest_modification = 0;
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list, ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
ut_ad(ut_list_node_313->in_flush_list))); ut_ad(ut_list_node_313->in_flush_list)));
} }
/********************************************************************//**
Relocates a buffer control block on the flush_list: dpage is linked
into the position that bpage occupied and bpage is unlinked. While
buf_pool->flush_rbt is non-NULL the same replacement is made in the
flush_rbt.
Note that it is assumed that the contents of bpage have already been
copied to dpage (hence dpage is expected to carry in_flush_list).
The caller must hold the buffer pool mutex and the mutex of bpage;
both are asserted in debug builds. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage) /*!< in/out: destination block */
{
/* prev: predecessor of bpage on the flush_list.
prev_b: predecessor of dpage as reported by the flush_rbt; stays NULL
when the tree is not active. */
buf_page_t* prev;
buf_page_t* prev_b = NULL;
ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(bpage->in_flush_list);
ut_ad(dpage->in_flush_list);
/* If recovery is active we must swap the control blocks in
the flush_rbt as well. bpage is deleted before dpage is inserted. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
prev_b = buf_flush_insert_in_flush_rbt(dpage);
}
/* Must be done after we have removed it from the flush_rbt
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
/* Remember the list predecessor before unlinking bpage, so that
dpage can be linked in at exactly the same place. */
prev = UT_LIST_GET_PREV(list, bpage);
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
if (prev) {
ut_ad(prev->in_flush_list);
UT_LIST_INSERT_AFTER(
list,
buf_pool->flush_list,
prev, dpage);
} else {
/* bpage was the first block on the list. */
UT_LIST_ADD_FIRST(
list,
buf_pool->flush_list,
dpage);
}
/* Just an extra check. Previous in flush_list
should be the same control block as in flush_rbt.
The check is skipped when the tree is not active. */
ut_a(!buf_pool->flush_rbt || prev_b == prev);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
}
/********************************************************************//** /********************************************************************//**
Updates the flush system data structures when a write is completed. */ Updates the flush system data structures when a write is completed. */
UNIV_INTERN UNIV_INTERN
...@@ -1368,23 +1587,44 @@ buf_flush_validate_low(void) ...@@ -1368,23 +1587,44 @@ buf_flush_validate_low(void)
/*========================*/ /*========================*/
{ {
buf_page_t* bpage; buf_page_t* bpage;
const ib_rbt_node_t* rnode = NULL;
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list, UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
ut_ad(ut_list_node_313->in_flush_list)); ut_ad(ut_list_node_313->in_flush_list));
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
/* If we are in recovery mode i.e.: flush_rbt != NULL
then each block in the flush_list must also be present
in the flush_rbt. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
rnode = rbt_first(buf_pool->flush_rbt);
}
while (bpage != NULL) { while (bpage != NULL) {
const ib_uint64_t om = bpage->oldest_modification; const ib_uint64_t om = bpage->oldest_modification;
ut_ad(bpage->in_flush_list); ut_ad(bpage->in_flush_list);
ut_a(buf_page_in_file(bpage)); ut_a(buf_page_in_file(bpage));
ut_a(om > 0); ut_a(om > 0);
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
ut_a(rnode);
buf_page_t* rpage = *rbt_value(buf_page_t*,
rnode);
ut_a(rpage);
ut_a(rpage == bpage);
rnode = rbt_next(buf_pool->flush_rbt, rnode);
}
bpage = UT_LIST_GET_NEXT(list, bpage); bpage = UT_LIST_GET_NEXT(list, bpage);
ut_a(!bpage || om >= bpage->oldest_modification); ut_a(!bpage || om >= bpage->oldest_modification);
} }
/* By this time we must have exhausted the traversal of
flush_rbt (if active) as well. */
ut_a(rnode == NULL);
return(TRUE); return(TRUE);
} }
......
...@@ -1530,26 +1530,8 @@ buf_LRU_free_block( ...@@ -1530,26 +1530,8 @@ buf_LRU_free_block(
if (b->state == BUF_BLOCK_ZIP_PAGE) { if (b->state == BUF_BLOCK_ZIP_PAGE) {
buf_LRU_insert_zip_clean(b); buf_LRU_insert_zip_clean(b);
} else { } else {
buf_page_t* prev; /* Relocate on buf_pool->flush_list. */
buf_flush_relocate_on_flush_list(bpage, b);
ut_ad(b->in_flush_list);
ut_d(bpage->in_flush_list = FALSE);
prev = UT_LIST_GET_PREV(list, b);
UT_LIST_REMOVE(list, buf_pool->flush_list, b);
if (prev) {
ut_ad(prev->in_flush_list);
UT_LIST_INSERT_AFTER(
list,
buf_pool->flush_list,
prev, b);
} else {
UT_LIST_ADD_FIRST(
list,
buf_pool->flush_list,
b);
}
} }
bpage->zip.data = NULL; bpage->zip.data = NULL;
......
...@@ -608,14 +608,14 @@ buf_read_recv_pages( ...@@ -608,14 +608,14 @@ buf_read_recv_pages(
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) { while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
os_aio_simulated_wake_handler_threads(); os_aio_simulated_wake_handler_threads();
os_thread_sleep(500000); os_thread_sleep(10000);
count++; count++;
if (count > 100) { if (count > 1000) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: InnoDB has waited for" "InnoDB: Error: InnoDB has waited for"
" 50 seconds for pending\n" " 10 seconds for pending\n"
"InnoDB: reads to the buffer pool to" "InnoDB: reads to the buffer pool to"
" be finished.\n" " be finished.\n"
"InnoDB: Number of pending reads %lu," "InnoDB: Number of pending reads %lu,"
......
...@@ -33,6 +33,7 @@ Created 11/5/1995 Heikki Tuuri ...@@ -33,6 +33,7 @@ Created 11/5/1995 Heikki Tuuri
#include "hash0hash.h" #include "hash0hash.h"
#include "ut0byte.h" #include "ut0byte.h"
#include "page0types.h" #include "page0types.h"
#include "ut0rbt.h"
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
#include "os0proc.h" #include "os0proc.h"
...@@ -1359,6 +1360,19 @@ struct buf_pool_struct{ ...@@ -1359,6 +1360,19 @@ struct buf_pool_struct{
/*!< this is in the set state /*!< this is in the set state
when there is no flush batch when there is no flush batch
of the given type running */ of the given type running */
ib_rbt_t* flush_rbt; /* !< a red-black tree is used
exclusively during recovery to
speed up insertions in the
flush_list. This tree contains
blocks in order of
oldest_modification LSN and is
kept in sync with the
flush_list.
Each member of the tree MUST
also be on the flush_list.
This tree is relevant only in
recovery and is set to NULL
once the recovery is over. */
ulint freed_page_clock;/*!< a sequence number used ulint freed_page_clock;/*!< a sequence number used
to count the number of buffer to count the number of buffer
blocks removed from the end of blocks removed from the end of
......
...@@ -40,6 +40,16 @@ buf_flush_remove( ...@@ -40,6 +40,16 @@ buf_flush_remove(
/*=============*/ /*=============*/
buf_page_t* bpage); /*!< in: pointer to the block in question */ buf_page_t* bpage); /*!< in: pointer to the block in question */
/********************************************************************//** /********************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage has already been
copied to dpage. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage); /*!< in/out: destination block */
/********************************************************************//**
Updates the flush system data structures when a write is completed. */ Updates the flush system data structures when a write is completed. */
UNIV_INTERN UNIV_INTERN
void void
...@@ -139,8 +149,8 @@ how much redo the workload is generating and at what rate. */ ...@@ -139,8 +149,8 @@ how much redo the workload is generating and at what rate. */
struct buf_flush_stat_struct struct buf_flush_stat_struct
{ {
ib_uint64_t redo; /**< amount of redo generated. */ ib_uint64_t redo; /*!< amount of redo generated. */
ulint n_flushed; /**< number of pages flushed. */ ulint n_flushed; /*!< number of pages flushed. */
}; };
/** Statistics for selecting flush rate of dirty pages. */ /** Statistics for selecting flush rate of dirty pages. */
...@@ -175,6 +185,22 @@ buf_flush_validate(void); ...@@ -175,6 +185,22 @@ buf_flush_validate(void);
/*====================*/ /*====================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void);
/*==========================*/
/******************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void);
/*==========================*/
/** When buf_flush_free_margin is called, it tries to make this many blocks /** When buf_flush_free_margin is called, it tries to make this many blocks
available to replacement in the free list and at the end of the LRU list (to available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single make sure that a read-ahead batch can be read efficiently in a single
......
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/*******************************************************************//**
@file include/ut0rbt.h
Red-Black tree implementation.
Created 2007-03-20 Sunny Bains
************************************************************************/
#ifndef INNOBASE_UT0RBT_H
#define INNOBASE_UT0RBT_H
#if !defined(IB_RBT_TESTING)
#include "univ.i"
#include "ut0mem.h"
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define ut_malloc malloc
#define ut_free free
#define ulint unsigned long
#define ut_a(c) assert(c)
#define ut_error assert(0)
#define ibool unsigned int
#define TRUE 1
#define FALSE 0
#endif
/* Red black tree typedefs */
typedef struct ib_rbt_struct ib_rbt_t;
typedef struct ib_rbt_node_struct ib_rbt_node_t;
/* FIXME: Iterator is a better name than _bound_ */
typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
/* Red black tree color types: every node is tagged with one of these
two colors. */
enum ib_rbt_color_enum {
IB_RBT_RED,
IB_RBT_BLACK
};
/* Shorthand used for the color field of a tree node. */
typedef enum ib_rbt_color_enum ib_rbt_color_t;
/* Red black tree node. The user data is stored inline, starting at
value[0]; use rbt_value() to obtain a typed pointer to it. */
struct ib_rbt_node_struct {
ib_rbt_color_t color; /* color of this node */
ib_rbt_node_t* left; /* points to the left child */
ib_rbt_node_t* right; /* points to the right child */
ib_rbt_node_t* parent; /* points to the parent node */
char value[1]; /* First byte of the data value.
NOTE(review): presumably nodes are
over-allocated so that sizeof_value
bytes fit here - confirm in
ut0rbt.c. Do not reorder fields. */
};
/* Red black tree instance, created by rbt_create() and released by
rbt_free(). */
struct ib_rbt_struct {
ib_rbt_node_t* nil; /* Black colored node that is
used as a sentinel. This is
pre-allocated too.*/
ib_rbt_node_t* root; /* Root of the tree, this is
pre-allocated and the first
data node is the left child.*/
ulint n_nodes; /* Total number of data nodes;
read through rbt_size() */
ib_rbt_compare compare; /* Fn. to use for comparison;
invoked through rbt_compare() */
ulint sizeof_value; /* Sizeof the item in bytes, as
passed to rbt_create() */
};
/* The result of searching for a key in the tree, this is useful for
a speedy lookup and insert if key doesn't exist. Filled in by
rbt_search() / rbt_search_cmp() and accepted by rbt_add_node() as the
parent of the node to add. */
struct ib_rbt_bound_struct {
const ib_rbt_node_t*
last; /* Last node visited */
int result; /* Result of comparing with
the last non-nil node that
was visited */
};
/* Accessor macros. Every macro parameter that is an expression is
parenthesized in the expansion, so that arguments such as &tree or
cond ? a : b expand correctly. */

/* Size in elements (t is a pointer to an rb tree instance) */
#define rbt_size(t) ((t)->n_nodes)
/* Check whether the rb tree is empty (t is a pointer to an rb tree
instance) */
#define rbt_empty(t) (rbt_size(t) == 0)
/* Get a pointer to the data value (t is the data type, n is a pointer
to an rb tree node). t is a type name and therefore cannot be
parenthesized. */
#define rbt_value(t, n) ((t*) &(n)->value[0])
/* Compare a key with the node value (t is tree, k is key, n is node).
Evaluates to whatever the tree's compare function returns. */
#define rbt_compare(t, k, n) ((t)->compare((k), (n)->value))
/****************************************************************//**
Free an instance of a red black tree */
UNIV_INTERN
void
rbt_free(
/*=====*/
ib_rbt_t* tree); /*!< in: rb tree to free */
/****************************************************************//**
Create an instance of a red black tree
@return rb tree instance */
UNIV_INTERN
ib_rbt_t*
rbt_create(
/*=======*/
size_t sizeof_value, /*!< in: size in bytes */
ib_rbt_compare compare); /*!< in: comparator */
/****************************************************************//**
Delete a node from the red black tree, identified by key.
@return TRUE if success FALSE if not found */
UNIV_INTERN
ibool
rbt_delete(
/*=======*/
ib_rbt_t* tree, /*!< in: rb tree */
const void* key); /*!< in: key to delete */
/****************************************************************//**
Remove a node from the rb tree, the node is not free'd, that is the
callers responsibility.
@return the removed node, with the const qualifier removed */
UNIV_INTERN
ib_rbt_node_t*
rbt_remove_node(
/*============*/
ib_rbt_t* tree, /*!< in: rb tree */
const ib_rbt_node_t*
node); /*!< in: node to delete, this
is a fudge and declared const
because the caller has access
only to const nodes.*/
/****************************************************************//**
Find a matching node in the rb tree.
@return node if found else return NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lookup(
/*=======*/
const ib_rbt_t* tree, /*!< in: rb tree to search */
const void* key); /*!< in: key to lookup */
/****************************************************************//**
Generic insert of a value in the rb tree.
@return inserted node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_insert(
/*=======*/
ib_rbt_t* tree, /*!< in: rb tree */
const void* key, /*!< in: key for ordering */
const void* value); /*!< in: data that will be
copied to the node.*/
/****************************************************************//**
Add a new node to the tree, useful for data that is pre-sorted.
@return appended node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_add_node(
/*=========*/
ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_bound_t* parent, /*!< in: parent */
const void* value); /*!< in: this value is copied
to the node */
/****************************************************************//**
Return the left most data node in the tree
@return left most node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_first(
/*======*/
const ib_rbt_t* tree); /*!< in: rb tree */
/****************************************************************//**
Return the right most data node in the tree
@return right most node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_last(
/*=====*/
const ib_rbt_t* tree); /*!< in: rb tree */
/****************************************************************//**
Return the next node from current.
@return successor node to current that is passed in. */
UNIV_INTERN
const ib_rbt_node_t*
rbt_next(
/*=====*/
const ib_rbt_t* tree, /*!< in: rb tree */
const ib_rbt_node_t* /*!< in: current node */
current);
/****************************************************************//**
Return the prev node from current.
@return predecessor node to current that is passed in */
UNIV_INTERN
const ib_rbt_node_t*
rbt_prev(
/*=====*/
const ib_rbt_t* tree, /*!< in: rb tree */
const ib_rbt_node_t* /*!< in: current node */
current);
/****************************************************************//**
Find the node that has the lowest key that is >= key.
@return node that satisfies the lower bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lower_bound(
/*============*/
const ib_rbt_t* tree, /*!< in: rb tree */
const void* key); /*!< in: key to search */
/****************************************************************//**
Find the node that has the greatest key that is <= key.
@return node that satisfies the upper bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_upper_bound(
/*============*/
const ib_rbt_t* tree, /*!< in: rb tree */
const void* key); /*!< in: key to search */
/****************************************************************//**
Search for the key, a node will be returned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
@return result of last comparison */
UNIV_INTERN
int
rbt_search(
/*=======*/
const ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_bound_t* parent, /*!< in: search bounds */
const void* key); /*!< in: key to search */
/****************************************************************//**
Search for the key, a node will be returned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
@return result of last comparison */
UNIV_INTERN
int
rbt_search_cmp(
/*===========*/
const ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_bound_t* parent, /*!< in: search bounds */
const void* key, /*!< in: key to search */
ib_rbt_compare compare); /*!< in: comparator */
/****************************************************************//**
Clear the tree, deletes (and free's) all the nodes. */
UNIV_INTERN
void
rbt_clear(
/*======*/
ib_rbt_t* tree); /*!< in: rb tree */
/****************************************************************//**
Merge the nodes from src into dst. Return the number of nodes merged.
@return no. of recs merged */
UNIV_INTERN
ulint
rbt_merge_uniq(
/*===========*/
ib_rbt_t* dst, /*!< in: dst rb tree */
const ib_rbt_t* src); /*!< in: src rb tree */
/****************************************************************//**
Merge the nodes from src into dst. Return the number of nodes merged.
Delete the nodes from src after copying node to dst. As a side effect
the duplicates will be left untouched in the src, since we don't support
duplicates (yet). NOTE: src and dst must be similar, the function doesn't
check for this condition (yet).
@return no. of recs merged */
UNIV_INTERN
ulint
rbt_merge_uniq_destructive(
/*=======================*/
ib_rbt_t* dst, /*!< in: dst rb tree */
ib_rbt_t* src); /*!< in: src rb tree */
/****************************************************************//**
Verify the integrity of the RB tree. For debugging. 0 failure else height
of tree (in count of black nodes).
@return TRUE if OK FALSE if tree invalid. */
UNIV_INTERN
ibool
rbt_validate(
/*=========*/
const ib_rbt_t* tree); /*!< in: tree to validate */
/****************************************************************//**
Iterate over the tree in depth first order. */
UNIV_INTERN
void
rbt_print(
/*======*/
const ib_rbt_t* tree, /*!< in: tree to traverse */
ib_rbt_print_node print); /*!< in: print function */
#endif /* INNOBASE_UT0RBT_H */
...@@ -138,7 +138,9 @@ UNIV_INTERN ulint recv_max_parsed_page_no; ...@@ -138,7 +138,9 @@ UNIV_INTERN ulint recv_max_parsed_page_no;
/** This many frames must be left free in the buffer pool when we scan /** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the use these free frames to read in pages when we start applying the
log records to the database. */ log records to the database.
This is the default value. If the actual size of the buffer pool is
larger than 10 MB we'll set this value to 512. */
UNIV_INTERN ulint recv_n_pool_free_frames; UNIV_INTERN ulint recv_n_pool_free_frames;
/** The maximum lsn we see for a page during the recovery process. If this /** The maximum lsn we see for a page during the recovery process. If this
...@@ -294,6 +296,12 @@ recv_sys_init( ...@@ -294,6 +296,12 @@ recv_sys_init(
return; return;
} }
/* Initialize red-black tree for fast insertions into the
flush_list during recovery process.
As this initialization is done while holding the buffer pool
mutex we perform it before acquiring recv_sys->mutex. */
buf_flush_init_flush_rbt();
mutex_enter(&(recv_sys->mutex)); mutex_enter(&(recv_sys->mutex));
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
...@@ -303,6 +311,12 @@ recv_sys_init( ...@@ -303,6 +311,12 @@ recv_sys_init(
recv_is_from_backup = TRUE; recv_is_from_backup = TRUE;
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
/* Set appropriate value of recv_n_pool_free_frames. */
if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
/* Buffer pool of size greater than 10 MB. */
recv_n_pool_free_frames = 512;
}
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE); recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
recv_sys->len = 0; recv_sys->len = 0;
recv_sys->recovered_offset = 0; recv_sys->recovered_offset = 0;
...@@ -372,6 +386,9 @@ recv_sys_debug_free(void) ...@@ -372,6 +386,9 @@ recv_sys_debug_free(void)
recv_sys->last_block_buf_start = NULL; recv_sys->last_block_buf_start = NULL;
mutex_exit(&(recv_sys->mutex)); mutex_exit(&(recv_sys->mutex));
/* Free up the flush_rbt. */
buf_flush_free_flush_rbt();
} }
# endif /* UNIV_LOG_DEBUG */ # endif /* UNIV_LOG_DEBUG */
......
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/*******************************************************************//**
@file ut/ut0rbt.c
Red-Black tree implementation
Created 2007-03-20 Sunny Bains
***********************************************************************/
#include "ut0rbt.h"
/************************************************************************
Definition of a red-black tree
==============================
A red-black tree is a binary search tree which has the following
red-black properties:
1. Every node is either red or black.
2. Every leaf (NULL - in our case tree->nil) is black.
3. If a node is red, then both its children are black.
4. Every simple path from a node to a descendant leaf contains the
same number of black nodes.
It follows from (3) that no two red nodes are adjacent on any path from
the root to a leaf.
Any number of black nodes, however, may appear in a row. */
#if defined(IB_RBT_TESTING)
#warning "Testing enabled!"
#endif
/** The real root of tree t. It hangs off the left link of the "fake"
root node that is stored in t->root. The argument is parenthesised so
that any pointer expression may be passed. */
#define ROOT(t)		((t)->root->left)
/** Number of bytes to allocate for one node of tree t: the node header
plus the value, minus one byte.
NOTE(review): the "- 1" presumably accounts for a one byte value[]
placeholder inside ib_rbt_node_t; confirm against ut0rbt.h. */
#define SIZEOF_NODE(t)	((sizeof(ib_rbt_node_t) + (t)->sizeof_value) - 1)
/****************************************************************//**
Pre-order walk of a sub-tree: the callback is given a node first, then
the node's left sub-tree is visited, then its right sub-tree. */
static
void
rbt_print_subtree(
/*==============*/
	const ib_rbt_t*		tree,	/*!< in: tree that owns the nodes */
	const ib_rbt_node_t*	node,	/*!< in: top of the sub-tree to walk */
	ib_rbt_print_node	print)	/*!< in: callback applied to each node */
{
	/* The nil sentinel marks the bottom of the tree. */
	if (node == tree->nil) {
		return;
	}

	print(node);

	rbt_print_subtree(tree, node->left, print);
	rbt_print_subtree(tree, node->right, print);
}
/****************************************************************//**
Walk the tree from the first node to the last and check that the values
are strictly ascending according to tree->compare().
@return TRUE if ordered, FALSE as soon as a value is not greater than
the one before it */
static
ibool
rbt_check_ordering(
/*===============*/
	const ib_rbt_t*		tree)	/*!< in: tree to verify */
{
	const ib_rbt_node_t*	cur = rbt_first(tree);
	const ib_rbt_node_t*	before = NULL;

	while (cur != NULL) {

		/* The first node has nothing to be compared with. */
		if (before != NULL
		    && tree->compare(before->value, cur->value) >= 0) {

			return(FALSE);
		}

		before = cur;
		cur = rbt_next(tree, cur);
	}

	return(TRUE);
}
/****************************************************************//**
Compute the black height of a sub-tree and verify on the way that both
sides of every node have the same black height, that a red node has two
black children and that every colour is either red or black.
The nil sentinel counts as one black node.
@return 0 if the sub-tree is invalid, else its black height */
static
ibool
rbt_count_black_nodes(
/*==================*/
	const ib_rbt_t*		tree,	/*!< in: tree to verify */
	const ib_rbt_node_t*	node)	/*!< in: top of the sub-tree */
{
	ulint	on_left;
	ulint	on_right;

	if (node == tree->nil) {
		return(1);
	}

	on_left = rbt_count_black_nodes(tree, node->left);
	on_right = rbt_count_black_nodes(tree, node->right);

	/* A broken sub-tree below, or two sides that disagree. */
	if (on_left == 0 || on_right == 0 || on_left != on_right) {
		return(0);
	}

	if (node->color == IB_RBT_RED) {
		/* A red node adds nothing to the height, but both of
		its children have to be black. */
		if (node->left->color != IB_RBT_BLACK
		    || node->right->color != IB_RBT_BLACK) {

			return(0);
		}

		return(on_left);
	}

	/* Neither red nor black: the colour field is corrupt. */
	if (node->color != IB_RBT_BLACK) {
		return(0);
	}

	return(on_right + 1);
}
/****************************************************************//**
Rotate left around node: node's right child moves up into node's place,
node becomes that child's left child, and the child's former left
sub-tree is re-attached as node's right sub-tree. */
static
void
rbt_rotate_left(
/*============*/
	const ib_rbt_node_t*	nil,	/*!< in: nil node of the tree */
	ib_rbt_node_t*		node)	/*!< in: node to rotate */
{
	ib_rbt_node_t*	pivot = node->right;
	ib_rbt_node_t*	inner = pivot->left;
	ib_rbt_node_t*	above = node->parent;

	/* Hand the pivot's left sub-tree over to node. */
	node->right = inner;

	if (inner != nil) {
		inner->parent = node;
	}

	/* The pivot takes node's place below node's old parent. The
	real root hangs off the left link of the fake root, so no
	special case is needed for it. */
	pivot->parent = above;

	if (above->left == node) {
		above->left = pivot;
	} else {
		above->right = pivot;
	}

	/* Finally node goes below the pivot, on its left. */
	pivot->left = node;
	node->parent = pivot;
}
/****************************************************************//**
Rotate right around node: node's left child moves up into node's place,
node becomes that child's right child, and the child's former right
sub-tree is re-attached as node's left sub-tree. */
static
void
rbt_rotate_right(
/*=============*/
	const ib_rbt_node_t*	nil,	/*!< in: nil node of the tree */
	ib_rbt_node_t*		node)	/*!< in: node to rotate */
{
	ib_rbt_node_t*	pivot = node->left;
	ib_rbt_node_t*	inner = pivot->right;
	ib_rbt_node_t*	above = node->parent;

	/* Hand the pivot's right sub-tree over to node. */
	node->left = inner;

	if (inner != nil) {
		inner->parent = node;
	}

	/* The pivot takes node's place below node's old parent. */
	pivot->parent = above;

	if (above->right == node) {
		/* Node was the right child of its parent. */
		above->right = pivot;
	} else {
		/* Node must have been the left child. */
		above->left = pivot;
	}

	/* Finally node goes below the pivot, on its right. */
	pivot->right = node;
	node->parent = pivot;
}
/****************************************************************//**
Link node below the node recorded in parent->last. The node goes to the
left if parent->last is the fake root or if parent->result is negative,
otherwise to the right. A result of 0 (duplicate key) is not supported
and trips an assertion. No rebalancing is done here.
@return the node that was linked in */
static
ib_rbt_node_t*
rbt_tree_add_child(
/*===============*/
	const ib_rbt_t*	tree,	/*!< in: rbt tree */
	ib_rbt_bound_t*	parent,	/*!< in: where to attach the node */
	ib_rbt_node_t*	node)	/*!< in: node to add */
{
	/* parent->last is read only for the caller, not for us. */
	ib_rbt_node_t*	attach_to = (ib_rbt_node_t*) parent->last;

	if (attach_to != tree->root && parent->result >= 0) {
		/* Duplicates are not handled (yet). */
		ut_a(parent->result != 0);

		attach_to->right = node;
	} else {
		attach_to->left = node;
	}

	node->parent = attach_to;

	return(node);
}
/****************************************************************//**
Plain binary search tree insert: descend from the real root comparing
key with each node's value until the nil sentinel is reached, then link
node below the last node visited. The tree is not rebalanced.
@return inserted node */
static
ib_rbt_node_t*
rbt_tree_insert(
/*============*/
	ib_rbt_t*	tree,	/*!< in: rb tree */
	const void*	key,	/*!< in: key for ordering */
	ib_rbt_node_t*	node)	/*!< in: node that holds the value */
{
	ib_rbt_bound_t	where;
	ib_rbt_node_t*	cursor;

	/* For an empty tree the node hangs off the fake root. */
	where.result = 0;
	where.last = tree->root;

	for (cursor = ROOT(tree); cursor != tree->nil; /* No op */) {

		where.last = cursor;
		where.result = tree->compare(key, cursor->value);

		cursor = (where.result < 0) ? cursor->left : cursor->right;
	}

	ut_a(cursor == tree->nil);

	rbt_tree_add_child(tree, &where, node);

	return(node);
}
/****************************************************************//**
Restore the red-black properties after node has been linked into the
tree. The node is coloured red and the repair then moves towards the
root for as long as the node's parent is red too: a red uncle is dealt
with by recolouring, a black uncle by one or two rotations. The real
root is coloured black before returning. */
static
void
rbt_balance_tree(
/*=============*/
const ib_rbt_t* tree, /*!< in: tree to balance */
ib_rbt_node_t* node) /*!< in: node that was inserted */
{
const ib_rbt_node_t* nil = tree->nil;
ib_rbt_node_t* parent = node->parent;
/* The inserted node always starts out red. */
node->color = IB_RBT_RED;
/* A red node below a red parent is not allowed; keep repairing until
the parent is black or node has become the real root. */
while (node != ROOT(tree) && parent->color == IB_RBT_RED) {
ib_rbt_node_t* grand_parent = parent->parent;
if (parent == grand_parent->left) {
/* Parent is a left child, so the uncle is on the right. */
ib_rbt_node_t* uncle = grand_parent->right;
if (uncle->color == IB_RBT_RED) {
/* Case 1 - red uncle: change the colors. */
uncle->color = IB_RBT_BLACK;
parent->color = IB_RBT_BLACK;
grand_parent->color = IB_RBT_RED;
/* Move node up the tree; the grand parent is red now and may
clash with its own parent. */
node = grand_parent;
} else {
if (node == parent->right) {
/* Case 2 - the uncle is black and node is a right
child: move node up to its parent and rotate left
so that node ends up being a left child. */
node = parent;
rbt_rotate_left(nil, node);
}
/* Re-read the grand parent, node may have changed in case 2. */
grand_parent = node->parent->parent;
/* Case 3 - recolour and rotate the grand parent to the right. */
node->parent->color = IB_RBT_BLACK;
grand_parent->color = IB_RBT_RED;
rbt_rotate_right(nil, grand_parent);
}
} else {
/* Mirror image: parent is a right child, the uncle is on the left. */
ib_rbt_node_t* uncle = grand_parent->left;
if (uncle->color == IB_RBT_RED) {
/* Case 1 - red uncle: change the colors. */
uncle->color = IB_RBT_BLACK;
parent->color = IB_RBT_BLACK;
grand_parent->color = IB_RBT_RED;
/* Move node up the tree. */
node = grand_parent;
} else {
if (node == parent->left) {
/* Case 2 - the uncle is black and node is a left
child: move node up to its parent and rotate right
so that node ends up being a right child. */
node = parent;
rbt_rotate_right(nil, node);
}
/* Re-read the grand parent, node may have changed in case 2. */
grand_parent = node->parent->parent;
/* Case 3 - recolour and rotate the grand parent to the left. */
node->parent->color = IB_RBT_BLACK;
grand_parent->color = IB_RBT_RED;
rbt_rotate_left(nil, grand_parent);
}
}
/* Refresh the cached parent for the loop condition. */
parent = node->parent;
}
/* Color the root black. */
ROOT(tree)->color = IB_RBT_BLACK;
}
/****************************************************************//**
Find the in-order successor of a node: the leftmost node of its right
sub-tree if there is one, otherwise the nearest ancestor that has the
node somewhere in its left sub-tree.
@return successor node or NULL if no successor */
static
ib_rbt_node_t*
rbt_find_successor(
/*===============*/
	const ib_rbt_t*		tree,	/*!< in: rb tree */
	const ib_rbt_node_t*	current)/*!< in: node to start from; const
					because rbt_next() passes its
					argument straight through */
{
	ib_rbt_node_t*	candidate = current->right;
	ib_rbt_node_t*	ancestor;

	if (candidate != tree->nil) {
		/* Smallest node of the right sub-tree. */
		while (candidate->left != tree->nil) {
			candidate = candidate->left;
		}

		return(candidate);
	}

	/* Climb for as long as we arrive from a right link. The const
	is cast away, the node itself is not modified. */
	candidate = (ib_rbt_node_t*) current;

	for (ancestor = current->parent;
	     ancestor != tree->root && candidate == ancestor->right;
	     ancestor = candidate->parent) {

		candidate = ancestor;
	}

	/* Hitting the fake root means current was the last node. */
	return((ancestor == tree->root) ? NULL : ancestor);
}
/****************************************************************//**
Find the in-order predecessor of a node: the rightmost node of its left
sub-tree if there is one, otherwise the nearest ancestor that has the
node somewhere in its right sub-tree.
@return predecessor node or NULL if no predecessor */
static
ib_rbt_node_t*
rbt_find_predecessor(
/*=================*/
	const ib_rbt_t*		tree,	/*!< in: rb tree */
	const ib_rbt_node_t*	current)/*!< in: node to start from; const
					because rbt_prev() passes its
					argument straight through */
{
	ib_rbt_node_t*	candidate = current->left;
	ib_rbt_node_t*	ancestor;

	if (candidate != tree->nil) {
		/* Largest node of the left sub-tree. */
		while (candidate->right != tree->nil) {
			candidate = candidate->right;
		}

		return(candidate);
	}

	/* Climb for as long as we arrive from a left link. The const
	is cast away, the node itself is not modified. */
	candidate = (ib_rbt_node_t*) current;

	for (ancestor = current->parent;
	     ancestor != tree->root && candidate == ancestor->left;
	     ancestor = candidate->parent) {

		candidate = ancestor;
	}

	/* Hitting the fake root means current was the first node. */
	return((ancestor == tree->root) ? NULL : ancestor);
}
/****************************************************************//**
Make the parent of eject point to node instead, and make node point back
to that parent. eject itself is not modified: it keeps its links and its
colour but is no longer reachable from the tree. It is an assertion
failure if eject is not a child of its own parent. */
static
void
rbt_eject_node(
/*===========*/
	ib_rbt_node_t*	eject,	/*!< in: node to eject */
	ib_rbt_node_t*	node)	/*!< in: node to replace with */
{
	ib_rbt_node_t*	above = eject->parent;

	/* Redirect whichever child link referred to eject. */
	if (above->left == eject) {
		above->left = node;
	} else if (above->right == eject) {
		above->right = node;
	} else {
		ut_a(0);
	}

	node->parent = above;
}
/****************************************************************//**
Put node into the position held by replace: node adopts both children of
replace, takes its place below its parent, and the two nodes exchange
their colours. */
static
void
rbt_replace_node(
/*=============*/
	ib_rbt_node_t*	replace,/*!< in: node to replace */
	ib_rbt_node_t*	node)	/*!< in: node to replace with */
{
	ib_rbt_color_t	outgoing_color = replace->color;

	/* Adopt the children, one side at a time. */
	node->left = replace->left;
	node->left->parent = node;

	node->right = replace->right;
	node->right->parent = node;

	/* Make the parent of replace point to node. */
	rbt_eject_node(replace, node);

	/* Exchange the colours. */
	replace->color = node->color;
	node->color = outgoing_color;
}
/****************************************************************//**
Unlink node from the tree. A node with at most one real child is simply
replaced by that child (or by nil). A node with two children is replaced
by its in-order successor, and the successor's own place is taken by the
successor's right child. All three links of the detached node are reset
to nil afterwards.
@return the child that moved up: the node's only child, or the right
child of the successor; this may be the nil sentinel */
static
ib_rbt_node_t*
rbt_detach_node(
/*============*/
	const ib_rbt_t*	tree,	/*!< in: rb tree */
	ib_rbt_node_t*	node)	/*!< in: node to detach */
{
	const ib_rbt_node_t*	nil = tree->nil;
	ib_rbt_node_t*		child;

	if (node->left == nil || node->right == nil) {
		/* At most one real child: it takes the node's place. */
		child = (node->left == nil) ? node->right : node->left;

		rbt_eject_node(node, child);
	} else {
		/* Two children: the successor takes the node's place. */
		ib_rbt_node_t*	successor = rbt_find_successor(tree, node);

		ut_a(successor != nil);
		ut_a(successor->parent != nil);
		ut_a(successor->left == nil);

		child = successor->right;

		/* Lift the successor out first, then drop it in. */
		rbt_eject_node(successor, child);
		rbt_replace_node(node, successor);
	}

	/* Reset the node links. */
	node->parent = node->right = node->left = tree->nil;

	return(child);
}
/****************************************************************//**
Rebalance around parent after a deletion, working on parent's right
child (the sibling). rbt_remove_node_and_rebalance() calls this when the
node that moved up is parent's left child.
@return parent if rebalancing has to continue further up the tree,
else NULL */
static
ib_rbt_node_t*
rbt_balance_right(
/*==============*/
const ib_rbt_node_t* nil, /*!< in: rb tree nil node */
ib_rbt_node_t* parent, /*!< in: parent node */
ib_rbt_node_t* sibling)/*!< in: sibling node */
{
ib_rbt_node_t* node = NULL;
ut_a(sibling != nil);
/* Red sibling: swap the colours of parent and sibling and rotate left
at parent, then continue with parent's new right child. */
if (sibling->color == IB_RBT_RED) {
parent->color = IB_RBT_RED;
sibling->color = IB_RBT_BLACK;
rbt_rotate_left(nil, parent);
sibling = parent->right;
ut_a(sibling != nil);
}
/* Both children of the sibling are black: colour the sibling red and
let the caller continue at parent. */
if (sibling->left->color == IB_RBT_BLACK
&& sibling->right->color == IB_RBT_BLACK) {
node = parent; /* Parent needs to be rebalanced too. */
sibling->color = IB_RBT_RED;
} else {
/* Only the left child of the sibling is red: recolour and rotate
right at the sibling, then continue with parent's new right child. */
if (sibling->right->color == IB_RBT_BLACK) {
ut_a(sibling->left->color == IB_RBT_RED);
sibling->color = IB_RBT_RED;
sibling->left->color = IB_RBT_BLACK;
rbt_rotate_right(nil, sibling);
sibling = parent->right;
ut_a(sibling != nil);
}
/* The sibling takes over parent's colour, parent and the sibling's
right child become black, and parent is rotated left. NULL is
returned: nothing is left to do higher up. */
sibling->color = parent->color;
sibling->right->color = IB_RBT_BLACK;
parent->color = IB_RBT_BLACK;
rbt_rotate_left(nil, parent);
}
return(node);
}
/****************************************************************//**
Rebalance around parent after a deletion, working on parent's left
child (the sibling). rbt_remove_node_and_rebalance() calls this when the
node that moved up is parent's right child. This is the mirror image of
rbt_balance_right().
@return parent if rebalancing has to continue further up the tree,
else NULL */
static
ib_rbt_node_t*
rbt_balance_left(
/*=============*/
const ib_rbt_node_t* nil, /*!< in: rb tree nil node */
ib_rbt_node_t* parent, /*!< in: parent node */
ib_rbt_node_t* sibling)/*!< in: sibling node */
{
ib_rbt_node_t* node = NULL;
ut_a(sibling != nil);
/* Red sibling: swap the colours of parent and sibling and rotate right
at parent, then continue with parent's new left child. */
if (sibling->color == IB_RBT_RED) {
parent->color = IB_RBT_RED;
sibling->color = IB_RBT_BLACK;
rbt_rotate_right(nil, parent);
sibling = parent->left;
ut_a(sibling != nil);
}
/* Both children of the sibling are black: colour the sibling red and
let the caller continue at parent. */
if (sibling->right->color == IB_RBT_BLACK
&& sibling->left->color == IB_RBT_BLACK) {
node = parent; /* Parent needs to be rebalanced too. */
sibling->color = IB_RBT_RED;
} else {
/* Only the right child of the sibling is red: recolour and rotate
left at the sibling, then continue with parent's new left child. */
if (sibling->left->color == IB_RBT_BLACK) {
ut_a(sibling->right->color == IB_RBT_RED);
sibling->color = IB_RBT_RED;
sibling->right->color = IB_RBT_BLACK;
rbt_rotate_left(nil, sibling);
sibling = parent->left;
ut_a(sibling != nil);
}
/* The sibling takes over parent's colour, parent and the sibling's
left child become black, and parent is rotated right. NULL is
returned: nothing is left to do higher up. */
sibling->color = parent->color;
sibling->left->color = IB_RBT_BLACK;
parent->color = IB_RBT_BLACK;
rbt_rotate_right(nil, parent);
}
return(node);
}
/****************************************************************//**
Unlink node from the tree, rebalance the tree if the colour that left
the tree was black, and decrement the node count. The node itself is not
freed, that is left to the caller. */
static
void
rbt_remove_node_and_rebalance(
/*==========================*/
ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_node_t* node) /*!< in: node to remove */
{
/* Detach node and get the node that will be used
as rebalance start. */
ib_rbt_node_t* child = rbt_detach_node(tree, node);
/* If node had two children, rbt_detach_node() has exchanged its colour
with that of its successor, so node->color is the colour of the
position that was actually vacated. */
if (node->color == IB_RBT_BLACK) {
ib_rbt_node_t* last = child;
/* The real root is red while the loop runs and is coloured black
again below; presumably this is what stops the walk once child
has reached the root. */
ROOT(tree)->color = IB_RBT_RED;
while (child && child->color == IB_RBT_BLACK) {
ib_rbt_node_t* parent = child->parent;
/* Did the deletion cause an imbalance in the
parents left sub-tree. */
if (parent->left == child) {
child = rbt_balance_right(
tree->nil, parent, parent->right);
} else if (parent->right == child) {
child = rbt_balance_left(
tree->nil, parent, parent->left);
} else {
ut_error;
}
/* The helpers return NULL when they are done; remember the
last node they asked us to continue with. */
if (child) {
last = child;
}
}
ut_a(last);
/* The node at which the walk ended is coloured black. */
last->color = IB_RBT_BLACK;
ROOT(tree)->color = IB_RBT_BLACK;
}
/* Note that we have removed a node from the tree. */
--tree->n_nodes;
}
/****************************************************************//**
Free a node together with everything below it. Both sub-trees are
released before the node itself. */
static
void
rbt_free_node(
/*==========*/
	ib_rbt_node_t*	node,	/*!< in: top of the sub-tree to free */
	ib_rbt_node_t*	nil)	/*!< in: rb tree nil node */
{
	/* The sentinel is shared and must not be freed here. */
	if (node == nil) {
		return;
	}

	rbt_free_node(node->left, nil);
	rbt_free_node(node->right, nil);

	ut_free(node);
}
/****************************************************************//**
Release the whole tree: every node reachable from the fake root
(including the fake root itself), the nil sentinel and finally the tree
descriptor. The tree must not be used afterwards. */
UNIV_INTERN
void
rbt_free(
/*=====*/
	ib_rbt_t*	tree)	/*!< in: rb tree to free */
{
	ib_rbt_node_t*	sentinel = tree->nil;

	rbt_free_node(tree->root, sentinel);

	ut_free(sentinel);
	ut_free(tree);
}
/****************************************************************//**
Allocate and initialise an empty red black tree. Besides the descriptor
two helper nodes are allocated: the black nil sentinel, whose links all
point back at itself, and the black "fake" root, whose left link will
hold the real root of the tree.
@return an empty rb tree */
UNIV_INTERN
ib_rbt_t*
rbt_create(
/*=======*/
	size_t		sizeof_value,	/*!< in: sizeof data item */
	ib_rbt_compare	compare)	/*!< in: fn to compare items */
{
	ib_rbt_t*	tree;
	ib_rbt_node_t*	nil;
	ib_rbt_node_t*	fake_root;

	tree = (ib_rbt_t*) ut_malloc(sizeof(*tree));
	memset(tree, 0, sizeof(*tree));

	/* The sentinel (NIL) node refers to itself in every direction. */
	nil = (ib_rbt_node_t*) ut_malloc(sizeof(*nil));
	memset(nil, 0, sizeof(*nil));

	nil->color = IB_RBT_BLACK;
	nil->parent = nil->left = nil->right = nil;

	/* The fake root starts out with nothing but nil around it. */
	fake_root = (ib_rbt_node_t*) ut_malloc(sizeof(*fake_root));
	memset(fake_root, 0, sizeof(*fake_root));

	fake_root->color = IB_RBT_BLACK;
	fake_root->parent = fake_root->left = fake_root->right = nil;

	tree->sizeof_value = sizeof_value;
	tree->compare = compare;
	tree->nil = nil;
	tree->root = fake_root;

	return(tree);
}
/****************************************************************//**
Insert a value into the tree. A new node is allocated, tree->sizeof_value
bytes of value are copied into it, the node is placed by an ordinary
binary search on key and the tree is rebalanced.
@return inserted node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_insert(
/*=======*/
	ib_rbt_t*	tree,	/*!< in: rb tree */
	const void*	key,	/*!< in: key for ordering */
	const void*	value)	/*!< in: value of key, this value
				is copied to the node */
{
	/* The node and the room for its value are one allocation. */
	ib_rbt_node_t*	fresh = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));

	fresh->parent = fresh->left = fresh->right = tree->nil;
	memcpy(fresh->value, value, tree->sizeof_value);

	/* Place the node first, restore the balance second. */
	rbt_tree_insert(tree, key, fresh);
	rbt_balance_tree(tree, fresh);

	tree->n_nodes++;

	return(fresh);
}
/****************************************************************//**
Add a value at a position the caller has already determined, typically
with rbt_search(); useful for data that is pre-sorted. A new node is
allocated, the value is copied into it, the node is linked below
parent->last and the tree is rebalanced. If parent->last is NULL (empty
tree) it is set to the fake root first.
@return appended node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_add_node(
/*=========*/
	ib_rbt_t*	tree,	/*!< in: rb tree */
	ib_rbt_bound_t*	parent,	/*!< in: bounds */
	const void*	value)	/*!< in: this value is copied
				to the node */
{
	/* The node and the room for its value are one allocation. */
	ib_rbt_node_t*	fresh = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));

	fresh->parent = fresh->left = fresh->right = tree->nil;
	memcpy(fresh->value, value, tree->sizeof_value);

	/* In an empty tree the node hangs off the fake root. */
	if (!parent->last) {
		parent->last = tree->root;
	}

	/* The position is taken on trust, it is not verified here. */
	rbt_tree_add_child(tree, parent, fresh);
	rbt_balance_tree(tree, fresh);

	tree->n_nodes++;

#if defined(IB_RBT_TESTING)
	ut_a(rbt_validate(tree));
#endif
	return(fresh);
}
/****************************************************************//**
Binary search for the node whose value compares equal to key.
@return the matching node, or NULL if there is none */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lookup(
/*=======*/
	const ib_rbt_t*	tree,	/*!< in: rb tree */
	const void*	key)	/*!< in: key to use for search */
{
	const ib_rbt_node_t*	cursor;

	for (cursor = ROOT(tree); cursor != tree->nil; /* No op */) {
		int	cmp = tree->compare(key, cursor->value);

		if (cmp == 0) {
			return(cursor);
		}

		cursor = (cmp < 0) ? cursor->left : cursor->right;
	}

	/* Fell off the bottom of the tree. */
	return(NULL);
}
/****************************************************************//**
Look key up, and if a node is found remove it from the tree, rebalance
and free the node.
@return TRUE if a node was deleted, FALSE if key was not found */
UNIV_INTERN
ibool
rbt_delete(
/*=======*/
	ib_rbt_t*	tree,	/*!< in: rb tree */
	const void*	key)	/*!< in: key to delete */
{
	ib_rbt_node_t*	victim = (ib_rbt_node_t*) rbt_lookup(tree, key);

	if (victim == NULL) {
		return(FALSE);
	}

	rbt_remove_node_and_rebalance(tree, victim);
	ut_free(victim);

	return(TRUE);
}
/****************************************************************//**
Remove a node from the rb tree and rebalance. The node is not freed,
that is the caller's responsibility. Returning the node without the
const makes it easy to write
	ut_free(rbt_remove_node(tree, node));
@return the removed node, without the const */
UNIV_INTERN
ib_rbt_node_t*
rbt_remove_node(
/*============*/
	ib_rbt_t*		tree,		/*!< in: rb tree */
	const ib_rbt_node_t*	const_node)	/*!< in: node to delete, this
						is a fudge and declared const
						because the caller can access
						only const nodes */
{
	/* Cast away the const once and use the result throughout. */
	ib_rbt_node_t*	node = (ib_rbt_node_t*) const_node;

	rbt_remove_node_and_rebalance(tree, node);

	return(node);
}
/****************************************************************//**
Find the node that has the lowest key that is >= key.
@return node satisfying the lower bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lower_bound(
/*============*/
	const ib_rbt_t*	tree,	/*!< in: rb tree */
	const void*	key)	/*!< in: key to search */
{
	ib_rbt_node_t*	best = NULL;
	ib_rbt_node_t*	cursor = ROOT(tree);

	while (cursor != tree->nil) {
		int	cmp = tree->compare(key, cursor->value);

		if (cmp == 0) {
			/* An exact match cannot be improved upon. */
			return(cursor);
		}

		if (cmp > 0) {
			/* This node is too small, look to the right. */
			cursor = cursor->right;
		} else {
			/* A candidate; something smaller that still
			qualifies may exist on the left. */
			best = cursor;
			cursor = cursor->left;
		}
	}

	return(best);
}
/****************************************************************//**
Find the node that has the greatest key that is <= key.
@return node satisfying the upper bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_upper_bound(
/*============*/
	const ib_rbt_t*	tree,	/*!< in: rb tree */
	const void*	key)	/*!< in: key to search */
{
	ib_rbt_node_t*	best = NULL;
	ib_rbt_node_t*	cursor = ROOT(tree);

	while (cursor != tree->nil) {
		int	cmp = tree->compare(key, cursor->value);

		if (cmp == 0) {
			/* An exact match cannot be improved upon. */
			return(cursor);
		}

		if (cmp < 0) {
			/* This node is too large, look to the left. */
			cursor = cursor->left;
		} else {
			/* A candidate; something larger that still
			qualifies may exist on the right. */
			best = cursor;
			cursor = cursor->right;
		}
	}

	return(best);
}
/****************************************************************//**
Descend from the root looking for key, using tree->compare(). On return
parent->last is the last node key was compared with (NULL if the tree is
empty) and parent->result is the outcome of that comparison (1 if the
tree is empty). The descent stops at the first node that compares equal.
@return value of parent->result, 0 if key was found */
UNIV_INTERN
int
rbt_search(
/*=======*/
	const ib_rbt_t*	tree,	/*!< in: rb tree */
	ib_rbt_bound_t*	parent,	/*!< in: search bounds */
	const void*	key)	/*!< in: key to search */
{
	ib_rbt_node_t*	cursor = ROOT(tree);

	/* Every thing is greater than the NULL root. */
	parent->last = NULL;
	parent->result = 1;

	while (cursor != tree->nil) {

		parent->last = cursor;
		parent->result = tree->compare(key, cursor->value);

		if (parent->result == 0) {
			break;
		}

		cursor = (parent->result > 0)
			? cursor->right
			: cursor->left;
	}

	return(parent->result);
}
/****************************************************************//**
Descend from the root looking for key, using the supplied comparison
function instead of tree->compare(). On return parent->last is the last
node key was compared with (NULL if the tree is empty) and
parent->result is the outcome of that comparison (1 if the tree is
empty). The descent stops at the first node that compares equal.
@return value of parent->result, 0 if key was found */
UNIV_INTERN
int
rbt_search_cmp(
/*===========*/
	const ib_rbt_t*	tree,	/*!< in: rb tree */
	ib_rbt_bound_t*	parent,	/*!< in: search bounds */
	const void*	key,	/*!< in: key to search */
	ib_rbt_compare	compare)/*!< in: fn to compare items */
{
	ib_rbt_node_t*	cursor = ROOT(tree);

	/* Every thing is greater than the NULL root. */
	parent->last = NULL;
	parent->result = 1;

	while (cursor != tree->nil) {

		parent->last = cursor;
		parent->result = compare(key, cursor->value);

		if (parent->result == 0) {
			break;
		}

		cursor = (parent->result > 0)
			? cursor->right
			: cursor->left;
	}

	return(parent->result);
}
/****************************************************************//**
Return the left most node in the tree.
@return the leftmost node or NULL if the tree is empty */
UNIV_INTERN
const ib_rbt_node_t*
rbt_first(
/*======*/
	const ib_rbt_t*	tree)	/*!< in: rb tree */
{
	ib_rbt_node_t*	node = ROOT(tree);

	if (node == tree->nil) {
		/* Empty tree. */
		return(NULL);
	}

	while (node->left != tree->nil) {
		node = node->left;
	}

	return(node);
}
/****************************************************************//**
Return the right most node in the tree.
@return the rightmost node or NULL if the tree is empty */
UNIV_INTERN
const ib_rbt_node_t*
rbt_last(
/*=====*/
	const ib_rbt_t*	tree)	/*!< in: rb tree */
{
	ib_rbt_node_t*	node = ROOT(tree);

	if (node == tree->nil) {
		/* Empty tree. */
		return(NULL);
	}

	while (node->right != tree->nil) {
		node = node->right;
	}

	return(node);
}
/****************************************************************//**
Return the node that follows current in key order.
@return the successor of current; NULL if current is NULL or has no
successor */
UNIV_INTERN
const ib_rbt_node_t*
rbt_next(
/*=====*/
	const ib_rbt_t*		tree,	/*!< in: rb tree */
	const ib_rbt_node_t*	current)/*!< in: current node */
{
	if (current == NULL) {
		return(NULL);
	}

	return(rbt_find_successor(tree, current));
}
/****************************************************************//**
Return the node that precedes current in key order.
@return the predecessor of current; NULL if current is NULL or has no
predecessor */
UNIV_INTERN
const ib_rbt_node_t*
rbt_prev(
/*=====*/
	const ib_rbt_t*		tree,	/*!< in: rb tree */
	const ib_rbt_node_t*	current)/*!< in: current node */
{
	if (current == NULL) {
		return(NULL);
	}

	return(rbt_find_predecessor(tree, current));
}
/****************************************************************//**
Empty the tree: free every node below the fake root, point both links of
the fake root at nil again and reset the node count. The tree descriptor,
the fake root and the nil sentinel are kept, so the tree can be reused. */
UNIV_INTERN
void
rbt_clear(
/*======*/
	ib_rbt_t*	tree)	/*!< in: rb tree */
{
	ib_rbt_node_t*	nil = tree->nil;

	rbt_free_node(ROOT(tree), nil);

	tree->root->left = nil;
	tree->root->right = nil;
	tree->n_nodes = 0;
}
/****************************************************************//**
Merge the nodes of src into dst. Every value of src that is not yet
present in dst is copied into a new dst node; src is left unchanged.
Nothing is done if src is empty or if dst and src are the same tree.
@return no. of recs merged */
UNIV_INTERN
ulint
rbt_merge_uniq(
/*===========*/
	ib_rbt_t*	dst,	/*!< in: dst rb tree */
	const ib_rbt_t*	src)	/*!< in: src rb tree */
{
	ib_rbt_bound_t		where;
	const ib_rbt_node_t*	from;
	ulint			copied = 0;

	if (rbt_empty(src) || dst == src) {
		return(0);
	}

	from = rbt_first(src);

	while (from != NULL) {

		/* A result of 0 means dst holds this value already. */
		if (rbt_search(dst, &where, from->value) != 0) {
			rbt_add_node(dst, &where, from->value);
			++copied;
		}

		from = rbt_next(src, from);
	}

	return(copied);
}
/****************************************************************//**
Merge the nodes of src into dst by moving them. Every node of src whose
value is not yet present in dst is unlinked from src and linked into
dst; the node is reused, nothing is copied or freed. Nodes whose value
already exists in dst are skipped and therefore stay behind in src.
Nothing is done if src is empty or if dst and src are the same tree.
@return no. of recs merged */
UNIV_INTERN
ulint
rbt_merge_uniq_destructive(
/*=======================*/
	ib_rbt_t*	dst,	/*!< in: dst rb tree */
	ib_rbt_t*	src)	/*!< in: src rb tree */
{
	ib_rbt_bound_t	parent;
	ib_rbt_node_t*	src_node;
	ulint		old_size = rbt_size(dst);

	if (rbt_empty(src) || dst == src) {
		return(0);
	}

	for (src_node = (ib_rbt_node_t*) rbt_first(src); src_node; /* */) {
		ib_rbt_node_t*	prev = src_node;

		/* Fetch the successor before prev is possibly unlinked
		from src below. */
		src_node = (ib_rbt_node_t*) rbt_next(src, prev);

		/* Skip duplicates. */
		if (rbt_search(dst, &parent, prev->value) != 0) {

			/* Remove and reset the node but preserve
			the node (data) value. */
			rbt_remove_node_and_rebalance(src, prev);

			/* The nil should be taken from the dst tree. */
			prev->parent = prev->left = prev->right = dst->nil;

			/* rbt_search() reports an empty tree by leaving
			parent.last NULL. The first node then has to hang
			off the fake root, as in rbt_add_node(); without
			this rbt_tree_add_child() would dereference NULL. */
			if (parent.last == NULL) {
				parent.last = dst->root;
			}

			rbt_tree_add_child(dst, &parent, prev);
			rbt_balance_tree(dst, prev);

			++dst->n_nodes;
		}
	}

#if defined(IB_RBT_TESTING)
	ut_a(rbt_validate(dst));
	ut_a(rbt_validate(src));
#endif
	return(rbt_size(dst) - old_size);
}
/****************************************************************//**
Validate the tree: first check that every path from the root to the
leaves has the same count of black nodes, and only if that holds check
that the nodes are in order.
@return TRUE if OK FALSE otherwise */
UNIV_INTERN
ibool
rbt_validate(
/*=========*/
	const ib_rbt_t*	tree)	/*!< in: RB tree to validate */
{
	/* A black height of 0 is how an invalid tree is reported. */
	return(rbt_count_black_nodes(tree, ROOT(tree)) > 0
	       ? rbt_check_ordering(tree)
	       : FALSE);
}
/****************************************************************//**
Walk the whole tree depth first, starting at the real root, and hand
every node to the supplied callback. */
UNIV_INTERN
void
rbt_print(
/*======*/
	const ib_rbt_t*		tree,	/*!< in: tree to traverse */
	ib_rbt_print_node	print)	/*!< in: print function */
{
	const ib_rbt_node_t*	top = ROOT(tree);

	rbt_print_subtree(tree, top, print);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment