Commit ed61d47e authored by unknown

btr0sea.c, buf0lru.c, buf0buf.c, ha0ha.c, hash0hash.h, ha0ha.h, buf0buf.h:

  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page. This was requested by Marko for the compact InnoDB table format. Note that the adaptive hash index memory overhead grows by 67 % (each hash node grows from 3 to 5 fields); we may have to tune this later somehow.


innobase/include/buf0buf.h:
  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page; this was requested by Marko for the compact InnoDB table format; note that the adaptive hash index overhead grows by 67 %, maybe we have to tune this later somehow
innobase/include/ha0ha.h:
  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page; this was requested by Marko for the compact InnoDB table format; note that the adaptive hash index overhead grows by 67 %, maybe we have to tune this later somehow
innobase/include/hash0hash.h:
  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page; this was requested by Marko for the compact InnoDB table format; note that the adaptive hash index overhead grows by 67 %, maybe we have to tune this later somehow
innobase/ha/ha0ha.c:
  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page; this was requested by Marko for the compact InnoDB table format; note that the adaptive hash index overhead grows by 67 %, maybe we have to tune this later somehow
innobase/buf/buf0buf.c:
  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page; this was requested by Marko for the compact InnoDB table format; note that the adaptive hash index overhead grows by 67 %, maybe we have to tune this later somehow
innobase/buf/buf0lru.c:
  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page; this was requested by Marko for the compact InnoDB table format; note that the adaptive hash index overhead grows by 67 %, maybe we have to tune this later somehow
innobase/btr/btr0sea.c:
  Link adaptive hash index entries to the buffer page, so that we can remove them quickly without knowing the record structure on that page; this was requested by Marko for the compact InnoDB table format; note that the adaptive hash index overhead grows by 67 %, maybe we have to tune this later somehow
parent 0a52a675
...@@ -915,17 +915,6 @@ btr_search_drop_page_hash_index( ...@@ -915,17 +915,6 @@ btr_search_drop_page_hash_index(
{ {
hash_table_t* table; hash_table_t* table;
buf_block_t* block; buf_block_t* block;
ulint n_fields;
ulint n_bytes;
rec_t* rec;
rec_t* sup;
ulint fold;
ulint prev_fold;
dulint tree_id;
ulint n_cached;
ulint n_recs;
ulint* folds;
ulint i;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
...@@ -951,72 +940,17 @@ btr_search_drop_page_hash_index( ...@@ -951,72 +940,17 @@ btr_search_drop_page_hash_index(
|| (block->buf_fix_count == 0)); || (block->buf_fix_count == 0));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
n_fields = block->curr_n_fields; ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
n_bytes = block->curr_n_bytes;
ut_a(n_fields + n_bytes > 0);
rw_lock_s_unlock(&btr_search_latch); rw_lock_s_unlock(&btr_search_latch);
n_recs = page_get_n_recs(page);
/* Calculate and cache fold values into an array for fast deletion
from the hash index */
folds = mem_alloc(n_recs * sizeof(ulint));
n_cached = 0;
sup = page_get_supremum_rec(page);
rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);
if (rec != sup) {
ut_a(n_fields <= rec_get_n_fields(rec));
if (n_bytes > 0) {
ut_a(n_fields < rec_get_n_fields(rec));
}
}
tree_id = btr_page_get_index_id(page);
prev_fold = 0;
while (rec != sup) {
/* FIXME: in a mixed tree, not all records may have enough
ordering fields: */
fold = rec_fold(rec, n_fields, n_bytes, tree_id);
if (fold == prev_fold && prev_fold != 0) {
goto next_rec;
}
/* Remove all hash nodes pointing to this page from the
hash chain */
folds[n_cached] = fold;
n_cached++;
next_rec:
rec = page_rec_get_next(rec);
prev_fold = fold;
}
rw_lock_x_lock(&btr_search_latch); rw_lock_x_lock(&btr_search_latch);
for (i = 0; i < n_cached; i++) { ha_remove_all_nodes_to_page(table, page);
ha_remove_all_nodes_to_page(table, folds[i], page);
}
block->is_hashed = FALSE; block->is_hashed = FALSE;
rw_lock_x_unlock(&btr_search_latch); rw_lock_x_unlock(&btr_search_latch);
mem_free(folds);
} }
/************************************************************************ /************************************************************************
......
...@@ -465,6 +465,7 @@ buf_block_init( ...@@ -465,6 +465,7 @@ buf_block_init(
block->in_LRU_list = FALSE; block->in_LRU_list = FALSE;
block->n_pointers = 0; block->n_pointers = 0;
block->hash_nodes = NULL;
rw_lock_create(&(block->lock)); rw_lock_create(&(block->lock));
ut_ad(rw_lock_validate(&(block->lock))); ut_ad(rw_lock_validate(&(block->lock)));
......
...@@ -789,6 +789,7 @@ buf_LRU_block_free_non_file_page( ...@@ -789,6 +789,7 @@ buf_LRU_block_free_non_file_page(
|| (block->state == BUF_BLOCK_READY_FOR_USE)); || (block->state == BUF_BLOCK_READY_FOR_USE));
ut_a(block->n_pointers == 0); ut_a(block->n_pointers == 0);
ut_a(block->hash_nodes == NULL);
ut_a(!block->in_free_list); ut_a(!block->in_free_list);
block->state = BUF_BLOCK_NOT_USED; block->state = BUF_BLOCK_NOT_USED;
......
...@@ -65,10 +65,53 @@ ha_create( ...@@ -65,10 +65,53 @@ ha_create(
return(table); return(table);
} }
/*****************************************************************
Unlinks an adaptive hash index node from the doubly linked list of hash
nodes anchored at block->hash_nodes. If the node is the current head of the
list, the head is advanced to the node's successor. The neighbours of the
node, if any, are joined to each other. The link fields of the removed node
itself are left as they were. */
UNIV_INLINE
void
ha_remove_buf_block_node(
/*=====================*/
	buf_block_t*	block,	/* in: buffer block */
	ha_node_t*	node)	/* in: an adaptive hash index node */
{
	ha_node_t*	before = node->prev_for_block;
	ha_node_t*	after = node->next_for_block;

	/* The list head must never keep pointing to the removed node */
	if (block->hash_nodes == node) {
		block->hash_nodes = after;
	}

	/* Bridge the predecessor over the removed node */
	if (before != NULL) {
		before->next_for_block = after;
	}

	/* Bridge the successor back over the removed node */
	if (after != NULL) {
		after->prev_for_block = before;
	}
}
/*****************************************************************
Pushes an adaptive hash index node to the front of the doubly linked list of
hash nodes anchored at block->hash_nodes. The node becomes the new list head:
its prev_for_block is set to NULL and its next_for_block to the former head,
whose back link is then redirected to the node. */
UNIV_INLINE
void
ha_add_buf_block_node(
/*==================*/
	buf_block_t*	block,	/* in: buffer block */
	ha_node_t*	node)	/* in: an adaptive hash index node */
{
	ha_node_t*	old_head = block->hash_nodes;

	/* Make the node the first element of the list */
	node->prev_for_block = NULL;
	node->next_for_block = old_head;
	block->hash_nodes = node;

	/* The former head, if there was one, now has a predecessor */
	if (old_head != NULL) {
		old_head->prev_for_block = node;
	}
}
/***************************************************************** /*****************************************************************
Inserts an entry into a hash table. If an entry with the same fold number Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node is found, its node is updated to point to the new data, and no new node
is inserted. */ is inserted. This function is only used in the adaptive hash index. */
ibool ibool
ha_insert_for_fold( ha_insert_for_fold(
...@@ -84,6 +127,7 @@ ha_insert_for_fold( ...@@ -84,6 +127,7 @@ ha_insert_for_fold(
{ {
hash_cell_t* cell; hash_cell_t* cell;
ha_node_t* node; ha_node_t* node;
buf_block_t* block;
ha_node_t* prev_node; ha_node_t* prev_node;
buf_block_t* prev_block; buf_block_t* prev_block;
ulint hash; ulint hash;
...@@ -92,6 +136,9 @@ ha_insert_for_fold( ...@@ -92,6 +136,9 @@ ha_insert_for_fold(
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
block = buf_block_align(data);
hash = hash_calc_hash(fold, table); hash = hash_calc_hash(fold, table);
cell = hash_get_nth_cell(table, hash); cell = hash_get_nth_cell(table, hash);
...@@ -104,7 +151,15 @@ ha_insert_for_fold( ...@@ -104,7 +151,15 @@ ha_insert_for_fold(
prev_block = buf_block_align(prev_node->data); prev_block = buf_block_align(prev_node->data);
ut_a(prev_block->n_pointers > 0); ut_a(prev_block->n_pointers > 0);
prev_block->n_pointers--; prev_block->n_pointers--;
buf_block_align(data)->n_pointers++;
block->n_pointers++;
if (prev_block != block) {
ha_remove_buf_block_node(prev_block,
prev_node);
ha_add_buf_block_node(block,
prev_node);
}
} }
prev_node->data = data; prev_node->data = data;
...@@ -131,7 +186,9 @@ ha_insert_for_fold( ...@@ -131,7 +186,9 @@ ha_insert_for_fold(
ha_node_set_data(node, data); ha_node_set_data(node, data);
if (table->adaptive) { if (table->adaptive) {
buf_block_align(data)->n_pointers++; block->n_pointers++;
ha_add_buf_block_node(block, node);
} }
node->fold = fold; node->fold = fold;
...@@ -166,9 +223,15 @@ ha_delete_hash_node( ...@@ -166,9 +223,15 @@ ha_delete_hash_node(
hash_table_t* table, /* in: hash table */ hash_table_t* table, /* in: hash table */
ha_node_t* del_node) /* in: node to be deleted */ ha_node_t* del_node) /* in: node to be deleted */
{ {
buf_block_t* block;
if (table->adaptive) { if (table->adaptive) {
ut_a(buf_block_align(del_node->data)->n_pointers > 0); block = buf_block_align(del_node->data);
buf_block_align(del_node->data)->n_pointers--;
ut_a(block->n_pointers > 0);
block->n_pointers--;
ha_remove_buf_block_node(block, del_node);
} }
HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node); HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
...@@ -209,6 +272,8 @@ ha_search_and_update_if_found( ...@@ -209,6 +272,8 @@ ha_search_and_update_if_found(
void* data, /* in: pointer to the data */ void* data, /* in: pointer to the data */
void* new_data)/* in: new pointer to the data */ void* new_data)/* in: new pointer to the data */
{ {
buf_block_t* old_block;
buf_block_t* block;
ha_node_t* node; ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
...@@ -220,8 +285,15 @@ ha_search_and_update_if_found( ...@@ -220,8 +285,15 @@ ha_search_and_update_if_found(
if (node) { if (node) {
if (table->adaptive) { if (table->adaptive) {
ut_a(buf_block_align(node->data)->n_pointers > 0); ut_a(buf_block_align(node->data)->n_pointers > 0);
buf_block_align(node->data)->n_pointers--;
buf_block_align(new_data)->n_pointers++; old_block = buf_block_align(node->data);
ut_a(old_block->n_pointers > 0);
old_block->n_pointers--;
ha_remove_buf_block_node(old_block, node);
block = buf_block_align(new_data);
block->n_pointers++;
ha_add_buf_block_node(block, node);
} }
node->data = new_data; node->data = new_data;
...@@ -236,43 +308,25 @@ void ...@@ -236,43 +308,25 @@ void
ha_remove_all_nodes_to_page( ha_remove_all_nodes_to_page(
/*========================*/ /*========================*/
hash_table_t* table, /* in: hash table */ hash_table_t* table, /* in: hash table */
ulint fold, /* in: fold value */
page_t* page) /* in: buffer page */ page_t* page) /* in: buffer page */
{ {
buf_block_t* block;
ha_node_t* node; ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG block = buf_block_align(page);
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_chain_get_first(table, fold);
while (node) { node = block->hash_nodes;
if (buf_frame_align(ha_node_get_data(node)) == page) {
while (node) {
/* Remove the hash node */ /* Remove the hash node */
ha_delete_hash_node(table, node); ha_delete_hash_node(table, node);
/* Start again from the first node in the chain node = block->hash_nodes;
because the deletion may compact the heap of
nodes and move other nodes! */
node = ha_chain_get_first(table, fold);
} else {
node = ha_chain_get_next(node);
}
} }
#ifdef UNIV_DEBUG
/* Check that all nodes really got deleted */
node = ha_chain_get_first(table, fold); ut_a(block->n_pointers == 0);
ut_a(block->hash_nodes == NULL);
while (node) {
ut_a(buf_frame_align(ha_node_get_data(node)) != page);
node = ha_chain_get_next(node);
}
#endif
} }
/***************************************************************** /*****************************************************************
...@@ -352,6 +406,7 @@ ha_print_info( ...@@ -352,6 +406,7 @@ ha_print_info(
n_bufs++; n_bufs++;
} }
fprintf(file, ", node heap has %lu buffer(s)\n", (ulong) n_bufs); fprintf(file, ", node heap has %lu buffer(s)\n",
(ulong) n_bufs);
} }
} }
...@@ -29,6 +29,7 @@ Created 11/5/1995 Heikki Tuuri ...@@ -29,6 +29,7 @@ Created 11/5/1995 Heikki Tuuri
#include "buf0types.h" #include "buf0types.h"
#include "sync0rw.h" #include "sync0rw.h"
#include "hash0hash.h" #include "hash0hash.h"
#include "ha0ha.h"
#include "ut0byte.h" #include "ut0byte.h"
#include "os0proc.h" #include "os0proc.h"
...@@ -817,7 +818,7 @@ struct buf_block_struct{ ...@@ -817,7 +818,7 @@ struct buf_block_struct{
records with the same prefix should be records with the same prefix should be
indexed in the hash index */ indexed in the hash index */
/* The following 4 fields are protected by btr_search_latch: */ /* The following 6 fields are protected by btr_search_latch: */
ibool is_hashed; /* TRUE if hash index has already been ibool is_hashed; /* TRUE if hash index has already been
built on this page; note that it does built on this page; note that it does
...@@ -834,6 +835,11 @@ struct buf_block_struct{ ...@@ -834,6 +835,11 @@ struct buf_block_struct{
ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or ulint curr_side; /* BTR_SEARCH_LEFT_SIDE or
BTR_SEARCH_RIGHT_SIDE in hash BTR_SEARCH_RIGHT_SIDE in hash
indexing */ indexing */
ha_node_t* hash_nodes; /* a doubly linked list of hash nodes
for this buffer block; this points to
the first node in the list if any;
note that we do not use UT_LST_ macros
to manipulate this list */
/* 6. Debug fields */ /* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread rw_lock_t debug_latch; /* in the debug version, each thread
......
...@@ -54,7 +54,7 @@ ha_create( ...@@ -54,7 +54,7 @@ ha_create(
/***************************************************************** /*****************************************************************
Inserts an entry into a hash table. If an entry with the same fold number Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node is found, its node is updated to point to the new data, and no new node
is inserted. */ is inserted. This function is only used in the adaptive hash index. */
ibool ibool
ha_insert_for_fold( ha_insert_for_fold(
...@@ -111,7 +111,6 @@ void ...@@ -111,7 +111,6 @@ void
ha_remove_all_nodes_to_page( ha_remove_all_nodes_to_page(
/*========================*/ /*========================*/
hash_table_t* table, /* in: hash table */ hash_table_t* table, /* in: hash table */
ulint fold, /* in: fold value */
page_t* page); /* in: buffer page */ page_t* page); /* in: buffer page */
/***************************************************************** /*****************************************************************
Validates a hash table. */ Validates a hash table. */
...@@ -134,9 +133,18 @@ ha_print_info( ...@@ -134,9 +133,18 @@ ha_print_info(
typedef struct ha_node_struct ha_node_t; typedef struct ha_node_struct ha_node_t;
struct ha_node_struct { struct ha_node_struct {
ha_node_t* next; /* next chain node or NULL if none */ ha_node_t* next; /* next chain node; NULL if none */
void* data; /* pointer to the data */ void* data; /* pointer to the data */
ulint fold; /* fold value for the data */ ulint fold; /* fold value for the data */
ha_node_t* next_for_block;/* in an adaptive hash index
(btr0sea.c), a doubly linked list of hash
nodes for the buffer block; these nodes
contain pointers to index records on the
page; in the last node this field is NULL;
note that we do not use UT_LST_ macros
to manipulate this list */
ha_node_t* prev_for_block;/* pointer to the previous node; in the
first node NULL */
}; };
#ifndef UNIV_NONINL #ifndef UNIV_NONINL
......
...@@ -166,7 +166,7 @@ hash_get_n_cells( ...@@ -166,7 +166,7 @@ hash_get_n_cells(
/*********************************************************************** /***********************************************************************
Deletes a struct which is stored in the heap of the hash table, and compacts Deletes a struct which is stored in the heap of the hash table, and compacts
the heap. The fold value must be stored in the struct NODE in a field named the heap. The fold value must be stored in the struct NODE in a field named
'fold'. */ 'fold'. This macro is only used for the adaptive hash index. */
#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\ #define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
do {\ do {\
...@@ -191,11 +191,23 @@ do {\ ...@@ -191,11 +191,23 @@ do {\
/* Copy the top node in place of NODE */\ /* Copy the top node in place of NODE */\
\ \
*(NODE) = *top_node111;\ *(NODE) = *top_node111;\
\
/* Update the adaptive hash list of the buffer block that\
corresponds to the top node */\
if (top_node111->next_for_block != NULL) {\
(top_node111->next_for_block)->prev_for_block = NODE;\
}\
\
if (top_node111->prev_for_block != NULL) {\
(top_node111->prev_for_block)->next_for_block = NODE;\
} else {\
buf_block_align(top_node111->data)->hash_nodes = NODE;\
}\
\
/* Look for the hash pointer to the top node, to update it */\
\ \
cell111 = hash_get_nth_cell(TABLE,\ cell111 = hash_get_nth_cell(TABLE,\
hash_calc_hash(top_node111->fold, TABLE));\ hash_calc_hash(top_node111->fold, TABLE));\
\
/* Look for the pointer to the top node, to update it */\
\ \
if (cell111->node == top_node111) {\ if (cell111->node == top_node111) {\
/* The top node is the first in the chain */\ /* The top node is the first in the chain */\
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment