Commit 9c019f4e authored by unknown's avatar unknown

Automatic primary key for BDB tables


sql/field.h:
  Cleanup
sql/filesort.cc:
  Cleanup
sql/opt_range.cc:
  Remove index in use
sql/sql_base.cc:
  Allow Berkeley DB tables used only the index when the query only uses
  the index + data from the primary key
sql/sql_select.cc:
  Fix for left join optimization
BitKeeper/etc/logging_ok:
  Logging to logging@openlogging.org accepted
parent d78f9840
monty@tik.mysql.com
monty@donna.mysql.com
......@@ -1793,16 +1793,20 @@ Big changes made in @strong{MySQL} Version 3.22.12.
@item @strong{MyODBC} (uses ODBC SDK 2.5) --- Gamma
It seems to work well with some programs.
@item Replication -- Alpha
@item Replication -- Alpha / Beta
We are still working on replication, so don't expect this to be rock
solid yet. On the other hand, some @strong{MySQL} users are already
using this with good results.
@item BDB Tables -- Alpha
@item BDB Tables -- Alpha / Beta
The Berkeley DB code is very stable, but we are still improving the interface
between @strong{MySQL} and BDB tables, so it will take some time before this
is as tested as the other table types.
@item Automatic recovery of MyISAM tables - Alpha.
This only affects the new code that checks if the table was closed properly
on open and executes an automatic check/repair of the table if it wasn't.
@end table
MySQL AB provides e-mail support for paying customers, but the @strong{MySQL}
......@@ -7979,12 +7983,10 @@ you should also compile your code to be multi-threaded!
@node Windows and BDB tables., Windows vs Unix, Windows compiling, Windows
@subsection Windows and BDB Tables
We are working on removing the requirement that one must have a primary
key in a BDB table. As soon as this is fixed we will throughly test the
BDB interface by running the @strong{MySQL} benchmark and our internal
test suite on it. When the above is done we will start to release binary
distributions (for Windows and UNIX) of @strong{MySQL} that will include
support for BDB tables.
We will shortly do a full test on the new BDB interface on Windows.
When this is done we will start to release binary distributions (for
Windows and UNIX) of @strong{MySQL} that will include support for BDB
tables.
@cindex Windows, versus UNIX
@cindex operating systems, Windows versus UNIX
......@@ -21876,13 +21878,22 @@ Some characteristic of @code{BDB} tables:
@itemize @bullet
@item
All @code{BDB} tables must have a primary key.
@strong{MySQL} requires a @code{PRIMARY KEY} in each BDB table to be
able to refer to previously read rows; If you don't create on,
@strong{MySQL} will create an maintain a hidden @code{PRIMARY KEY} for
you. The hidden key has a length of 5 bytes and is incremented for each
insert attempt.
@item
If all columns you access in a @code{BDB} tables is part of the same index or
part of the the primary key then @strong{MySQL} can execute the query
without having to access the actual row. In a @code{MyISAM} table the
above holds only if the columns are part of the same index.
@item
The @code{PRIMARY KEY} will be faster than any other key, as the
@code{PRIMARY KEY} is stored together with the row data. As the other keys are
stored as the key data + the @code{PRIMARY KEY}, its important to keep the
@code{PRIMARY KEY} as short as possible to save disk and get better speed.
@item
@code{LOCK TABLES} works on @code{BDB} tables as with other tables. If
you don't use @code{LOCK TABLE}, @strong{MYSQL} will issue an internal
multiple write lock on the table to ensure that the table will be
......@@ -37972,6 +37983,11 @@ though, so 3.23 is not released as a stable version yet.
@appendixsubsec Changes in release 3.23.26
@itemize @bullet
@item
If one don't create a @code{PRIMARY KEY} in a BDB table, a hidden
@code{PRIMARY KEY} will be created.
@item
Added read-only-key optimization to BDB tables.
@item
@code{LEFT JOIN} did in some case prefer a full table scan when one
didn't have a @code{WHERE} clause.
@item
......@@ -44,7 +44,7 @@ class Field {
uint8 null_bit; // And position to it
struct st_table *table; // Pointer for table
ulong query_id; // For quick test of used fields
key_map key_start,part_of_key; // Which keys a field is in
key_map key_start,part_of_key; // Key is part of these keys.
const char *table_name,*field_name;
utype unireg_check;
uint32 field_length; // Length of field
......
......@@ -331,7 +331,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
if (! indexfile && ! quick_select)
{
file->reset(); // QQ; Shouldn't be needed
if (table->keyread) // QQ Can be removed after the reset
if (sort_form->key_read) // QQ Can be removed after the reset
file->extra(HA_EXTRA_KEYREAD); // QQ is removed
next_pos=(byte*) 0; /* Find records in sequence */
file->rnd_init();
......
This diff is collapsed.
......@@ -23,15 +23,18 @@
#include <db.h>
#define BDB_HIDDEN_PRIMARY_KEY_LENGTH 5
typedef struct st_berkeley_share {
ulonglong auto_ident;
THR_LOCK lock;
pthread_mutex_t mutex;
char *table_name;
uint table_name_length,use_count;
my_bool inited;
bool primary_key_inited;
} BDB_SHARE;
class ha_berkeley: public handler
{
THR_LOCK_DATA lock;
......@@ -46,13 +49,15 @@ class ha_berkeley: public handler
BDB_SHARE *share;
ulong int_option_flag;
ulong alloced_rec_buff_length;
uint primary_key,last_dup_key;
bool fixed_length_row, fixed_length_primary_key, hidden_primary_key;
uint primary_key,last_dup_key, hidden_primary_key;
bool fixed_length_row, fixed_length_primary_key, key_read;
bool fix_rec_buff_for_blob(ulong length);
byte current_ident[BDB_HIDDEN_PRIMARY_KEY_LENGTH];
ulong max_row_length(const byte *buf);
int pack_row(DBT *row,const byte *record);
int pack_row(DBT *row,const byte *record, bool new_row);
void unpack_row(char *record, DBT *row);
void ha_berkeley::unpack_key(char *record, DBT *key, uint index);
DBT *pack_key(DBT *key, uint keynr, char *buff, const byte *record);
DBT *pack_key(DBT *key, uint keynr, char *buff, const byte *key_ptr,
uint key_length);
......@@ -64,7 +69,7 @@ class ha_berkeley: public handler
int update_primary_key(DB_TXN *trans, bool primary_key_changed,
const byte * old_row, const byte * new_row,
DBT *prim_key);
int read_row(int error, char *buf, uint keynr, DBT *row, bool);
int read_row(int error, char *buf, uint keynr, DBT *row, DBT *key, bool);
DBT *get_pos(DBT *to, byte *pos);
public:
......@@ -72,9 +77,8 @@ class ha_berkeley: public handler
int_option_flag(HA_READ_NEXT | HA_READ_PREV |
HA_REC_NOT_IN_SEQ |
HA_KEYPOS_TO_RNDPOS | HA_READ_ORDER | HA_LASTKEY_ORDER |
HA_LONGLONG_KEYS | HA_NULL_KEY |
HA_BLOB_KEY |
HA_REQUIRE_PRIMARY_KEY | HA_NOT_EXACT_COUNT |
HA_LONGLONG_KEYS | HA_NULL_KEY | HA_HAVE_KEY_READ_ONLY |
HA_BLOB_KEY | HA_NOT_EXACT_COUNT |
HA_PRIMARY_KEY_IN_READ_INDEX | HA_DROP_BEFORE_CREATE),
last_dup_key((uint) -1)
{
......@@ -84,14 +88,14 @@ class ha_berkeley: public handler
const char **bas_ext() const;
ulong option_flag() const { return int_option_flag; }
uint max_record_length() const { return HA_MAX_REC_LENGTH; }
uint max_keys() const { return MAX_KEY; }
uint max_keys() const { return MAX_KEY-1; }
uint max_key_parts() const { return MAX_REF_PARTS; }
uint max_key_length() const { return MAX_KEY_LENGTH; }
uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
bool fast_key_read() { return 1;}
bool has_transactions() { return 1;}
int open(const char *name, int mode, uint test_if_locked);
void initialize(void);
int close(void);
double scan_time();
int write_row(byte * buf);
......@@ -129,6 +133,16 @@ class ha_berkeley: public handler
int delete_table(const char *name);
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
void update_auto_primary_key();
inline void get_auto_primary_key(byte *to)
{
ulonglong tmp;
pthread_mutex_lock(&share->mutex);
share->auto_ident++;
int5store(to,share->auto_ident);
pthread_mutex_unlock(&share->mutex);
}
};
extern bool berkeley_skip;
......
......@@ -202,6 +202,7 @@ class handler :public Sql_alloc
virtual double read_time(ha_rows rows) { return rows; }
virtual bool fast_key_read() { return 0;}
virtual bool has_transactions(){ return 0;}
virtual uint extra_rec_buf_length() { return 0; }
virtual int index_init(uint idx) { active_index=idx; return 0;}
virtual int index_end() {return 0; }
......
......@@ -2347,6 +2347,7 @@ bool QUICK_SELECT::unique_key_range()
QUICK_SELECT *get_quick_select_for_ref(TABLE *table, TABLE_REF *ref)
{
table->file->index_end(); // Remove old cursor
QUICK_SELECT *quick=new QUICK_SELECT(table, ref->key, 1);
KEY *key_info = &table->key_info[ref->key];
KEY_PART *key_part;
......
......@@ -1413,11 +1413,17 @@ Field *find_field_in_table(THD *thd,TABLE *table,const char *name,uint length,
if (field->query_id != thd->query_id)
{
field->query_id=thd->query_id;
field->table->used_fields++;
table->used_fields++;
if (field->part_of_key)
{
if (!(field->part_of_key & table->ref_primary_key))
table->used_keys&=field->part_of_key;
}
else
table->used_keys=0;
}
else
thd->dupp_field=field;
field->table->used_keys&=field->part_of_key;
}
if (check_grants && !thd->master_access && check_grant_column(thd,table,name,length))
return WRONG_GRANT;
......@@ -1659,19 +1665,19 @@ static bool
insert_fields(THD *thd,TABLE_LIST *tables, const char *table_name,
List_iterator<Item> *it)
{
TABLE_LIST *table;
uint found;
DBUG_ENTER("insert_fields");
found=0;
for (table=tables ; table ; table=table->next)
for (; tables ; tables=tables->next)
{
TABLE *table=tables->table;
if (grant_option && !thd->master_access &&
check_grant_all_columns(thd,SELECT_ACL,table->table) )
check_grant_all_columns(thd,SELECT_ACL,table) )
DBUG_RETURN(-1);
if (!table_name || !strcmp(table_name,table->name))
if (!table_name || !strcmp(table_name,tables->name))
{
Field **ptr=table->table->field,*field;
Field **ptr=table->field,*field;
while ((field = *ptr++))
{
Item_field *item= new Item_field(field);
......@@ -1682,10 +1688,17 @@ insert_fields(THD *thd,TABLE_LIST *tables, const char *table_name,
if (field->query_id == thd->query_id)
thd->dupp_field=field;
field->query_id=thd->query_id;
field->table->used_keys&=field->part_of_key;
if (field->part_of_key)
{
if (!(field->part_of_key & table->ref_primary_key))
table->used_keys&=field->part_of_key;
}
else
table->used_keys=0;
}
/* All fields are used */
table->table->used_fields=table->table->fields;
table->used_fields=table->fields;
}
}
if (!found)
......@@ -1750,6 +1763,7 @@ int setup_conds(THD *thd,TABLE_LIST *tables,COND **conds)
// TODO: This could be optimized to use hashed names if t2 had a hash
for (j=0 ; j < t2->fields ; j++)
{
key_map tmp_map;
if (!my_strcasecmp(t1->field[i]->field_name,
t2->field[j]->field_name))
{
......@@ -1760,8 +1774,20 @@ int setup_conds(THD *thd,TABLE_LIST *tables,COND **conds)
tmp->fix_length_and_dec(); // Update cmp_type
tmp->const_item_cache=0;
cond_and->list.push_back(tmp);
t1->used_keys&= t1->field[i]->part_of_key;
t2->used_keys&= t2->field[j]->part_of_key;
if ((tmp_map=t1->field[i]->part_of_key))
{
if (!(tmp_map & t1->ref_primary_key))
t1->used_keys&=tmp_map;
}
else
t1->used_keys=0;
if ((tmp_map=t2->field[j]->part_of_key))
{
if (!(tmp_map & t2->ref_primary_key))
t2->used_keys&=tmp_map;
}
else
t2->used_keys=0;
break;
}
}
......
......@@ -2352,6 +2352,11 @@ make_join_readinfo(JOIN *join,uint options)
break;
case JT_EQ_REF:
table->status=STATUS_NO_RECORD;
if (tab->select)
{
delete tab->select->quick;
tab->select->quick=0;
}
delete tab->quick;
tab->quick=0;
table->file->index_init(tab->ref.key);
......@@ -2365,6 +2370,11 @@ make_join_readinfo(JOIN *join,uint options)
break;
case JT_REF:
table->status=STATUS_NO_RECORD;
if (tab->select)
{
delete tab->select->quick;
tab->select->quick=0;
}
delete tab->quick;
tab->quick=0;
table->file->index_init(tab->ref.key);
......
......@@ -207,49 +207,29 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
}
#endif
/* Allocate handler */
if (!(outparam->file= get_new_handler(outparam,outparam->db_type)))
goto err_not_open;
error=2;
if (db_stat)
{
int err;
if ((err=(outparam->file->
ha_open(index_file,
(db_stat & HA_READ_ONLY ? O_RDONLY : O_RDWR),
(db_stat & HA_OPEN_TEMPORARY ? HA_OPEN_TMP_TABLE :
((db_stat & HA_WAIT_IF_LOCKED) ||
(specialflag & SPECIAL_WAIT_IF_LOCKED)) ?
HA_OPEN_WAIT_IF_LOCKED :
(db_stat & (HA_ABORT_IF_LOCKED | HA_GET_INFO)) ?
HA_OPEN_ABORT_IF_LOCKED :
HA_OPEN_IGNORE_IF_LOCKED) | ha_open_flags))))
{
/* Set a flag if the table is crashed and it can be auto. repaired */
outparam->crashed=(err == HA_ERR_CRASHED &&
outparam->file->auto_repair() &&
!(ha_open_flags & HA_OPEN_FOR_REPAIR));
goto err_not_open; /* purecov: inspected */
}
}
outparam->db_low_byte_first=outparam->file->low_byte_first();
error=4;
outparam->reginfo.lock_type= TL_UNLOCK;
outparam->current_lock=F_UNLCK;
if (db_stat & HA_OPEN_KEYFILE || (prgflag & DELAYED_OPEN)) records=2;
if ((db_stat & HA_OPEN_KEYFILE) || (prgflag & DELAYED_OPEN)) records=2;
else records=1;
if (prgflag & (READ_ALL+EXTRA_RECORD)) records++;
rec_buff_length=ALIGN_SIZE(outparam->reclength+1);
/* QQ: TODO, remove the +1 from below */
rec_buff_length=ALIGN_SIZE(outparam->reclength+1+
outparam->file->extra_rec_buf_length());
if (!(outparam->record[0]= (byte*)
(record = (char *) alloc_root(&outparam->mem_root,
rec_buff_length * records))))
goto err; /* purecov: inspected */
goto err_not_open; /* purecov: inspected */
record[outparam->reclength]=0; // For purify and ->c_ptr()
outparam->rec_buff_length=rec_buff_length;
if (my_pread(file,(byte*) record,(uint) outparam->reclength,
(ulong) (uint2korr(head+6)+uint2korr(head+14)),
MYF(MY_NABP)))
goto err; /* purecov: inspected */
goto err_not_open; /* purecov: inspected */
for (i=0 ; i < records ; i++, record+=rec_buff_length)
{
outparam->record[i]=(byte*) record;
......@@ -265,12 +245,12 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
}
VOID(my_seek(file,pos,MY_SEEK_SET,MYF(0)));
if (my_read(file,(byte*) head,288,MYF(MY_NABP))) goto err;
if (my_read(file,(byte*) head,288,MYF(MY_NABP))) goto err_not_open;
if (crypted)
{
crypted->decode((char*) head+256,288-256);
if (sint2korr(head+284) != 0) // Should be 0
goto err; // Wrong password
goto err_not_open; // Wrong password
}
outparam->fields= uint2korr(head+258);
......@@ -292,13 +272,13 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
(outparam->fields+interval_parts+
keys+3)*sizeof(my_string)+
(n_length+int_length)))))
goto err; /* purecov: inspected */
goto err_not_open; /* purecov: inspected */
outparam->field=field_ptr;
read_length=((uint) (outparam->fields*11)+pos+
(uint) (n_length+int_length));
if (read_string(file,(gptr*) &disk_buff,read_length))
goto err; /* purecov: inspected */
goto err_not_open; /* purecov: inspected */
if (crypted)
{
crypted->decode((char*) disk_buff,read_length);
......@@ -321,6 +301,7 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
if (keynames)
fix_type_pointers(&int_array,&outparam->keynames,1,&keynames);
VOID(my_close(file,MYF(MY_WME)));
file= -1;
record=(char*) outparam->record[0]-1; /* Fieldstart = 1 */
if (null_field_first)
......@@ -426,7 +407,7 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
(uint) key_part->length);
#ifdef EXTRA_DEBUG
if (key_part->fieldnr > outparam->fields)
goto err; // sanity check
goto err_not_open; // sanity check
#endif
if (key_part->fieldnr)
{ // Should always be true !
......@@ -494,6 +475,12 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
(outparam->keys_in_use & ((key_map) 1 << primary_key)))
{
outparam->primary_key=primary_key;
if (outparam->file->option_flag() & HA_PRIMARY_KEY_IN_READ_INDEX)
outparam->ref_primary_key= (key_map) 1 << primary_key;
/*
If we are using an integer as the primary key then allow the user to
refer to it as '_rowid'
*/
if (outparam->key_info[primary_key].key_parts == 1)
{
Field *field= outparam->key_info[primary_key].key_part[0].field;
......@@ -505,6 +492,7 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
outparam->primary_key = MAX_KEY; // we do not have a primary key
}
x_free((gptr) disk_buff);
disk_buff=0;
if (new_field_pack_flag <= 1)
{ /* Old file format with default null */
uint null_length=(outparam->null_fields+7)/8;
......@@ -523,7 +511,7 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
(Field_blob**) alloc_root(&outparam->mem_root,
(uint) (outparam->blob_fields+1)*
sizeof(Field_blob*))))
goto err;
goto err_not_open;
for (ptr=outparam->field ; *ptr ; ptr++)
{
if ((*ptr)->flags & BLOB_FLAG)
......@@ -535,22 +523,43 @@ int openfrm(const char *name, const char *alias, uint db_stat, uint prgflag,
outparam->blob_field=
(Field_blob**) (outparam->field+outparam->fields); // Point at null ptr
/* The table struct is now initialzed; Open the table */
error=2;
if (db_stat)
{
int err;
if ((err=(outparam->file->
ha_open(index_file,
(db_stat & HA_READ_ONLY ? O_RDONLY : O_RDWR),
(db_stat & HA_OPEN_TEMPORARY ? HA_OPEN_TMP_TABLE :
((db_stat & HA_WAIT_IF_LOCKED) ||
(specialflag & SPECIAL_WAIT_IF_LOCKED)) ?
HA_OPEN_WAIT_IF_LOCKED :
(db_stat & (HA_ABORT_IF_LOCKED | HA_GET_INFO)) ?
HA_OPEN_ABORT_IF_LOCKED :
HA_OPEN_IGNORE_IF_LOCKED) | ha_open_flags))))
{
/* Set a flag if the table is crashed and it can be auto. repaired */
outparam->crashed=(err == HA_ERR_CRASHED &&
outparam->file->auto_repair() &&
!(ha_open_flags & HA_OPEN_FOR_REPAIR));
goto err_not_open; /* purecov: inspected */
}
}
outparam->db_low_byte_first=outparam->file->low_byte_first();
my_pthread_setspecific_ptr(THR_MALLOC,old_root);
opened_tables++;
#ifndef DBUG_OFF
if (use_hash)
(void) hash_check(&outparam->name_hash);
#endif
if (db_stat)
outparam->file->initialize();
DBUG_RETURN (0);
err:
if (outparam->file && db_stat)
(void) outparam->file->close();
err_not_open:
x_free((gptr) disk_buff);
VOID(my_close(file,MYF(MY_WME)));
if (file > 0)
VOID(my_close(file,MYF(MY_WME)));
err_end: /* Here when no file */
delete crypted;
......
......@@ -113,7 +113,7 @@ struct st_table {
byte *record_pointers; /* If sorted in memory */
ha_rows found_records; /* How many records in sort */
ORDER *group;
key_map quick_keys,used_keys;
key_map quick_keys, used_keys, ref_primary_key;
ha_rows quick_rows[MAX_KEY];
uint quick_key_parts[MAX_KEY];
key_part_map const_key_parts[MAX_KEY];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment