Bug#20719 - Reading dynamic records with write buffer could fail

Fixed a possible problem with reading dynamic records
while a write cache is active. The cache must be flushed
whenever a part of the file that is still held in the write
cache is to be read.

Added a read optimization to _mi_read_dynamic_record().

No test case. The problem was hypothetical (never observed in practice), but the defect did exist in the code.
parent c36dd286
...@@ -1095,12 +1095,41 @@ void _my_store_blob_length(byte *pos,uint pack_length,uint length) ...@@ -1095,12 +1095,41 @@ void _my_store_blob_length(byte *pos,uint pack_length,uint length)
} }
/* Read record from datafile */ /*
/* Returns 0 if ok, -1 if error */ Read record from datafile.
SYNOPSIS
_mi_read_dynamic_record()
info MI_INFO pointer to table.
filepos From where to read the record.
buf Destination for record.
NOTE
If a write buffer is active, it needs to be flushed if its contents
intersect with the record to read. We always check if the position
of the first byte of the write buffer is lower than the position
past the last byte to read. In theory this is also true if the write
buffer is completely below the read segment. That is, if there is no
intersection. But this case is unusual. We flush anyway. Only if the
first byte in the write buffer is above the last byte to read, we do
not flush.
A dynamic record may need several reads. So this check must be done
before every read. Reading a dynamic record starts with reading the
block header. If the record does not fit into the free space of the
header, the block may be longer than the header. In this case a
second read is necessary. These one or two reads repeat for every
part of the record.
RETURN
0 OK
-1 Error
*/
int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf) int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf)
{ {
int flag; int block_of_record;
uint b_type,left_length; uint b_type,left_length;
byte *to; byte *to;
MI_BLOCK_INFO block_info; MI_BLOCK_INFO block_info;
...@@ -1112,17 +1141,16 @@ int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf) ...@@ -1112,17 +1141,16 @@ int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf)
LINT_INIT(to); LINT_INIT(to);
LINT_INIT(left_length); LINT_INIT(left_length);
file=info->dfile; file=info->dfile;
block_info.next_filepos=filepos; /* for easyer loop */ block_of_record= 0; /* First block of record is numbered as zero. */
flag=block_info.second_read=0; block_info.second_read= 0;
do do
{ {
if (info->opt_flag & WRITE_CACHE_USED && if (info->opt_flag & WRITE_CACHE_USED &&
info->rec_cache.pos_in_file <= block_info.next_filepos && info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
flush_io_cache(&info->rec_cache)) flush_io_cache(&info->rec_cache))
goto err; goto err;
info->rec_cache.seek_not_done=1; info->rec_cache.seek_not_done=1;
if ((b_type=_mi_get_block_info(&block_info,file, if ((b_type= _mi_get_block_info(&block_info, file, filepos))
block_info.next_filepos))
& (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR)) BLOCK_FATAL_ERROR))
{ {
...@@ -1130,9 +1158,8 @@ int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf) ...@@ -1130,9 +1158,8 @@ int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf)
my_errno=HA_ERR_RECORD_DELETED; my_errno=HA_ERR_RECORD_DELETED;
goto err; goto err;
} }
if (flag == 0) /* First block */ if (block_of_record++ == 0) /* First block */
{ {
flag=1;
if (block_info.rec_len > (uint) info->s->base.max_pack_length) if (block_info.rec_len > (uint) info->s->base.max_pack_length)
goto panic; goto panic;
if (info->s->base.blobs) if (info->s->base.blobs)
...@@ -1147,11 +1174,35 @@ int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf) ...@@ -1147,11 +1174,35 @@ int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, byte *buf)
} }
if (left_length < block_info.data_len || ! block_info.data_len) if (left_length < block_info.data_len || ! block_info.data_len)
goto panic; /* Wrong linked record */ goto panic; /* Wrong linked record */
if (my_pread(file,(byte*) to,block_info.data_len,block_info.filepos, /* copy information that is already read */
MYF(MY_NABP))) {
goto panic; uint offset= (uint) (block_info.filepos - filepos);
left_length-=block_info.data_len; uint prefetch_len= (sizeof(block_info.header) - offset);
to+=block_info.data_len; filepos+= sizeof(block_info.header);
if (prefetch_len > block_info.data_len)
prefetch_len= block_info.data_len;
if (prefetch_len)
{
memcpy((byte*) to, block_info.header + offset, prefetch_len);
block_info.data_len-= prefetch_len;
left_length-= prefetch_len;
to+= prefetch_len;
}
}
/* read rest of record from file */
if (block_info.data_len)
{
if (info->opt_flag & WRITE_CACHE_USED &&
info->rec_cache.pos_in_file < filepos + block_info.data_len &&
flush_io_cache(&info->rec_cache))
goto err;
if (my_read(file, (byte*) to, block_info.data_len, MYF(MY_NABP)))
goto panic;
left_length-=block_info.data_len;
to+=block_info.data_len;
}
filepos= block_info.next_filepos;
} while (left_length); } while (left_length);
info->update|= HA_STATE_AKTIV; /* We have a aktive record */ info->update|= HA_STATE_AKTIV; /* We have a aktive record */
...@@ -1308,11 +1359,45 @@ static int _mi_cmp_buffer(File file, const byte *buff, my_off_t filepos, ...@@ -1308,11 +1359,45 @@ static int _mi_cmp_buffer(File file, const byte *buff, my_off_t filepos,
} }
/*
Read record from datafile.
SYNOPSIS
_mi_read_rnd_dynamic_record()
info MI_INFO pointer to table.
buf Destination for record.
filepos From where to read the record.
skip_deleted_blocks Whether to repeat reading until a non-deleted
record is found.
NOTE
If a write buffer is active, it needs to be flushed if its contents
intersect with the record to read. We always check if the position
of the first byte of the write buffer is lower than the position
past the last byte to read. In theory this is also true if the write
buffer is completely below the read segment. That is, if there is no
intersection. But this case is unusual. We flush anyway. Only if the
first byte in the write buffer is above the last byte to read, we do
not flush.
A dynamic record may need several reads. So this check must be done
before every read. Reading a dynamic record starts with reading the
block header. If the record does not fit into the free space of the
header, the block may be longer than the header. In this case a
second read is necessary. These one or two reads repeat for every
part of the record.
RETURN
0 OK
!= 0 Error
*/
int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
register my_off_t filepos, register my_off_t filepos,
my_bool skipp_deleted_blocks) my_bool skip_deleted_blocks)
{ {
int flag,info_read,save_errno; int block_of_record, info_read, save_errno;
uint left_len,b_type; uint left_len,b_type;
byte *to; byte *to;
MI_BLOCK_INFO block_info; MI_BLOCK_INFO block_info;
...@@ -1338,7 +1423,8 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, ...@@ -1338,7 +1423,8 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
else else
info_read=1; /* memory-keyinfoblock is ok */ info_read=1; /* memory-keyinfoblock is ok */
flag=block_info.second_read=0; block_of_record= 0; /* First block of record is numbered as zero. */
block_info.second_read= 0;
left_len=1; left_len=1;
do do
{ {
...@@ -1361,15 +1447,15 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, ...@@ -1361,15 +1447,15 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
{ {
if (_mi_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, if (_mi_read_cache(&info->rec_cache,(byte*) block_info.header,filepos,
sizeof(block_info.header), sizeof(block_info.header),
(!flag && skipp_deleted_blocks ? READING_NEXT : 0) | (!block_of_record && skip_deleted_blocks ?
READING_HEADER)) READING_NEXT : 0) | READING_HEADER))
goto panic; goto panic;
b_type=_mi_get_block_info(&block_info,-1,filepos); b_type=_mi_get_block_info(&block_info,-1,filepos);
} }
else else
{ {
if (info->opt_flag & WRITE_CACHE_USED && if (info->opt_flag & WRITE_CACHE_USED &&
info->rec_cache.pos_in_file <= filepos && info->rec_cache.pos_in_file < filepos + MI_BLOCK_INFO_HEADER_LENGTH &&
flush_io_cache(&info->rec_cache)) flush_io_cache(&info->rec_cache))
DBUG_RETURN(my_errno); DBUG_RETURN(my_errno);
info->rec_cache.seek_not_done=1; info->rec_cache.seek_not_done=1;
...@@ -1380,7 +1466,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, ...@@ -1380,7 +1466,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
BLOCK_FATAL_ERROR)) BLOCK_FATAL_ERROR))
{ {
if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
&& skipp_deleted_blocks) && skip_deleted_blocks)
{ {
filepos=block_info.filepos+block_info.block_len; filepos=block_info.filepos+block_info.block_len;
block_info.second_read=0; block_info.second_read=0;
...@@ -1394,7 +1480,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, ...@@ -1394,7 +1480,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
} }
goto err; goto err;
} }
if (flag == 0) /* First block */ if (block_of_record == 0) /* First block */
{ {
if (block_info.rec_len > (uint) share->base.max_pack_length) if (block_info.rec_len > (uint) share->base.max_pack_length)
goto panic; goto panic;
...@@ -1427,7 +1513,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, ...@@ -1427,7 +1513,7 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
left_len-=tmp_length; left_len-=tmp_length;
to+=tmp_length; to+=tmp_length;
filepos+=tmp_length; filepos+=tmp_length;
} }
} }
/* read rest of record from file */ /* read rest of record from file */
if (block_info.data_len) if (block_info.data_len)
...@@ -1436,11 +1522,17 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, ...@@ -1436,11 +1522,17 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
{ {
if (_mi_read_cache(&info->rec_cache,(byte*) to,filepos, if (_mi_read_cache(&info->rec_cache,(byte*) to,filepos,
block_info.data_len, block_info.data_len,
(!flag && skipp_deleted_blocks) ? READING_NEXT :0)) (!block_of_record && skip_deleted_blocks) ?
READING_NEXT : 0))
goto panic; goto panic;
} }
else else
{ {
if (info->opt_flag & WRITE_CACHE_USED &&
info->rec_cache.pos_in_file <
block_info.filepos + block_info.data_len &&
flush_io_cache(&info->rec_cache))
goto err;
/* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */ /* VOID(my_seek(info->dfile,filepos,MY_SEEK_SET,MYF(0))); */
if (my_read(info->dfile,(byte*) to,block_info.data_len,MYF(MY_NABP))) if (my_read(info->dfile,(byte*) to,block_info.data_len,MYF(MY_NABP)))
{ {
...@@ -1450,10 +1542,14 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf, ...@@ -1450,10 +1542,14 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, byte *buf,
} }
} }
} }
if (flag++ == 0) /*
Increment block-of-record counter. If it was the first block,
remember the position behind the block for the next call.
*/
if (block_of_record++ == 0)
{ {
info->nextpos=block_info.filepos+block_info.block_len; info->nextpos= block_info.filepos + block_info.block_len;
skipp_deleted_blocks=0; skip_deleted_blocks= 0;
} }
left_len-=block_info.data_len; left_len-=block_info.data_len;
to+=block_info.data_len; to+=block_info.data_len;
...@@ -1485,6 +1581,11 @@ uint _mi_get_block_info(MI_BLOCK_INFO *info, File file, my_off_t filepos) ...@@ -1485,6 +1581,11 @@ uint _mi_get_block_info(MI_BLOCK_INFO *info, File file, my_off_t filepos)
if (file >= 0) if (file >= 0)
{ {
/*
We do not use my_pread() here because we want to have the file
pointer set to the end of the header after this function.
my_pread() may leave the file pointer untouched.
*/
VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0)));
if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) != if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) !=
sizeof(info->header)) sizeof(info->header))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment