Commit c9829242 authored by Sergey Vojtovich

MDEV-15592 - Column COMPRESSED should select a 'high order' datatype

Compressed blob columns didn't accept data at their capacity. E.g. storing
255 bytes to TINYBLOB results in "Data too long" error.

Now this is allowed, provided the compression method was able to produce a
shorter string (so that both metadata and compressed data fit into the blob)
and column_compression_threshold is lower than the blob capacity.

If no compression was performed, we still have to reserve an additional byte
for metadata, and thus we perform normal data truncation and return its
status.
parent 8ad12b66
......@@ -1336,15 +1336,33 @@ a LENGTH(a)
DROP TABLE t1;
CREATE TABLE t1(a TINYTEXT COMPRESSED);
SET column_compression_threshold=300;
INSERT INTO t1 VALUES(REPEAT('a', 254));
INSERT INTO t1 VALUES(REPEAT(' ', 254));
INSERT INTO t1 VALUES(REPEAT('a', 255));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 255));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 256));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 256));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 257));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 257));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
Note 1265 Data truncated for column 'a' at row 1
SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1;
a LENGTH(a)
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1;
LEFT(a, 10) LENGTH(a)
254
254
254
254
aaaaaaaaaa 254
DROP TABLE t1;
# Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length
CREATE TABLE t1(a VARCHAR(255) COMPRESSED);
......@@ -1360,6 +1378,32 @@ SELECT a, LENGTH(a) FROM t1;
a LENGTH(a)
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 255
DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(65531) COMPRESSED);
SET column_compression_threshold=65537;
INSERT INTO t1 VALUES(REPEAT('a', 65530));
INSERT INTO t1 VALUES(REPEAT(' ', 65530));
INSERT INTO t1 VALUES(REPEAT('a', 65531));
INSERT INTO t1 VALUES(REPEAT(' ', 65531));
INSERT INTO t1 VALUES(REPEAT('a', 65532));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 65532));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 65533));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 65533));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
SET column_compression_threshold=DEFAULT;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2;
LEFT(a, 10) LENGTH(a)
65530
65531
65531
65531
aaaaaaaaaa 65530
aaaaaaaaaa 65531
DROP TABLE t1;
#
# MDEV-14929 - AddressSanitizer: memcpy-param-overlap in
# Field_longstr::compress
......@@ -1419,3 +1463,12 @@ COLUMN_NAME CHARACTER_MAXIMUM_LENGTH CHARACTER_OCTET_LENGTH
a 10 10
b 10 30
DROP TABLE t1;
#
# MDEV-15592 - Column COMPRESSED should select a 'high order' datatype
#
CREATE TABLE t1(a TINYTEXT COMPRESSED);
INSERT INTO t1 VALUES(REPEAT('a', 255));
SELECT LEFT(a, 10), LENGTH(a) FROM t1;
LEFT(a, 10) LENGTH(a)
aaaaaaaaaa 255
DROP TABLE t1;
......@@ -64,11 +64,19 @@ DROP TABLE t1;
CREATE TABLE t1(a TINYTEXT COMPRESSED);
SET column_compression_threshold=300;
INSERT INTO t1 VALUES(REPEAT('a', 254));
INSERT INTO t1 VALUES(REPEAT(' ', 254));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 255));
INSERT INTO t1 VALUES(REPEAT(' ', 255));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 256));
INSERT INTO t1 VALUES(REPEAT(' ', 256));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 257));
INSERT INTO t1 VALUES(REPEAT(' ', 257));
SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1;
DROP TABLE t1;
--echo # Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length
......@@ -80,6 +88,22 @@ SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(65531) COMPRESSED);
SET column_compression_threshold=65537;
INSERT INTO t1 VALUES(REPEAT('a', 65530));
INSERT INTO t1 VALUES(REPEAT(' ', 65530));
INSERT INTO t1 VALUES(REPEAT('a', 65531));
INSERT INTO t1 VALUES(REPEAT(' ', 65531));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 65532));
INSERT INTO t1 VALUES(REPEAT(' ', 65532));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 65533));
INSERT INTO t1 VALUES(REPEAT(' ', 65533));
SET column_compression_threshold=DEFAULT;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2;
DROP TABLE t1;
--echo #
--echo # MDEV-14929 - AddressSanitizer: memcpy-param-overlap in
......@@ -113,6 +137,7 @@ INSERT INTO t1 VALUES('a');
SET column_compression_threshold=DEFAULT;
DROP TABLE t1;
--echo #
--echo # MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
--echo #
......@@ -136,3 +161,12 @@ FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1' AND COLUMN_NAME IN ('a','b')
ORDER BY COLUMN_NAME;
DROP TABLE t1;
--echo #
--echo # MDEV-15592 - Column COMPRESSED should select a 'high order' datatype
--echo #
CREATE TABLE t1(a TINYTEXT COMPRESSED);
INSERT INTO t1 VALUES(REPEAT('a', 255));
SELECT LEFT(a, 10), LENGTH(a) FROM t1;
DROP TABLE t1;
......@@ -7920,10 +7920,13 @@ void Field_varstring::hash(ulong *nr, ulong *nr2)
Compress field
@param[out] to destination buffer for compressed data
@param[in,out] to_length in: size of to, out: compressed data length
@param[in] to_length size of to
@param[in] from data to compress
@param[in] length from length
@param[in] max_length truncate `from' to this length
@param[out] out_length compressed data length
@param[in] cs from character set
@param[in] nchars copy no more than "nchars" characters
In worst case (no compression performed) storage requirement is increased by
1 byte to store header. If it exceeds field length, normal data truncation is
......@@ -7947,42 +7950,57 @@ void Field_varstring::hash(ulong *nr, ulong *nr2)
followed by compressed data.
*/
int Field_longstr::compress(char *to, uint *to_length,
int Field_longstr::compress(char *to, uint to_length,
const char *from, uint length,
uint max_length,
uint *out_length,
CHARSET_INFO *cs, size_t nchars)
{
THD *thd= get_thd();
char *buf= 0;
char *buf;
uint buf_length;
int rc= 0;
if (String::needs_conversion_on_storage(length, cs, field_charset) ||
*to_length <= length)
max_length < length)
{
if (!(buf= (char*) my_malloc(*to_length - 1, MYF(MY_WME))))
set_if_smaller(max_length, static_cast<ulonglong>(field_charset->mbmaxlen) * length + 1);
if (!(buf= (char*) my_malloc(max_length, MYF(MY_WME))))
{
*to_length= 0;
*out_length= 0;
return -1;
}
rc= well_formed_copy_with_check(buf, *to_length - 1, cs, from, length,
nchars, true, &length);
from= buf;
rc= well_formed_copy_with_check(buf, max_length, cs, from, length,
nchars, true, &buf_length);
}
else
{
buf= const_cast<char*>(from);
buf_length= length;
}
if (length == 0)
*to_length= 0;
else if (length >= thd->variables.column_compression_threshold &&
(*to_length= compression_method()->compress(thd, to, from, length)))
if (buf_length == 0)
*out_length= 0;
else if (buf_length >= thd->variables.column_compression_threshold &&
(*out_length= compression_method()->compress(thd, to, buf, buf_length)))
status_var_increment(thd->status_var.column_compressions);
else
{
/* Store uncompressed */
to[0]= 0;
memcpy(to + 1, from, length);
*to_length= length + 1;
if (buf_length < to_length)
memcpy(to + 1, buf, buf_length);
else
{
/* Storing string at blob capacity, e.g. 255 bytes string to TINYBLOB. */
rc= well_formed_copy_with_check(to + 1, to_length - 1, cs, from, length,
nchars, true, &buf_length);
}
*out_length= buf_length + 1;
}
if (buf)
if (buf != from)
my_free(buf);
return rc;
}
......@@ -8036,10 +8054,12 @@ int Field_varstring_compressed::store(const char *from, size_t length,
CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint to_length= (uint)MY_MIN(field_length, field_charset->mbmaxlen * length + 1);
int rc= compress((char*) get_data(), &to_length, from, (uint) length, cs,
(to_length - 1) / field_charset->mbmaxlen);
store_length(to_length);
uint compressed_length;
int rc= compress((char*) get_data(), field_length, from, (uint) length,
Field_varstring_compressed::max_display_length(),
&compressed_length, cs,
Field_varstring_compressed::char_length());
store_length(compressed_length);
return rc;
}
......@@ -8648,7 +8668,10 @@ int Field_blob_compressed::store(const char *from, size_t length,
CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint to_length= (uint)MY_MIN(max_data_length(), field_charset->mbmaxlen * length + 1);
uint compressed_length;
uint max_length= max_data_length();
uint to_length= (uint) MY_MIN(max_length,
field_charset->mbmaxlen * length + 1);
String tmp(from, length, cs);
int rc;
......@@ -8658,9 +8681,9 @@ int Field_blob_compressed::store(const char *from, size_t length,
if (value.alloc(to_length))
goto oom;
rc= compress((char*) value.ptr(), &to_length, tmp.ptr(), (uint) length, cs,
(uint) length);
set_ptr(to_length, (uchar*) value.ptr());
rc= compress((char*) value.ptr(), to_length, tmp.ptr(), (uint) length,
max_length, &compressed_length, cs, (uint) length);
set_ptr(compressed_length, (uchar*) value.ptr());
return rc;
oom:
......
......@@ -1790,8 +1790,10 @@ class Field_longstr :public Field_str
const Item *item) const;
bool cmp_to_string_with_stricter_collation(const Item_bool_func *cond,
const Item *item) const;
int compress(char *to, uint *to_length,
int compress(char *to, uint to_length,
const char *from, uint length,
uint max_length,
uint *out_length,
CHARSET_INFO *cs, size_t nchars);
String *uncompress(String *val_buffer, String *val_ptr,
const uchar *from, uint from_length);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment