Commit c9829242 authored by Sergey Vojtovich

MDEV-15592 - Column COMPRESSED should select a 'high order' datatype

Compressed blob columns didn't accept data at their full capacity. E.g. storing
255 bytes in a TINYBLOB resulted in a "Data too long" error.

This is now allowed, provided that the compression method was able to produce
a shorter string (so that both the metadata and the compressed data fit in the
blob) and column_compression_threshold is lower than the blob's size.

If no compression was performed, we still have to reserve an additional byte
for metadata, and thus we perform normal data truncation and return its
status.
parent 8ad12b66
...@@ -1336,15 +1336,33 @@ a LENGTH(a) ...@@ -1336,15 +1336,33 @@ a LENGTH(a)
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1(a TINYTEXT COMPRESSED); CREATE TABLE t1(a TINYTEXT COMPRESSED);
SET column_compression_threshold=300; SET column_compression_threshold=300;
INSERT INTO t1 VALUES(REPEAT('a', 254));
INSERT INTO t1 VALUES(REPEAT(' ', 254));
INSERT INTO t1 VALUES(REPEAT('a', 255)); INSERT INTO t1 VALUES(REPEAT('a', 255));
ERROR 22001: Data too long for column 'a' at row 1 ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 255)); INSERT INTO t1 VALUES(REPEAT(' ', 255));
Warnings: Warnings:
Note 1265 Data truncated for column 'a' at row 1 Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 256));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 256));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 257));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 257));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
Note 1265 Data truncated for column 'a' at row 1
SET column_compression_threshold=DEFAULT; SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1; SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1;
a LENGTH(a) LEFT(a, 10) LENGTH(a)
254
254 254
254
254
aaaaaaaaaa 254
DROP TABLE t1; DROP TABLE t1;
# Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length # Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length
CREATE TABLE t1(a VARCHAR(255) COMPRESSED); CREATE TABLE t1(a VARCHAR(255) COMPRESSED);
...@@ -1360,6 +1378,32 @@ SELECT a, LENGTH(a) FROM t1; ...@@ -1360,6 +1378,32 @@ SELECT a, LENGTH(a) FROM t1;
a LENGTH(a) a LENGTH(a)
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 255 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 255
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(65531) COMPRESSED);
SET column_compression_threshold=65537;
INSERT INTO t1 VALUES(REPEAT('a', 65530));
INSERT INTO t1 VALUES(REPEAT(' ', 65530));
INSERT INTO t1 VALUES(REPEAT('a', 65531));
INSERT INTO t1 VALUES(REPEAT(' ', 65531));
INSERT INTO t1 VALUES(REPEAT('a', 65532));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 65532));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT('a', 65533));
ERROR 22001: Data too long for column 'a' at row 1
INSERT INTO t1 VALUES(REPEAT(' ', 65533));
Warnings:
Note 1265 Data truncated for column 'a' at row 1
SET column_compression_threshold=DEFAULT;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2;
LEFT(a, 10) LENGTH(a)
65530
65531
65531
65531
aaaaaaaaaa 65530
aaaaaaaaaa 65531
DROP TABLE t1;
# #
# MDEV-14929 - AddressSanitizer: memcpy-param-overlap in # MDEV-14929 - AddressSanitizer: memcpy-param-overlap in
# Field_longstr::compress # Field_longstr::compress
...@@ -1419,3 +1463,12 @@ COLUMN_NAME CHARACTER_MAXIMUM_LENGTH CHARACTER_OCTET_LENGTH ...@@ -1419,3 +1463,12 @@ COLUMN_NAME CHARACTER_MAXIMUM_LENGTH CHARACTER_OCTET_LENGTH
a 10 10 a 10 10
b 10 30 b 10 30
DROP TABLE t1; DROP TABLE t1;
#
# MDEV-15592 - Column COMPRESSED should select a 'high order' datatype
#
CREATE TABLE t1(a TINYTEXT COMPRESSED);
INSERT INTO t1 VALUES(REPEAT('a', 255));
SELECT LEFT(a, 10), LENGTH(a) FROM t1;
LEFT(a, 10) LENGTH(a)
aaaaaaaaaa 255
DROP TABLE t1;
...@@ -64,11 +64,19 @@ DROP TABLE t1; ...@@ -64,11 +64,19 @@ DROP TABLE t1;
CREATE TABLE t1(a TINYTEXT COMPRESSED); CREATE TABLE t1(a TINYTEXT COMPRESSED);
SET column_compression_threshold=300; SET column_compression_threshold=300;
INSERT INTO t1 VALUES(REPEAT('a', 254));
INSERT INTO t1 VALUES(REPEAT(' ', 254));
--error ER_DATA_TOO_LONG --error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 255)); INSERT INTO t1 VALUES(REPEAT('a', 255));
INSERT INTO t1 VALUES(REPEAT(' ', 255)); INSERT INTO t1 VALUES(REPEAT(' ', 255));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 256));
INSERT INTO t1 VALUES(REPEAT(' ', 256));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 257));
INSERT INTO t1 VALUES(REPEAT(' ', 257));
SET column_compression_threshold=DEFAULT; SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1; SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1;
DROP TABLE t1; DROP TABLE t1;
--echo # Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length --echo # Corner case: VARCHAR(255) COMPRESSED must have 2 bytes pack length
...@@ -80,6 +88,22 @@ SET column_compression_threshold=DEFAULT; ...@@ -80,6 +88,22 @@ SET column_compression_threshold=DEFAULT;
SELECT a, LENGTH(a) FROM t1; SELECT a, LENGTH(a) FROM t1;
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1(a VARCHAR(65531) COMPRESSED);
SET column_compression_threshold=65537;
INSERT INTO t1 VALUES(REPEAT('a', 65530));
INSERT INTO t1 VALUES(REPEAT(' ', 65530));
INSERT INTO t1 VALUES(REPEAT('a', 65531));
INSERT INTO t1 VALUES(REPEAT(' ', 65531));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 65532));
INSERT INTO t1 VALUES(REPEAT(' ', 65532));
--error ER_DATA_TOO_LONG
INSERT INTO t1 VALUES(REPEAT('a', 65533));
INSERT INTO t1 VALUES(REPEAT(' ', 65533));
SET column_compression_threshold=DEFAULT;
SELECT LEFT(a, 10), LENGTH(a) FROM t1 ORDER BY 1, 2;
DROP TABLE t1;
--echo # --echo #
--echo # MDEV-14929 - AddressSanitizer: memcpy-param-overlap in --echo # MDEV-14929 - AddressSanitizer: memcpy-param-overlap in
...@@ -113,6 +137,7 @@ INSERT INTO t1 VALUES('a'); ...@@ -113,6 +137,7 @@ INSERT INTO t1 VALUES('a');
SET column_compression_threshold=DEFAULT; SET column_compression_threshold=DEFAULT;
DROP TABLE t1; DROP TABLE t1;
--echo # --echo #
--echo # MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data --echo # MDEV-15938 - TINYTEXT CHARACTER SET utf8 COMPRESSED truncates data
--echo # --echo #
...@@ -136,3 +161,12 @@ FROM INFORMATION_SCHEMA.COLUMNS ...@@ -136,3 +161,12 @@ FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1' AND COLUMN_NAME IN ('a','b') WHERE TABLE_SCHEMA='test' AND TABLE_NAME='t1' AND COLUMN_NAME IN ('a','b')
ORDER BY COLUMN_NAME; ORDER BY COLUMN_NAME;
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # MDEV-15592 - Column COMPRESSED should select a 'high order' datatype
--echo #
CREATE TABLE t1(a TINYTEXT COMPRESSED);
INSERT INTO t1 VALUES(REPEAT('a', 255));
SELECT LEFT(a, 10), LENGTH(a) FROM t1;
DROP TABLE t1;
...@@ -7920,10 +7920,13 @@ void Field_varstring::hash(ulong *nr, ulong *nr2) ...@@ -7920,10 +7920,13 @@ void Field_varstring::hash(ulong *nr, ulong *nr2)
Compress field Compress field
@param[out] to destination buffer for compressed data @param[out] to destination buffer for compressed data
@param[in,out] to_length in: size of to, out: compressed data length @param[in] to_length size of to
@param[in] from data to compress @param[in] from data to compress
@param[in] length from length @param[in] length from length
@param[in] max_length truncate `from' to this length
@param[out] out_length compressed data length
@param[in] cs from character set @param[in] cs from character set
@param[in] nchars copy no more than "nchars" characters
In worst case (no compression performed) storage requirement is increased by In worst case (no compression performed) storage requirement is increased by
1 byte to store header. If it exceeds field length, normal data truncation is 1 byte to store header. If it exceeds field length, normal data truncation is
...@@ -7947,42 +7950,57 @@ void Field_varstring::hash(ulong *nr, ulong *nr2) ...@@ -7947,42 +7950,57 @@ void Field_varstring::hash(ulong *nr, ulong *nr2)
followed by compressed data. followed by compressed data.
*/ */
int Field_longstr::compress(char *to, uint *to_length, int Field_longstr::compress(char *to, uint to_length,
const char *from, uint length, const char *from, uint length,
uint max_length,
uint *out_length,
CHARSET_INFO *cs, size_t nchars) CHARSET_INFO *cs, size_t nchars)
{ {
THD *thd= get_thd(); THD *thd= get_thd();
char *buf= 0; char *buf;
uint buf_length;
int rc= 0; int rc= 0;
if (String::needs_conversion_on_storage(length, cs, field_charset) || if (String::needs_conversion_on_storage(length, cs, field_charset) ||
*to_length <= length) max_length < length)
{ {
if (!(buf= (char*) my_malloc(*to_length - 1, MYF(MY_WME)))) set_if_smaller(max_length, static_cast<ulonglong>(field_charset->mbmaxlen) * length + 1);
if (!(buf= (char*) my_malloc(max_length, MYF(MY_WME))))
{ {
*to_length= 0; *out_length= 0;
return -1; return -1;
} }
rc= well_formed_copy_with_check(buf, *to_length - 1, cs, from, length, rc= well_formed_copy_with_check(buf, max_length, cs, from, length,
nchars, true, &length); nchars, true, &buf_length);
from= buf; }
else
{
buf= const_cast<char*>(from);
buf_length= length;
} }
if (length == 0) if (buf_length == 0)
*to_length= 0; *out_length= 0;
else if (length >= thd->variables.column_compression_threshold && else if (buf_length >= thd->variables.column_compression_threshold &&
(*to_length= compression_method()->compress(thd, to, from, length))) (*out_length= compression_method()->compress(thd, to, buf, buf_length)))
status_var_increment(thd->status_var.column_compressions); status_var_increment(thd->status_var.column_compressions);
else else
{ {
/* Store uncompressed */ /* Store uncompressed */
to[0]= 0; to[0]= 0;
memcpy(to + 1, from, length); if (buf_length < to_length)
*to_length= length + 1; memcpy(to + 1, buf, buf_length);
else
{
/* Storing string at blob capacity, e.g. 255 bytes string to TINYBLOB. */
rc= well_formed_copy_with_check(to + 1, to_length - 1, cs, from, length,
nchars, true, &buf_length);
}
*out_length= buf_length + 1;
} }
if (buf) if (buf != from)
my_free(buf); my_free(buf);
return rc; return rc;
} }
...@@ -8036,10 +8054,12 @@ int Field_varstring_compressed::store(const char *from, size_t length, ...@@ -8036,10 +8054,12 @@ int Field_varstring_compressed::store(const char *from, size_t length,
CHARSET_INFO *cs) CHARSET_INFO *cs)
{ {
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint to_length= (uint)MY_MIN(field_length, field_charset->mbmaxlen * length + 1); uint compressed_length;
int rc= compress((char*) get_data(), &to_length, from, (uint) length, cs, int rc= compress((char*) get_data(), field_length, from, (uint) length,
(to_length - 1) / field_charset->mbmaxlen); Field_varstring_compressed::max_display_length(),
store_length(to_length); &compressed_length, cs,
Field_varstring_compressed::char_length());
store_length(compressed_length);
return rc; return rc;
} }
...@@ -8648,7 +8668,10 @@ int Field_blob_compressed::store(const char *from, size_t length, ...@@ -8648,7 +8668,10 @@ int Field_blob_compressed::store(const char *from, size_t length,
CHARSET_INFO *cs) CHARSET_INFO *cs)
{ {
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED; ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint to_length= (uint)MY_MIN(max_data_length(), field_charset->mbmaxlen * length + 1); uint compressed_length;
uint max_length= max_data_length();
uint to_length= (uint) MY_MIN(max_length,
field_charset->mbmaxlen * length + 1);
String tmp(from, length, cs); String tmp(from, length, cs);
int rc; int rc;
...@@ -8658,9 +8681,9 @@ int Field_blob_compressed::store(const char *from, size_t length, ...@@ -8658,9 +8681,9 @@ int Field_blob_compressed::store(const char *from, size_t length,
if (value.alloc(to_length)) if (value.alloc(to_length))
goto oom; goto oom;
rc= compress((char*) value.ptr(), &to_length, tmp.ptr(), (uint) length, cs, rc= compress((char*) value.ptr(), to_length, tmp.ptr(), (uint) length,
(uint) length); max_length, &compressed_length, cs, (uint) length);
set_ptr(to_length, (uchar*) value.ptr()); set_ptr(compressed_length, (uchar*) value.ptr());
return rc; return rc;
oom: oom:
......
...@@ -1790,8 +1790,10 @@ class Field_longstr :public Field_str ...@@ -1790,8 +1790,10 @@ class Field_longstr :public Field_str
const Item *item) const; const Item *item) const;
bool cmp_to_string_with_stricter_collation(const Item_bool_func *cond, bool cmp_to_string_with_stricter_collation(const Item_bool_func *cond,
const Item *item) const; const Item *item) const;
int compress(char *to, uint *to_length, int compress(char *to, uint to_length,
const char *from, uint length, const char *from, uint length,
uint max_length,
uint *out_length,
CHARSET_INFO *cs, size_t nchars); CHARSET_INFO *cs, size_t nchars);
String *uncompress(String *val_buffer, String *val_ptr, String *uncompress(String *val_buffer, String *val_ptr,
const uchar *from, uint from_length); const uchar *from, uint from_length);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment