Commit f552febe, authored and committed by Andrew Hutchings

MDEV-30879 Add support for up to BASE 62 to CONV()

BASE 62 uses 0-9, A-Z and then a-z to give the numbers 0-61. This patch
increases the range of the string functions to cover this.

Based on ideas and tests in PR #2589, but re-written into the charset
functions.

Includes fix by Sergei, UBSAN complained:
ctype-simple.c:683:38: runtime error: negation of -9223372036854775808
cannot be represented in type 'long long int'; cast to an unsigned
type to negate this value to itself
Co-authored-by: Weijun Huang <huangweijun1001@gmail.com>
Co-authored-by: Sergei Golubchik <serg@mariadb.org>
parent be6d48fd
...@@ -74,6 +74,7 @@ extern "C" { ...@@ -74,6 +74,7 @@ extern "C" {
#endif #endif
/* Declared in int2str() */ /* Declared in int2str() */
extern const char _dig_vec_base62[];
extern const char _dig_vec_upper[]; extern const char _dig_vec_upper[];
extern const char _dig_vec_lower[]; extern const char _dig_vec_lower[];
......
...@@ -1078,8 +1078,8 @@ lpad(12345, 5, "#") ...@@ -1078,8 +1078,8 @@ lpad(12345, 5, "#")
SELECT conv(71, 10, 36), conv('1Z', 36, 10); SELECT conv(71, 10, 36), conv('1Z', 36, 10);
conv(71, 10, 36) conv('1Z', 36, 10) conv(71, 10, 36) conv('1Z', 36, 10)
1Z 71 1Z 71
SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10); SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
conv(71, 10, 37) conv('1Z', 37, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10) conv(71, 10, 63) conv('1Z', 63, 10) conv(0,1,10) conv(0,0,10) conv(0,-1,10)
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
create table t1 (id int(1), str varchar(10)) DEFAULT CHARSET=utf8; create table t1 (id int(1), str varchar(10)) DEFAULT CHARSET=utf8;
insert into t1 values (1,'aaaaaaaaaa'), (2,'bbbbbbbbbb'); insert into t1 values (1,'aaaaaaaaaa'), (2,'bbbbbbbbbb');
...@@ -5535,3 +5535,63 @@ aes_encrypt(a,a) is null ...@@ -5535,3 +5535,63 @@ aes_encrypt(a,a) is null
# #
# End of 11.2 tests # End of 11.2 tests
# #
#
# MDEV-30879 Add conversion to based 62 for CONV function
#
SELECT CONV('1z', 62, 10);
CONV('1z', 62, 10)
123
SELECT CONV('1Z', 62, 10);
CONV('1Z', 62, 10)
97
SELECT CONV('-1Z', 62, 10);
CONV('-1Z', 62, 10)
18446744073709551519
SELECT CONV('-1Z', -62, 10);
CONV('-1Z', -62, 10)
18446744073709551519
SELECT CONV('-1Z', 62, -10);
CONV('-1Z', 62, -10)
-97
SELECT CONV('-1Z', -62, -10);
CONV('-1Z', -62, -10)
-97
SELECT CONV('AzL8n0Y58m7', 62, 10);
CONV('AzL8n0Y58m7', 62, 10)
9223372036854775807
SELECT CONV('LygHa16AHYE', 62, 10);
CONV('LygHa16AHYE', 62, 10)
18446744073709551614
SELECT CONV('LygHa16AHYF', 62, 10);
CONV('LygHa16AHYF', 62, 10)
18446744073709551615
SELECT CONV('LygHa16AHZ0', 62, 10);
CONV('LygHa16AHZ0', 62, 10)
18446744073709551615
SELECT CONV('-AzL8n0Y58m7', -62, -10);
CONV('-AzL8n0Y58m7', -62, -10)
-9223372036854775807
SELECT CONV('-AzL8n0Y58m8', -62, -10);
CONV('-AzL8n0Y58m8', -62, -10)
-9223372036854775808
SELECT CONV('-AzL8n0Y58m9', -62, -10);
CONV('-AzL8n0Y58m9', -62, -10)
-9223372036854775808
SELECT CONV('-LygHa16AHZ0', -62, -10);
CONV('-LygHa16AHZ0', -62, -10)
-9223372036854775808
SELECT CONV('LygHa16AHYF', 63, 10);
CONV('LygHa16AHYF', 63, 10)
NULL
SELECT CONV(18446744073709551615, 10, 63);
CONV(18446744073709551615, 10, 63)
NULL
SELECT CONV(18446744073709551615, 10, 62);
CONV(18446744073709551615, 10, 62)
LygHa16AHYF
SELECT CONV(-9223372036854775808, -10, -62);
CONV(-9223372036854775808, -10, -62)
-AzL8n0Y58m8
#
# End of 11.4 tests
#
...@@ -570,7 +570,7 @@ SELECT lpad(12345, 5, "#"); ...@@ -570,7 +570,7 @@ SELECT lpad(12345, 5, "#");
# #
SELECT conv(71, 10, 36), conv('1Z', 36, 10); SELECT conv(71, 10, 36), conv('1Z', 36, 10);
SELECT conv(71, 10, 37), conv('1Z', 37, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10); SELECT conv(71, 10, 63), conv('1Z', 63, 10), conv(0,1,10),conv(0,0,10), conv(0,-1,10);
# #
# Bug in SUBSTRING when mixed with CONCAT and ORDER BY (Bug #3089) # Bug in SUBSTRING when mixed with CONCAT and ORDER BY (Bug #3089)
...@@ -2481,3 +2481,40 @@ select aes_encrypt(a,a) is null from (values('a'),(NULL),('b')) x; ...@@ -2481,3 +2481,40 @@ select aes_encrypt(a,a) is null from (values('a'),(NULL),('b')) x;
--echo # --echo #
--echo # End of 11.2 tests --echo # End of 11.2 tests
--echo # --echo #
--echo #
--echo # MDEV-30879 Add conversion to based 62 for CONV function
--echo #
SELECT CONV('1z', 62, 10);
SELECT CONV('1Z', 62, 10);
SELECT CONV('-1Z', 62, 10);
SELECT CONV('-1Z', -62, 10);
SELECT CONV('-1Z', 62, -10);
SELECT CONV('-1Z', -62, -10);
# Check limits
SELECT CONV('AzL8n0Y58m7', 62, 10);
SELECT CONV('LygHa16AHYE', 62, 10);
SELECT CONV('LygHa16AHYF', 62, 10);
# Out-of-range input produces no warning; the result is clamped to the limit value
SELECT CONV('LygHa16AHZ0', 62, 10);
SELECT CONV('-AzL8n0Y58m7', -62, -10);
SELECT CONV('-AzL8n0Y58m8', -62, -10);
SELECT CONV('-AzL8n0Y58m9', -62, -10);
SELECT CONV('-LygHa16AHZ0', -62, -10);
# Base 63 is out of range, so these should return NULL
SELECT CONV('LygHa16AHYF', 63, 10);
SELECT CONV(18446744073709551615, 10, 63);
# Test base 10 -> base 62 at the unsigned maximum and the signed minimum
SELECT CONV(18446744073709551615, 10, 62);
SELECT CONV(-9223372036854775808, -10, -62);
--echo #
--echo # End of 11.4 tests
--echo #
...@@ -3936,8 +3936,8 @@ String *Item_func_conv::val_str(String *str) ...@@ -3936,8 +3936,8 @@ String *Item_func_conv::val_str(String *str)
// Note that abs(INT_MIN) is undefined. // Note that abs(INT_MIN) is undefined.
if (args[0]->null_value || args[1]->null_value || args[2]->null_value || if (args[0]->null_value || args[1]->null_value || args[2]->null_value ||
from_base == INT_MIN || to_base == INT_MIN || from_base == INT_MIN || to_base == INT_MIN ||
abs(to_base) > 36 || abs(to_base) < 2 || abs(to_base) > 62 || abs(to_base) < 2 ||
abs(from_base) > 36 || abs(from_base) < 2 || !(res->length())) abs(from_base) > 62 || abs(from_base) < 2 || !(res->length()))
{ {
null_value= 1; null_value= 1;
return NULL; return NULL;
......
...@@ -451,7 +451,11 @@ long my_strntol_8bit(CHARSET_INFO *cs, ...@@ -451,7 +451,11 @@ long my_strntol_8bit(CHARSET_INFO *cs,
else if (c>='A' && c<='Z') else if (c>='A' && c<='Z')
c = c - 'A' + 10; c = c - 'A' + 10;
else if (c>='a' && c<='z') else if (c>='a' && c<='z')
{
c = c - 'a' + 10; c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else else
break; break;
if (c >= base) if (c >= base)
...@@ -546,7 +550,11 @@ ulong my_strntoul_8bit(CHARSET_INFO *cs, ...@@ -546,7 +550,11 @@ ulong my_strntoul_8bit(CHARSET_INFO *cs,
else if (c>='A' && c<='Z') else if (c>='A' && c<='Z')
c = c - 'A' + 10; c = c - 'A' + 10;
else if (c>='a' && c<='z') else if (c>='a' && c<='z')
{
c = c - 'a' + 10; c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else else
break; break;
if (c >= base) if (c >= base)
...@@ -634,7 +642,11 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)), ...@@ -634,7 +642,11 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
else if (c>='A' && c<='Z') else if (c>='A' && c<='Z')
c = c - 'A' + 10; c = c - 'A' + 10;
else if (c>='a' && c<='z') else if (c>='a' && c<='z')
{
c = c - 'a' + 10; c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else else
break; break;
if (c >= base) if (c >= base)
...@@ -656,8 +668,12 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)), ...@@ -656,8 +668,12 @@ longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
if (negative) if (negative)
{ {
if (i > (ulonglong) LONGLONG_MIN) if (i >= (ulonglong) LONGLONG_MIN)
{
if (i == (ulonglong) LONGLONG_MIN)
return LONGLONG_MIN;
overflow = 1; overflow = 1;
}
} }
else if (i > (ulonglong) LONGLONG_MAX) else if (i > (ulonglong) LONGLONG_MAX)
overflow = 1; overflow = 1;
...@@ -731,7 +747,11 @@ ulonglong my_strntoull_8bit(CHARSET_INFO *cs, ...@@ -731,7 +747,11 @@ ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
else if (c>='A' && c<='Z') else if (c>='A' && c<='Z')
c = c - 'A' + 10; c = c - 'A' + 10;
else if (c>='a' && c<='z') else if (c>='a' && c<='z')
{
c = c - 'a' + 10; c = c - 'a' + 10;
if (base > 36)
c += 26;
}
else else
break; break;
if (c >= base) if (c >= base)
......
...@@ -462,7 +462,11 @@ my_strntoll_mb2_or_mb4(CHARSET_INFO *cs, ...@@ -462,7 +462,11 @@ my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
else if ( wc>='A' && wc<='Z') else if ( wc>='A' && wc<='Z')
wc = wc - 'A' + 10; wc = wc - 'A' + 10;
else if ( wc>='a' && wc<='z') else if ( wc>='a' && wc<='z')
{
wc = wc - 'a' + 10; wc = wc - 'a' + 10;
if (base > 36)
wc += 26;
}
else else
break; break;
if ((int)wc >= base) if ((int)wc >= base)
...@@ -575,7 +579,11 @@ my_strntoull_mb2_or_mb4(CHARSET_INFO *cs, ...@@ -575,7 +579,11 @@ my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
else if ( wc>='A' && wc<='Z') else if ( wc>='A' && wc<='Z')
wc = wc - 'A' + 10; wc = wc - 'A' + 10;
else if ( wc>='a' && wc<='z') else if ( wc>='a' && wc<='z')
{
wc = wc - 'a' + 10; wc = wc - 'a' + 10;
if (base > 36)
wc += 26;
}
else else
break; break;
if ((int)wc >= base) if ((int)wc >= base)
......
...@@ -31,6 +31,8 @@ ...@@ -31,6 +31,8 @@
/* /*
_dig_vec arrays are public because they are used in several outer places. _dig_vec arrays are public because they are used in several outer places.
*/ */
const char _dig_vec_base62[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const char _dig_vec_upper[] = const char _dig_vec_upper[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const char _dig_vec_lower[] = const char _dig_vec_lower[] =
...@@ -50,7 +52,7 @@ const char _dig_vec_lower[] = ...@@ -50,7 +52,7 @@ const char _dig_vec_lower[] =
DESCRIPTION DESCRIPTION
Converts the (long) integer value to its character form and moves it to Converts the (long) integer value to its character form and moves it to
the destination buffer followed by a terminating NUL. the destination buffer followed by a terminating NUL.
If radix is -2..-36, val is taken to be SIGNED, if radix is 2..36, val is If radix is -2..-62, val is taken to be SIGNED, if radix is 2..62, val is
taken to be UNSIGNED. That is, val is signed if and only if radix is. taken to be UNSIGNED. That is, val is signed if and only if radix is.
All other radixes treated as bad and nothing will be changed in this case. All other radixes treated as bad and nothing will be changed in this case.
...@@ -68,12 +70,17 @@ int2str(register long int val, register char *dst, register int radix, ...@@ -68,12 +70,17 @@ int2str(register long int val, register char *dst, register int radix,
char buffer[65]; char buffer[65];
register char *p; register char *p;
long int new_val; long int new_val;
const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower; const char *dig_vec;
ulong uval= (ulong) val; ulong uval= (ulong) val;
if (radix < -36 || radix > 36)
dig_vec= _dig_vec_base62;
else
dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
if (radix < 0) if (radix < 0)
{ {
if (radix < -36 || radix > -2) if (radix < -62 || radix > -2)
return NullS; return NullS;
if (val < 0) if (val < 0)
{ {
...@@ -83,7 +90,7 @@ int2str(register long int val, register char *dst, register int radix, ...@@ -83,7 +90,7 @@ int2str(register long int val, register char *dst, register int radix,
} }
radix = -radix; radix = -radix;
} }
else if (radix > 36 || radix < 2) else if (radix > 62 || radix < 2)
return NullS; return NullS;
/* /*
......
...@@ -35,8 +35,8 @@ ...@@ -35,8 +35,8 @@
result is normally a pointer to this NUL character, but if the radix result is normally a pointer to this NUL character, but if the radix
is dud the result will be NullS and nothing will be changed. is dud the result will be NullS and nothing will be changed.
If radix is -2..-36, val is taken to be SIGNED. If radix is -2..-62, val is taken to be SIGNED.
If radix is 2.. 36, val is taken to be UNSIGNED. If radix is 2.. 62, val is taken to be UNSIGNED.
That is, val is signed if and only if radix is. You will normally That is, val is signed if and only if radix is. You will normally
use radix -10 only through itoa and ltoa, for radix 2, 8, or 16 use radix -10 only through itoa and ltoa, for radix 2, 8, or 16
unsigned is what you generally want. unsigned is what you generally want.
...@@ -63,12 +63,17 @@ char *ll2str(longlong val,char *dst,int radix, int upcase) ...@@ -63,12 +63,17 @@ char *ll2str(longlong val,char *dst,int radix, int upcase)
char buffer[65]; char buffer[65];
register char *p; register char *p;
long long_val; long long_val;
const char *dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower; const char *dig_vec;
ulonglong uval= (ulonglong) val; ulonglong uval= (ulonglong) val;
if (radix < -36 || radix > 36)
dig_vec= _dig_vec_base62;
else
dig_vec= upcase ? _dig_vec_upper : _dig_vec_lower;
if (radix < 0) if (radix < 0)
{ {
if (radix < -36 || radix > -2) return (char*) 0; if (radix < -62 || radix > -2) return (char*) 0;
if (val < 0) { if (val < 0) {
*dst++ = '-'; *dst++ = '-';
/* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */ /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
...@@ -78,7 +83,7 @@ char *ll2str(longlong val,char *dst,int radix, int upcase) ...@@ -78,7 +83,7 @@ char *ll2str(longlong val,char *dst,int radix, int upcase)
} }
else else
{ {
if (radix > 36 || radix < 2) return (char*) 0; if (radix > 62 || radix < 2) return (char*) 0;
} }
if (uval == 0) if (uval == 0)
{ {
......
...@@ -55,9 +55,9 @@ ...@@ -55,9 +55,9 @@
#include "my_sys.h" /* defines errno */ #include "my_sys.h" /* defines errno */
#include <errno.h> #include <errno.h>
#define char_val(X) (X >= '0' && X <= '9' ? X-'0' :\ #define char_val(X, Y) (X >= '0' && X <= '9' ? X-'0' :\
X >= 'A' && X <= 'Z' ? X-'A'+10 :\ X >= 'A' && X <= 'Z' ? X-'A'+10 :\
X >= 'a' && X <= 'z' ? X-'a'+10 :\ X >= 'a' && X <= 'z' ? (Y <= 36 ? X-'a'+10 : X-'a'+36) :\
'\177') '\177')
char *str2int(register const char *src, register int radix, long int lower, char *str2int(register const char *src, register int radix, long int lower,
...@@ -76,10 +76,10 @@ char *str2int(register const char *src, register int radix, long int lower, ...@@ -76,10 +76,10 @@ char *str2int(register const char *src, register int radix, long int lower,
*val = 0; *val = 0;
/* Check that the radix is in the range 2..36 */ /* Check that the radix is in the range 2..62 */
#ifndef DBUG_OFF #ifndef DBUG_OFF
if (radix < 2 || radix > 36) { if (radix < 2 || radix > 62) {
errno=EDOM; errno=EDOM;
return NullS; return NullS;
} }
...@@ -126,7 +126,7 @@ char *str2int(register const char *src, register int radix, long int lower, ...@@ -126,7 +126,7 @@ char *str2int(register const char *src, register int radix, long int lower,
to left in order to avoid overflow. Answer is after last digit. to left in order to avoid overflow. Answer is after last digit.
*/ */
for (n = 0; (digits[n]=char_val(*src)) < radix && n < 20; n++,src++) ; for (n = 0; (digits[n]=char_val(*src, radix)) < radix && n < 20; n++,src++) ;
/* Check that there is at least one digit */ /* Check that there is at least one digit */
......
...@@ -22,8 +22,8 @@ Speciella anv ...@@ -22,8 +22,8 @@ Speciella anv
the destination string "dst" followed by a terminating NUL. The the destination string "dst" followed by a terminating NUL. The
result is normally a pointer to this NUL character, but if the radix result is normally a pointer to this NUL character, but if the radix
is dud the result will be NullS and nothing will be changed. is dud the result will be NullS and nothing will be changed.
If radix is -2..-36, val is taken to be SIGNED. If radix is -2..-62, val is taken to be SIGNED.
If radix is 2.. 36, val is taken to be UNSIGNED. If radix is 2.. 62, val is taken to be UNSIGNED.
That is, val is signed if and only if radix is. You will normally That is, val is signed if and only if radix is. You will normally
use radix -10 only through itoa and ltoa, for radix 2, 8, or 16 use radix -10 only through itoa and ltoa, for radix 2, 8, or 16
unsigned is what you generally want. unsigned is what you generally want.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment