Commit 56d5c41f authored by Rusty Russell

utf8: don't allow NUL in decoded strings.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
parent b45a3266
...@@ -117,7 +117,7 @@ test_unicode_scalar_value(void) { ...@@ -117,7 +117,7 @@ test_unicode_scalar_value(void) {
char src[4]; char src[4];
/* Unicode scalar value [U+0000, U+007F] */ /* Unicode scalar value [U+0000, U+007F] */
for (ord = 0x0000; ord <= 0x007F; ord++) { for (ord = 0x0001; ord <= 0x007F; ord++) {
encode_ord(ord, 1, src); encode_ord(ord, 1, src);
TEST_UTF8(src, 1, ord ? 0 : ERANGE); TEST_UTF8(src, 1, ord ? 0 : ERANGE);
} }
...@@ -255,7 +255,7 @@ test_continuations(void) { ...@@ -255,7 +255,7 @@ test_continuations(void) {
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
plan_tests(2190906); plan_tests(2190906 - 1);
test_unicode_scalar_value(); test_unicode_scalar_value();
test_surrogates(); test_surrogates();
test_non_shortest_form(); test_non_shortest_form();
......
...@@ -63,6 +63,8 @@ bool utf8_decode(struct utf8_state *utf8_state, char c) ...@@ -63,6 +63,8 @@ bool utf8_decode(struct utf8_state *utf8_state, char c)
/* First character in sequence. */ /* First character in sequence. */
if (((unsigned char)c & 0x80) == 0) { if (((unsigned char)c & 0x80) == 0) {
/* ASCII, easy. */ /* ASCII, easy. */
if (c == 0)
goto bad_encoding;
utf8_state->total_len = 1; utf8_state->total_len = 1;
utf8_state->c = c; utf8_state->c = c;
goto finished_decoding; goto finished_decoding;
......
...@@ -33,7 +33,7 @@ static inline void utf8_state_init(struct utf8_state *utf8_state) ...@@ -33,7 +33,7 @@ static inline void utf8_state_init(struct utf8_state *utf8_state)
* Otherwise returns true, @utf8_state can be reused without initialization, * Otherwise returns true, @utf8_state can be reused without initialization,
* and sets errno: * and sets errno:
* 0: success * 0: success
* EINVAL: bad encoding. * EINVAL: bad encoding (including a NUL character).
* EFBIG: not a minimal encoding. * EFBIG: not a minimal encoding.
* ERANGE: encoding of invalid character. * ERANGE: encoding of invalid character.
* *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment