From 56d5c41fa3f3148b6afb4706fa80fead0e232eb2 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Tue, 1 Dec 2020 11:05:48 +1030
Subject: [PATCH] utf8: don't allow NUL in decoded strings.

Signed-off-by: Rusty Russell
---
 ccan/utf8/test/run-decode.c | 4 ++--
 ccan/utf8/utf8.c            | 2 ++
 ccan/utf8/utf8.h            | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/ccan/utf8/test/run-decode.c b/ccan/utf8/test/run-decode.c
index 34ecb1d1..7b991777 100644
--- a/ccan/utf8/test/run-decode.c
+++ b/ccan/utf8/test/run-decode.c
@@ -117,7 +117,7 @@ test_unicode_scalar_value(void) {
 	char src[4];
 
 	/* Unicode scalar value [U+0000, U+007F] */
-	for (ord = 0x0000; ord <= 0x007F; ord++) {
+	for (ord = 0x0001; ord <= 0x007F; ord++) {
 		encode_ord(ord, 1, src);
 		TEST_UTF8(src, 1, ord ? 0 : ERANGE);
 	}
@@ -255,7 +255,7 @@ test_continuations(void) {
 
 int main(int argc, char **argv)
 {
-	plan_tests(2190906);
+	plan_tests(2190906 - 1);
 	test_unicode_scalar_value();
 	test_surrogates();
 	test_non_shortest_form();
diff --git a/ccan/utf8/utf8.c b/ccan/utf8/utf8.c
index 346d2d95..cb18041a 100644
--- a/ccan/utf8/utf8.c
+++ b/ccan/utf8/utf8.c
@@ -63,6 +63,8 @@ bool utf8_decode(struct utf8_state *utf8_state, char c)
 	/* First character in sequence. */
 	if (((unsigned char)c & 0x80) == 0) {
 		/* ASCII, easy. */
+		if (c == 0)
+			goto bad_encoding;
 		utf8_state->total_len = 1;
 		utf8_state->c = c;
 		goto finished_decoding;
diff --git a/ccan/utf8/utf8.h b/ccan/utf8/utf8.h
index a095f02e..9a746968 100644
--- a/ccan/utf8/utf8.h
+++ b/ccan/utf8/utf8.h
@@ -33,7 +33,7 @@ static inline void utf8_state_init(struct utf8_state *utf8_state)
  * Otherwise returns true, @utf8_state can be reused without initializeation,
  * and sets errno:
  * 0: success
- * EINVAL: bad encoding.
+ * EINVAL: bad encoding (including a NUL character).
  * EFBIG: not a minimal encoding.
  * ERANGE: encoding of invalid character.
  *
-- 
2.39.2