git.ozlabs.org Git - ccan/commitdiff
utf8: don't allow NUL in decoded strings.
author Rusty Russell <rusty@rustcorp.com.au>
Tue, 1 Dec 2020 00:35:48 +0000 (11:05 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
Tue, 1 Dec 2020 00:35:48 +0000 (11:05 +1030)
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
ccan/utf8/test/run-decode.c
ccan/utf8/utf8.c
ccan/utf8/utf8.h

index 34ecb1d19f23da32c747a6863cb45a72acef82e4..7b9917775e42a4b26fc7e31480dea162811d20b7 100644 (file)
@@ -117,7 +117,7 @@ test_unicode_scalar_value(void) {
   char src[4];
 
   /* Unicode scalar value [U+0000, U+007F] */
-  for (ord = 0x0000; ord <= 0x007F; ord++) {
+  for (ord = 0x0001; ord <= 0x007F; ord++) {
     encode_ord(ord, 1, src);
     TEST_UTF8(src, 1, ord ? 0 : ERANGE);
   }
@@ -255,7 +255,7 @@ test_continuations(void) {
 int
 main(int argc, char **argv)
 {
-  plan_tests(2190906);
+  plan_tests(2190906 - 1);
   test_unicode_scalar_value();
   test_surrogates();
   test_non_shortest_form();
index 346d2d95b72f98dc2d1a32ce7884091ccbd200d5..cb18041a5d35cb9f2486388e828d3cd6f77d19c0 100644 (file)
@@ -63,6 +63,8 @@ bool utf8_decode(struct utf8_state *utf8_state, char c)
                /* First character in sequence. */
                if (((unsigned char)c & 0x80) == 0) {
                        /* ASCII, easy. */
+                       if (c == 0)
+                               goto bad_encoding;
                        utf8_state->total_len = 1;
                        utf8_state->c = c;
                        goto finished_decoding;
index a095f02e870baf53d0020fd60ffecabc8a0b0cba..9a74696800cef1d8a5100703e6939d77d06e0343 100644 (file)
@@ -33,7 +33,7 @@ static inline void utf8_state_init(struct utf8_state *utf8_state)
  * Otherwise returns true, @utf8_state can be reused without initialization,
  * and sets errno:
  * 0: success
- * EINVAL: bad encoding.
+ * EINVAL: bad encoding (including a NUL character).
  * EFBIG: not a minimal encoding.
  * ERANGE: encoding of invalid character.
  *