]> git.ozlabs.org Git - ccan/blobdiff - ccan/charset/test/run-utf8-read-write.c
charset: Rewrote utf8_validate, and added four new functions:
[ccan] / ccan / charset / test / run-utf8-read-write.c
diff --git a/ccan/charset/test/run-utf8-read-write.c b/ccan/charset/test/run-utf8-read-write.c
new file mode 100644 (file)
index 0000000..7758b64
--- /dev/null
@@ -0,0 +1,150 @@
+#include <ccan/charset/charset.c>
+#include <ccan/tap/tap.h>
+
+#include <string.h>
+
+#include "common.h"
+
+/*
+ * Testing procedure for utf8_read_char and utf8_write_char:
+ *
+ *  * Generate N valid and invalid Unicode code points.
+ *  * Encode them with utf8_write_char.
+ *  * Copy the resulting string into a buffer sized exactly as big as
+ *    the string produced.  This way, Valgrind can catch buffer overflows
+ *    by utf8_validate and utf8_read_char.
+ *  * Validate the string with utf8_validate.
+ *  * Decode the string, ensuring that:
+ *    - Valid codepoints are read back.
+ *    - Invalid characters are read back, but replaced
+ *      with REPLACEMENT_CHARACTER.
+ *    - No extra characters are read back.
+ */
+
+#define TRIAL_COUNT             1000   /* trials run by main(); also the planned TAP test count */
+#define MAX_CHARS_PER_TRIAL     100    /* upper bound on code points generated per trial */
+/* Map r onto the inclusive interval [lo, hi] by modulo (assumes lo <= hi); lo is evaluated twice. */
+#define range(r, lo, hi)  ((r) % ((hi)-(lo)+1) + (lo))
+
+/*
+ * Run TRIAL_COUNT randomized write/validate/read round trips, reporting
+ * exactly one TAP result per trial.  Returns exit_status().
+ */
+int main(void)
+{
+       int trial;
+       
+       plan_tests(TRIAL_COUNT);
+       
+       for (trial = 1; trial <= TRIAL_COUNT; trial++) {
+               int i, count;
+               uchar_t codepoints[MAX_CHARS_PER_TRIAL]; /* expected read-back values */
+               uchar_t c;
+               bool c_valid;
+               char write_buffer[MAX_CHARS_PER_TRIAL * 4];
+               char *o = write_buffer;
+               char *oe = write_buffer + sizeof(write_buffer);
+               char *string;
+               const char *s, *e;
+               int len;
+               
+               count = rand32() % MAX_CHARS_PER_TRIAL + 1;
+               /* Encode count random code points into write_buffer. */
+               for (i = 0; i < count; i++) {
+                       if (o >= oe) {
+                               fail("utf8_write_char: Buffer overflow (1)");
+                               goto next_trial;
+                       }
+                       /* Case 3 (0xD800..0xDFFF) and default (>= 0x110000) produce invalid input. */
+                       switch (rand32() % 7) {
+                               case 0:
+                                       c = range(rand32(), 0x0, 0x7F);
+                                       c_valid = true;
+                                       break;
+                               case 1:
+                                       c = range(rand32(), 0x80, 0x7FF);
+                                       c_valid = true;
+                                       break;
+                               case 2:
+                                       c = range(rand32(), 0x800, 0xD7FF);
+                                       c_valid = true;
+                                       break;
+                               case 3:
+                                       c = range(rand32(), 0xD800, 0xDFFF);
+                                       c_valid = false;
+                                       break;
+                               case 4:
+                                       c = range(rand32(), 0xE000, 0xFFFF);
+                                       c_valid = true;
+                                       break;
+                               case 5:
+                                       c = range(rand32(), 0x10000, 0x10FFFF);
+                                       c_valid = true;
+                                       break;
+                               default:
+                                       do {
+                                               c = rand32();
+                                       } while (c < 0x110000);
+                                       c_valid = false;
+                                       break;
+                       }
+                       /* Invalid input is expected to read back as REPLACEMENT_CHARACTER. */
+                       codepoints[i] = c_valid ? c : REPLACEMENT_CHARACTER;
+                       len = utf8_write_char(c, o);
+                       if (len < 1 || len > 4) {
+                               fail("utf8_write_char: Return value is not 1 thru 4.");
+                               goto next_trial;
+                       }
+                       o += len;
+               }
+               if (o > oe) {
+                       fail("utf8_write_char: Buffer overflow (2)");
+                       goto next_trial;
+               }
+               /* Exact-size heap copy, so Valgrind can catch reads past the end of the string. */
+               string = malloc(o - write_buffer);
+               if (string == NULL) {
+                       fail("malloc failed while copying the encoded string.");
+                       goto next_trial;
+               }
+               memcpy(string, write_buffer, o - write_buffer);
+               s = string;
+               e = string + (o - write_buffer);
+               if (!utf8_validate(s, e - s)) {
+                       fail("Invalid string produced by utf8_write_char.");
+                       goto next_trial_free_string;
+               }
+               /* Decode the copy and compare each character with the expected value. */
+               for (i = 0; i < count; i++) {
+                       if (s >= e) {
+                               fail("utf8_read_char: Buffer overflow (1)");
+                               goto next_trial_free_string;
+                       }
+                       len = utf8_read_char(s, &c);
+                       if (len < 1 || len > 4) {
+                               fail("utf8_read_char: Return value is not 1 thru 4.");
+                               goto next_trial_free_string;
+                       }
+                       if (c != codepoints[i]) {
+                               fail("utf8_read_char: Character read differs from that written.");
+                               goto next_trial_free_string;
+                       }
+                       s += len;
+               }
+               if (s > e) {
+                       fail("utf8_read_char: Buffer overflow (2)");
+                       goto next_trial_free_string;
+               }
+               if (s < e) {
+                       fail("utf8_read_char: Did not reach end of string.");
+                       goto next_trial_free_string;
+               }
+               pass("Trial %d: %d characters", trial, count);
+               
+       next_trial_free_string:
+               free(string);
+       next_trial:;
+       }
+       
+       return exit_status();
+}