1 #include <ccan/charset/charset.c>
2 #include <ccan/tap/tap.h>
9 * Testing procedure for utf8_read_char and utf8_write_char:
11 * * Generate N valid and invalid Unicode code points.
12 * * Encode them with utf8_write_char.
13 * * Copy the resulting string into a buffer sized exactly as big as
14 * the string produced. This way, Valgrind can catch out-of-bounds reads
15 * by utf8_validate and utf8_read_char.
16 * * Validate the string with utf8_validate.
17 * * Decode the string, ensuring that:
18 * - Valid code points are read back unchanged.
19 * - Invalid code points are read back, but replaced
20 * with REPLACEMENT_CHARACTER.
21 * - No extra characters are read back.
/* Number of randomized trials to run; one TAP test is planned per trial. */
24 #define TRIAL_COUNT 1000
/*
 * Upper bound on the number of code points generated in one trial. It also
 * sizes the per-trial code point array and, at 4 bytes per character, the
 * encoder's scratch buffer.
 */
25 #define MAX_CHARS_PER_TRIAL 100
/*
 * Reduce r into the inclusive interval [lo, hi]: take r modulo the number of
 * values in the interval, then offset the result by lo.
 * The lo argument is expanded twice, so it should be free of side effects.
 * NOTE(review): assumes r is non-negative and hi >= lo -- confirm at call sites.
 */
27 #define range(r, lo, hi) ((r) % ((hi)-(lo)+1) + (lo))
33 plan_tests(TRIAL_COUNT);
35 for (trial = 1; trial <= TRIAL_COUNT; trial++) {
37 uchar_t codepoints[MAX_CHARS_PER_TRIAL];
41 char write_buffer[MAX_CHARS_PER_TRIAL * 4];
42 char *o = write_buffer;
43 char *oe = write_buffer + sizeof(write_buffer);
51 count = rand32() % MAX_CHARS_PER_TRIAL + 1;
53 for (i = 0; i < count; i++) {
55 fail("utf8_write_char: Buffer overflow (1)");
59 switch (rand32() % 7) {
61 c = range(rand32(), 0x0, 0x7F);
65 c = range(rand32(), 0x80, 0x7FF);
69 c = range(rand32(), 0x800, 0xD7FF);
73 c = range(rand32(), 0xD800, 0xDFFF);
77 c = range(rand32(), 0xE000, 0xFFFF);
81 c = range(rand32(), 0x10000, 0x10FFFF);
87 } while (c < 0x110000);
92 codepoints[i] = c_valid ? c : REPLACEMENT_CHARACTER;
94 len = utf8_write_char(c, o);
95 if (len < 1 || len > 4) {
96 fail("utf8_write_char: Return value is not 1 thru 4.");
102 fail("utf8_write_char: Buffer overflow (2)");
106 string = malloc(o - write_buffer);
107 memcpy(string, write_buffer, o - write_buffer);
109 e = string + (o - write_buffer);
111 if (!utf8_validate(s, e - s)) {
112 fail("Invalid string produced by utf8_write_char.");
113 goto next_trial_free_string;
116 for (i = 0; i < count; i++) {
118 fail("utf8_read_char: Buffer overflow (1)");
119 goto next_trial_free_string;
122 len = utf8_read_char(s, &c);
123 if (len < 1 || len > 4) {
124 fail("utf8_read_char: Return value is not 1 thru 4.");
125 goto next_trial_free_string;
127 if (c != codepoints[i]) {
128 fail("utf8_read_char: Character read differs from that written.");
129 goto next_trial_free_string;
134 fail("utf8_read_char: Buffer overflow (2)");
135 goto next_trial_free_string;
138 fail("utf8_read_char: Did not reach end of string.");
139 goto next_trial_free_string;
142 pass("Trial %d: %d characters", trial, count);
144 next_trial_free_string:
149 return exit_status();