X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fcharset%2Ftest%2Frun-utf8-read-write.c;fp=ccan%2Fcharset%2Ftest%2Frun-utf8-read-write.c;h=7758b64857a0c7c0f760b8d59bf0c3d21728e7f6;hp=0000000000000000000000000000000000000000;hb=06c4af3163e2bd99999a93a478d1308ea39c5a79;hpb=233190071e5834e1a701d6d2ef4bc0a46cecc537;ds=sidebyside diff --git a/ccan/charset/test/run-utf8-read-write.c b/ccan/charset/test/run-utf8-read-write.c new file mode 100644 index 00000000..7758b648 --- /dev/null +++ b/ccan/charset/test/run-utf8-read-write.c @@ -0,0 +1,150 @@ +#include +#include + +#include + +#include "common.h" + +/* + * Testing procedure for utf8_read_char and utf8_write_char: + * + * * Generate N valid and invalid Unicode code points. + * * Encode them with utf8_write_char. + * * Copy the resulting string into a buffer sized exactly as big as + * the string produced. This way, Valgrind can catch buffer overflows + * by utf8_validate and utf8_read_char. + * * Validate the string with utf8_validate. + * * Decode the string, ensuring that: + * - Valid codepoints are read back. + * - Invalid characters are read back, but replaced + * with REPLACEMENT_CHARACTER. + * - No extra characters are read back. + */ + +#define TRIAL_COUNT 1000 +#define MAX_CHARS_PER_TRIAL 100 + +#define range(r, lo, hi) ((r) % ((hi)-(lo)+1) + (lo)) + +int main(void) +{ + int trial; + + plan_tests(TRIAL_COUNT); + + for (trial = 1; trial <= TRIAL_COUNT; trial++) { + int i, count; + uchar_t codepoints[MAX_CHARS_PER_TRIAL]; + uchar_t c; + bool c_valid; + + char write_buffer[MAX_CHARS_PER_TRIAL * 4]; + char *o = write_buffer; + char *oe = write_buffer + sizeof(write_buffer); + + char *string; + const char *s; + const char *e; + + int len; + + count = rand32() % MAX_CHARS_PER_TRIAL + 1; + + for (i = 0; i < count; i++) { + if (o >= oe) { + fail("utf8_write_char: Buffer overflow (1)"); + goto next_trial; + } + + switch (rand32() % 7) { + case 0: + c = range(rand32(), 0x0, 0x7F); + c_valid = true; + break; + case 1: + c = range(rand32(), 0x80, 0x7FF); + c_valid = true; + break; + case 2: + c = range(rand32(), 0x800, 0xD7FF); + c_valid = true; + break; + case 3: + c = range(rand32(), 0xD800, 0xDFFF); + c_valid = false; + break; + case 4: + c = range(rand32(), 0xE000, 0xFFFF); + c_valid = true; + break; + case 5: + c = range(rand32(), 0x10000, 0x10FFFF); + c_valid = true; + break; + default: + do { + c = rand32(); + } while (c < 0x110000); + c_valid = false; + break; + } + + codepoints[i] = c_valid ? c : REPLACEMENT_CHARACTER; + + len = utf8_write_char(c, o); + if (len < 1 || len > 4) { + fail("utf8_write_char: Return value is not 1 thru 4."); + goto next_trial; + } + o += len; + } + if (o > oe) { + fail("utf8_write_char: Buffer overflow (2)"); + goto next_trial; + } + + string = malloc(o - write_buffer); + memcpy(string, write_buffer, o - write_buffer); + s = string; + e = string + (o - write_buffer); + + if (!utf8_validate(s, e - s)) { + fail("Invalid string produced by utf8_write_char."); + goto next_trial_free_string; + } + + for (i = 0; i < count; i++) { + if (s >= e) { + fail("utf8_read_char: Buffer overflow (1)"); + goto next_trial_free_string; + } + + len = utf8_read_char(s, &c); + if (len < 1 || len > 4) { + fail("utf8_read_char: Return value is not 1 thru 4."); + goto next_trial_free_string; + } + if (c != codepoints[i]) { + fail("utf8_read_char: Character read differs from that written."); + goto next_trial_free_string; + } + s += len; + } + if (s > e) { + fail("utf8_read_char: Buffer overflow (2)"); + goto next_trial_free_string; + } + if (s < e) { + fail("utf8_read_char: Did not reach end of string."); + goto next_trial_free_string; + } + + pass("Trial %d: %d characters", trial, count); + + next_trial_free_string: + free(string); + next_trial:; + } + + return exit_status(); +}