X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fcharset%2Ftest%2Frun-utf8-read-write.c;fp=ccan%2Fcharset%2Ftest%2Frun-utf8-read-write.c;h=7758b64857a0c7c0f760b8d59bf0c3d21728e7f6;hp=0000000000000000000000000000000000000000;hb=06c4af3163e2bd99999a93a478d1308ea39c5a79;hpb=233190071e5834e1a701d6d2ef4bc0a46cecc537;ds=sidebyside

diff --git a/ccan/charset/test/run-utf8-read-write.c b/ccan/charset/test/run-utf8-read-write.c
new file mode 100644
index 00000000..7758b648
--- /dev/null
+++ b/ccan/charset/test/run-utf8-read-write.c
@@ -0,0 +1,150 @@
+#include <ccan/charset/charset.c>
+#include <ccan/tap/tap.h>
+
+#include <string.h>
+
+#include "common.h"
+
+/*
+ * Testing procedure for utf8_read_char and utf8_write_char:
+ *
+ *  * Generate N valid and invalid Unicode code points.
+ *  * Encode them with utf8_write_char.
+ *  * Copy the resulting string into a buffer sized exactly as big as
+ *    the string produced.  This way, Valgrind can catch buffer overflows
+ *    by utf8_validate and utf8_read_char.
+ *  * Validate the string with utf8_validate.
+ *  * Decode the string, ensuring that:
+ *    - Valid codepoints are read back.
+ *    - Invalid characters are read back, but replaced
+ *      with REPLACEMENT_CHARACTER.
+ *    - No extra characters are read back.
+ */
+
+#define TRIAL_COUNT             1000  /* trials run by main(); also the planned TAP test count */
+#define MAX_CHARS_PER_TRIAL     100   /* upper bound on code points generated per trial */
+/* Map r into the inclusive interval [lo, hi] by modulo; lo and hi are each expanded twice. */
+#define range(r, lo, hi)  ((r) % ((hi)-(lo)+1) + (lo))
+
+/* Run TRIAL_COUNT random encode/validate/decode trials; every trial reports one TAP result. */
+int main(void)
+{
+	int trial;
+
+	plan_tests(TRIAL_COUNT);
+
+	for (trial = 1; trial <= TRIAL_COUNT; trial++) {
+		int i, count, len;
+		uchar_t codepoints[MAX_CHARS_PER_TRIAL]; /* values expected on read-back */
+		uchar_t c;
+		bool c_valid;
+		/* Sized for the 4-byte maximum that the len check below enforces. */
+		char write_buffer[MAX_CHARS_PER_TRIAL * 4];
+		char *o = write_buffer;
+		char *oe = write_buffer + sizeof(write_buffer);
+
+		char *string;
+		const char *s, *e;
+
+		count = rand32() % MAX_CHARS_PER_TRIAL + 1;
+
+		for (i = 0; i < count; i++) {
+			if (o >= oe) {
+				fail("utf8_write_char: Buffer overflow (1)");
+				goto next_trial;
+			}
+			/* Pick one of seven code point classes; two of them are invalid. */
+			switch (rand32() % 7) {
+				case 0:
+					c = range(rand32(), 0x0, 0x7F);
+					c_valid = true;
+					break;
+				case 1:
+					c = range(rand32(), 0x80, 0x7FF);
+					c_valid = true;
+					break;
+				case 2:
+					c = range(rand32(), 0x800, 0xD7FF);
+					c_valid = true;
+					break;
+				case 3:
+					c = range(rand32(), 0xD800, 0xDFFF);
+					c_valid = false;
+					break;
+				case 4:
+					c = range(rand32(), 0xE000, 0xFFFF);
+					c_valid = true;
+					break;
+				case 5:
+					c = range(rand32(), 0x10000, 0x10FFFF);
+					c_valid = true;
+					break;
+				default:
+					do {
+						c = rand32();
+					} while (c < 0x110000);
+					c_valid = false;
+					break;
+			}
+			/* Invalid input is expected to read back as REPLACEMENT_CHARACTER. */
+			codepoints[i] = c_valid ? c : REPLACEMENT_CHARACTER;
+
+			len = utf8_write_char(c, o);
+			if (len < 1 || len > 4) {
+				fail("utf8_write_char: Return value is not 1 thru 4.");
+				goto next_trial;
+			}
+			o += len;
+		}
+		if (o > oe) {
+			fail("utf8_write_char: Buffer overflow (2)");
+			goto next_trial;
+		}
+		/* Copy into an exact-size heap block so Valgrind can flag over-reads. */
+		string = malloc(o - write_buffer);
+		if (string == NULL) {
+			fail("malloc failed while copying the encoded string.");
+			goto next_trial; /* nothing was allocated, so skip the free */
+		}
+		memcpy(string, write_buffer, o - write_buffer);
+		s = string;
+		e = string + (o - write_buffer);
+
+		if (!utf8_validate(s, e - s)) {
+			fail("Invalid string produced by utf8_write_char.");
+			goto next_trial_free_string;
+		}
+		/* Decode exactly count characters and compare each with its expectation. */
+		for (i = 0; i < count; i++) {
+			if (s >= e) {
+				fail("utf8_read_char: Buffer overflow (1)");
+				goto next_trial_free_string;
+			}
+
+			len = utf8_read_char(s, &c);
+			if (len < 1 || len > 4) {
+				fail("utf8_read_char: Return value is not 1 thru 4.");
+				goto next_trial_free_string;
+			}
+			if (c != codepoints[i]) {
+				fail("utf8_read_char: Character read differs from that written.");
+				goto next_trial_free_string;
+			}
+			s += len;
+		}
+		if (s > e) {
+			fail("utf8_read_char: Buffer overflow (2)");
+			goto next_trial_free_string;
+		}
+		if (s < e) {
+			fail("utf8_read_char: Did not reach end of string.");
+			goto next_trial_free_string;
+		}
+
+		pass("Trial %d: %d characters", trial, count);
+	next_trial_free_string:
+		free(string);
+	next_trial:;
+	}
+	return exit_status();
+}