1 #include <ccan/charset/charset.h>
2 #include <ccan/charset/charset.c>
3 #include <ccan/tap/tap.h>
11 * Produces a pseudorandom 32-bit number from 0 to 2^32-1.
12 * Uses the BCPL linear congruential generator method.
14 * Used instead of the system RNG so that test runs are reproducible.
/* Advances a private generator state by one multiply-and-add step.
   The state is a uint32_t, so each stored result is reduced modulo 2^32.
   The return statement is not visible in this excerpt; presumably the
   updated state is what gets returned -- confirm against the full file. */
16 static uint32_t rand32(void)
/* Function-local static: starts at 0 and persists across calls, so the
   sequence is the same on every run (no seeding is visible here). */
18 static uint32_t rand32_state = 0;
/* Multiplier, then increment, of the congruential step. */
19 rand32_state *= (uint32_t)0x7FF8A3ED;
20 rand32_state += (uint32_t)0x2AA01D31;
25 * Make a Unicode character requiring exactly @len UTF-8 bytes.
27 * Unless utf8_allow_surrogates is set,
28 * do not return a value in the range U+D800 through U+DFFF.
30 * If @len is not 1 through 4, generate an out-of-range character.
/* Draws one value from rand32() and folds it into the code point range
   selected by len. The switch header, the surrogate handling and the
   out-of-range default are elided from this excerpt. */
32 static unsigned int utf8_randcode(int len)
34 uint32_t r = rand32();
/* len 1: 0x00..0x7F. */
38 case 1: return r % 0x80;
/* len 2: 0x80..0x7FF. */
39 case 2: return r % (0x800-0x80) + 0x80;
/* 0x800..0xFFFF -- presumably the len 3 case (its label is not visible). */
42 ret = r % (0x10000-0x800) + 0x800;
/* Detects a surrogate (0xD800..0xDFFF) while surrogates are disallowed;
   what is done about it is on lines not shown here. */
43 if (!utf8_allow_surrogates && ret >= 0xD800 && ret <= 0xDFFF)
/* len 4: 0x10000..0x10FFFF. */
52 case 4: return r % (0x110000-0x10000) + 0x10000;
/* Returns a pseudorandom value in 0xD800..0xDFFF, i.e. the same range that
   utf8_randcode tests for when surrogates are disallowed. */
60 static unsigned int rand_surrogate(void)
/* 0xE000 - 0xD800 = 0x800 possible values, offset to start at 0xD800. */
62 return rand32() % (0xE000 - 0xD800) + 0xD800;
65 /* Encode @uc as UTF-8 using exactly @len bytes.
66 @len should be 1 through 4.
67 @uc will be truncated to the bits that fit in a @len-byte encoding.
68 If, after bit truncation, @uc is in the wrong range for its length,
69 an invalid character will be generated. */
/* Writes the bytes of one UTF-8 sequence through out, masking uc down to
   the payload bits of each byte. No range check on uc is visible, so
   overlong or surrogate values are encoded as given. The case labels and
   the 1-byte form are elided from this excerpt; the grouping below is
   inferred from the number of bytes written. */
70 static void utf8_encode_raw(char *out, unsigned int uc, int len)
/* Two bytes: lead 0xC0 carries bits 6..10, continuation carries bits 0..5. */
77 *out++ = 0xC0 | ((uc >> 6) & 0x1F);
78 *out++ = 0x80 | (uc & 0x3F);
/* Three bytes: lead 0xE0 carries bits 12..15, then two 6-bit continuations. */
81 *out++ = 0xE0 | ((uc >> 12) & 0x0F);
82 *out++ = 0x80 | ((uc >> 6) & 0x3F);
83 *out++ = 0x80 | (uc & 0x3F);
/* Four bytes: lead 0xF0 carries bits 18..20, then three 6-bit continuations. */
86 *out++ = 0xF0 | ((uc >> 18) & 0x07);
87 *out++ = 0x80 | ((uc >> 12) & 0x3F);
88 *out++ = 0x80 | ((uc >> 6) & 0x3F);
89 *out++ = 0x80 | (uc & 0x3F);
94 /* Generate a UTF-8 string of the given byte length,
95 randomly deciding if it should be valid or not.
97 Return true if it's valid, false if it's not. */
/* Fills out with len bytes built from a run of randomly chosen valid or
   invalid sequences. Local declarations, the branch conditions that select
   between the invalid variants, the advance of out, and the return value
   are on lines elided from this excerpt. */
98 static bool utf8_mktest(char *out, int len)
106 /* Probability that, per character, it should be valid.
107 The goal is to make utf8_mktest as a whole
108 have a 50% chance of generating a valid string. */
/* NOTE(review): the caller draws len from rand32() % (1024 + 1), so len can
   be 0 and 2.5/len is then a floating-point division by zero. The loop below
   does not execute in that case, so pf/pu go unused -- confirm this is
   acceptable on the target platforms. */
109 pf = pow(0.5, 2.5/len);
111 /* Convert to uint32_t to test against rand32. */
112 pu = pf * 4294967295.0;
/* Each pass handles one sequence of n bytes and removes n from the
   remaining length. */
114 for (;len; len -= n) {
/* v is forced true when only one byte remains; otherwise it is true with
   probability of roughly pf. */
115 v = len == 1 || rand32() <= pu;
/* m: longest sequence that still fits, capped at 4 bytes. */
116 m = len < 4 ? len : 4;
119 /* Generate a valid character. */
/* Length 1..m, filled with a code point that needs exactly n bytes. */
120 n = rand32() % m + 1;
121 utf8_encode_raw(out, utf8_randcode(n), n);
123 /* Generate an invalid character. */
/* Length 2..m (m is at least 2 here, since len == 1 forces v true). */
125 n = rand32() % (m-1) + 2;
/* A code point below 0x80 written in n >= 2 bytes: an overlong encoding. */
128 utf8_encode_raw(out, utf8_randcode(1), n);
/* With surrogates disallowed, the low bit of a fresh rand32() decides
   whether to emit a surrogate value. */
131 if (!utf8_allow_surrogates && (rand32() & 1))
132 utf8_encode_raw(out, rand_surrogate(), n);
/* Otherwise: a code point that needs 1..n-1 bytes, written in n bytes
   (overlong). The conditions separating lines 134 and 137 are not visible. */
134 utf8_encode_raw(out, utf8_randcode(rand32() % (n-1) + 1), n);
137 utf8_encode_raw(out, utf8_randcode(rand32() % (n-1) + 1), n);
/* Runs count generated strings through utf8_validate and compares its
   verdict with the one reported by utf8_mktest, then reports two TAP
   results via ok(). The buffer and count declarations and the counter
   updates are on lines elided from this excerpt. */
148 static void test_utf8_validate(bool allow_surrogates)
154 int passed=0, p_valid=0, p_invalid=0, total=0;
/* Sets the file-level switch that the generators above also read. */
159 utf8_allow_surrogates = allow_surrogates;
161 for (i=0; i<count; i++) {
/* Test length: 0 to 1024 bytes inclusive. */
162 len = rand32() % (1024 + 1);
163 valid = utf8_mktest(buffer, len);
/* A case passes when the validator agrees with the generator. */
164 if (utf8_validate(buffer, len) == valid) {
174 if (passed == total) {
175 printf("PASS: %d valid tests, %d invalid tests\n",
178 printf("FAIL: Passed %d out of %d tests\n", passed, total);
/* NOTE(review): ok(passed, ...) succeeds whenever passed is non-zero, i.e.
   as soon as a single case passed. The printf branch above treats
   passed == total as the pass condition; that is probably what this
   assertion should test as well -- confirm and fix. */
181 ok(passed, "utf8_validate test passed%s",
182 !allow_surrogates ? " (surrogates disallowed)" : "");
/* Sanity check on the generator: p_valid and p_invalid must each exceed
   count/10 (integer division). */
184 ok(p_valid > count/10 && p_invalid > count/10,
185 " valid/invalid are balanced");
190 /* This is how many tests you plan to run */
/* First pass with surrogates disallowed, second pass with them allowed.
   Both passes draw from the same rand32() sequence, which continues from
   where the first pass left off. */
193 test_utf8_validate(false);
194 test_utf8_validate(true);
196 /* This exits depending on whether all tests passed */
197 return exit_status();