1 #include <ccan/charset/charset.h>
2 #include <ccan/charset/charset.c>
3 #include <ccan/tap/tap.h>
/*
 * Return the next value of a deterministic pseudorandom sequence,
 * in the range 0 to 2^32-1.
 *
 * Uses the BCPL linear congruential generator:
 *     state = state * 0x7FF8A3ED + 0x2AA01D31   (mod 2^32)
 *
 * Used instead of the system RNG so that every run generates the same
 * test inputs.  The state is a function-local static starting at 0, so
 * the sequence is shared by all callers and is not thread-safe.
 */
static uint32_t rand32(void)
{
	static uint32_t rand32_state = 0;

	/* uint32_t arithmetic wraps, which supplies the modulus of the LCG. */
	rand32_state *= (uint32_t)0x7FF8A3ED;
	rand32_state += (uint32_t)0x2AA01D31;
	return rand32_state;
}
25 * Make a Unicode character requiring exactly @len UTF-8 bytes.
27 * Unless utf8_allow_surrogates is set,
28 * do not return a value in the range U+D800 thru U+DFFF .
30 * If @len is not 1 thru 4, generate an out-of-range character.
/* Return a pseudorandom code point taken from the range that needs exactly
   `len` UTF-8 bytes.  The cases visible here cover len 1, 2, 3 (in part)
   and 4. */
32 static unsigned int utf8_randcode(int len)
/* One draw from rand32(); each case below folds it into its range with a
   modulo and an offset. */
34 uint32_t r = rand32();
/* One byte: U+0000..U+007F. */
38 case 1: return r % 0x80;
/* Two bytes: U+0080..U+07FF. */
39 case 2: return r % (0x800-0x80) + 0x80;
/* Three-byte candidate: U+0800..U+FFFF. */
42 ret = r % (0x10000-0x800) + 0x800;
/* First term: surrogates are disallowed and the candidate lies in
   U+D800..U+DFFF.
   NOTE(review): the rest of this condition and the statement it guards are
   not visible here -- presumably the candidate is discarded and re-drawn;
   confirm. */
43 if ((!utf8_allow_surrogates && ret >= 0xD800 && ret <= 0xDFFF)
/* Four bytes: U+10000..U+10FFFF. */
53 case 4: return r % (0x110000-0x10000) + 0x10000;
/*
 * Return a pseudorandom code point in the surrogate range
 * U+D800 through U+DFFF, drawn from rand32().
 */
static unsigned int rand_surrogate(void)
{
	return rand32() % (0xE000 - 0xD800) + 0xD800;
}
/*
 * Encode @uc as UTF-8 using exactly @len bytes, written to @out.
 *
 * @len should be 1 through 4; any other value writes nothing.
 * No terminating NUL is written.
 *
 * @uc is truncated to the payload bits available for that length
 * (7, 11, 16 or 21 bits).  No range check is made: if, after truncation,
 * @uc is in the wrong range for its length, an invalid sequence is
 * produced.  Callers rely on this to build invalid test input on purpose.
 *
 * NOTE(review): the 1-byte case follows from the truncation rule above;
 * confirm it against the upstream file.
 */
static void utf8_encode_raw(char *out, unsigned int uc, int len)
{
	/* The casts make the narrowing explicit: lead and continuation bytes
	   are >= 0x80, which does not fit a signed char without conversion. */
	switch (len) {
	case 1:
		*out++ = (char)(uc & 0x7F);
		break;
	case 2:
		*out++ = (char)(0xC0 | ((uc >> 6) & 0x1F));
		*out++ = (char)(0x80 | (uc & 0x3F));
		break;
	case 3:
		*out++ = (char)(0xE0 | ((uc >> 12) & 0x0F));
		*out++ = (char)(0x80 | ((uc >> 6) & 0x3F));
		*out++ = (char)(0x80 | (uc & 0x3F));
		break;
	case 4:
		*out++ = (char)(0xF0 | ((uc >> 18) & 0x07));
		*out++ = (char)(0x80 | ((uc >> 12) & 0x3F));
		*out++ = (char)(0x80 | ((uc >> 6) & 0x3F));
		*out++ = (char)(0x80 | (uc & 0x3F));
		break;
	default:
		break;
	}
}
95 /* Generate a UTF-8 string of the given byte length,
96 randomly deciding if it should be valid or not.
98 Return true if it's valid, false if it's not. */
/* Builds the test string one character per loop iteration, deciding for each
   character whether to emit a valid or an invalid sequence.
   NOTE(review): `len` can be 0 (the caller draws 0..1024); 2.5/len is then a
   floating-point division by zero, which gives +inf under IEEE 754 -- confirm
   that is acceptable on the targets this test runs on. */
99 static bool utf8_mktest(char *out, int len)
107 /* Probability that, per character, it should be valid.
108 The goal is to make utf8_mktest as a whole
109 have a 50% chance of generating a valid string. */
/* pf raised to the power (len/2.5) equals 0.5; 2.5 is the mean of the
   character lengths 1..4. */
110 pf = pow(0.5, 2.5/len);
112 /* Convert to uint32_t to test against rand32. */
113 pu = pf * 4294967295.0;
/* Each iteration emits one character of n bytes and deducts n from the bytes
   still to be produced.
   NOTE(review): the matching advance of `out` is not visible here; confirm. */
115 for (;len; len -= n) {
/* v: should this character be valid?  Forced to true when a single byte
   remains, because the invalid branch needs n >= 2 and would otherwise
   evaluate rand32() % (m-1) with m == 1. */
116 v = len == 1 || rand32() <= pu;
/* m: longest character that still fits, capped at 4 bytes. */
117 m = len < 4 ? len : 4;
120 /* Generate a valid character. */
/* Length in 1..m, code point drawn from the range matching that length. */
121 n = rand32() % m + 1;
122 utf8_encode_raw(out, utf8_randcode(n), n);
124 /* Generate an invalid character. */
/* Length in 2..m. */
126 n = rand32() % (m-1) + 2;
/* NOTE(review): the conditions selecting between the calls below are on
   lines not visible here; confirm which n each one applies to. */
/* Overlong form: a code point below 0x80 written in n >= 2 bytes. */
129 utf8_encode_raw(out, utf8_randcode(1), n);
/* When surrogates are disallowed, a coin flip chooses a surrogate code point
   as the invalid character. */
132 if (!utf8_allow_surrogates && (rand32() & 1))
133 utf8_encode_raw(out, rand_surrogate(), n);
/* Overlong form: a code point needing 1..n-1 bytes written in n bytes. */
135 utf8_encode_raw(out, utf8_randcode(rand32() % (n-1) + 1), n);
138 utf8_encode_raw(out, utf8_randcode(rand32() % (n-1) + 1), n);
/* Generates `count` strings with utf8_mktest() and checks that
   utf8_validate() agrees with the generator's verdict on each one, then
   reports two TAP checks through ok(). */
149 static void test_utf8_validate(bool allow_surrogates)
155 int passed=0, p_valid=0, p_invalid=0, total=0;
/* Global switch read by utf8_randcode() and utf8_mktest().
   NOTE(review): presumably also consulted by utf8_validate() in charset.c;
   confirm. */
160 utf8_allow_surrogates = allow_surrogates;
162 for (i=0; i<count; i++) {
/* String length in bytes, 0..1024 inclusive. */
163 len = rand32() % (1024 + 1);
164 valid = utf8_mktest(buffer, len);
/* A case passes when the validator's answer matches what the generator says
   it produced. */
165 if (utf8_validate(buffer, len) == valid) {
175 if (passed == total) {
176 printf("PASS: %d valid tests, %d invalid tests\n",
179 printf("FAIL: Passed %d out of %d tests\n", passed, total);
/* NOTE(review): ok() receives `passed` itself, so as written this check
   succeeds whenever passed is non-zero.  `passed == total`, the condition
   used for the PASS/FAIL message above, looks like the intended test --
   confirm and fix. */
182 ok(passed, "utf8_validate test passed%s",
183 !allow_surrogates ? " (surrogates disallowed)" : "");
/* Guards against a lopsided generator: valid and invalid cases must each
   account for more than a tenth of `count`. */
185 ok(p_valid > count/10 && p_invalid > count/10,
186 " valid/invalid are balanced");
191 /* This is how many tests you plan to run */
/* NOTE(review): the plan call itself is not visible here.  Each
   test_utf8_validate() run issues two ok() checks, so the two runs below
   imply a plan of 4 -- confirm. */
/* Run once with surrogates disallowed, then once with them allowed. */
194 test_utf8_validate(false);
195 test_utf8_validate(true);
197 /* This exits depending on whether all tests passed */
198 return exit_status();