charset: Rewrote utf8_validate, and added four new functions:
[ccan] / ccan / charset / test / run-utf8-read-write.c
1 #include <ccan/charset/charset.c>
2 #include <ccan/tap/tap.h>
3
4 #include <string.h>
5
6 #include "common.h"
7
8 /*
9  * Testing procedure for utf8_read_char and utf8_write_char:
10  *
11  *  * Generate N valid and invalid Unicode code points.
12  *  * Encode them with utf8_write_char.
13  *  * Copy the resulting string into a buffer sized exactly as big as
14  *    the string produced.  This way, Valgrind can catch buffer overflows
15  *    by utf8_validate and utf8_read_char.
16  *  * Validate the string with utf8_validate.
17  *  * Decode the string, ensuring that:
18  *    - Valid codepoints are read back.
19  *    - Invalid characters are read back, but replaced
20  *      with REPLACEMENT_CHARACTER.
21  *    - No extra characters are read back.
22  */
23
/* How many randomized round-trip trials to run. */
#define TRIAL_COUNT		1000
/* Largest number of code points generated for a single trial. */
#define MAX_CHARS_PER_TRIAL	100

/*
 * Fold `value` into the inclusive interval [low, high] using a modulus.
 * `low` and `high` are each expanded twice, so pass side-effect-free
 * expressions for them.
 */
#define range(value, low, high)  ((low) + (value) % ((high) - (low) + 1))
28
29 int main(void)
30 {
31         int trial;
32         
33         plan_tests(TRIAL_COUNT);
34         
35         for (trial = 1; trial <= TRIAL_COUNT; trial++) {
36                 int i, count;
37                 uchar_t codepoints[MAX_CHARS_PER_TRIAL];
38                 uchar_t c;
39                 bool c_valid;
40                 
41                 char write_buffer[MAX_CHARS_PER_TRIAL * 4];
42                 char *o = write_buffer;
43                 char *oe = write_buffer + sizeof(write_buffer);
44                 
45                 char *string;
46                 const char *s;
47                 const char *e;
48                 
49                 int len;
50                 
51                 count = rand32() % MAX_CHARS_PER_TRIAL + 1;
52                 
53                 for (i = 0; i < count; i++) {
54                         if (o >= oe) {
55                                 fail("utf8_write_char: Buffer overflow (1)");
56                                 goto next_trial;
57                         }
58                         
59                         switch (rand32() % 7) {
60                                 case 0:
61                                         c = range(rand32(), 0x0, 0x7F);
62                                         c_valid = true;
63                                         break;
64                                 case 1:
65                                         c = range(rand32(), 0x80, 0x7FF);
66                                         c_valid = true;
67                                         break;
68                                 case 2:
69                                         c = range(rand32(), 0x800, 0xD7FF);
70                                         c_valid = true;
71                                         break;
72                                 case 3:
73                                         c = range(rand32(), 0xD800, 0xDFFF);
74                                         c_valid = false;
75                                         break;
76                                 case 4:
77                                         c = range(rand32(), 0xE000, 0xFFFF);
78                                         c_valid = true;
79                                         break;
80                                 case 5:
81                                         c = range(rand32(), 0x10000, 0x10FFFF);
82                                         c_valid = true;
83                                         break;
84                                 default:
85                                         do {
86                                                 c = rand32();
87                                         } while (c < 0x110000);
88                                         c_valid = false;
89                                         break;
90                         }
91                         
92                         codepoints[i] = c_valid ? c : REPLACEMENT_CHARACTER;
93                         
94                         len = utf8_write_char(c, o);
95                         if (len < 1 || len > 4) {
96                                 fail("utf8_write_char: Return value is not 1 thru 4.");
97                                 goto next_trial;
98                         }
99                         o += len;
100                 }
101                 if (o > oe) {
102                         fail("utf8_write_char: Buffer overflow (2)");
103                         goto next_trial;
104                 }
105                 
106                 string = malloc(o - write_buffer);
107                 memcpy(string, write_buffer, o - write_buffer);
108                 s = string;
109                 e = string + (o - write_buffer);
110                 
111                 if (!utf8_validate(s, e - s)) {
112                         fail("Invalid string produced by utf8_write_char.");
113                         goto next_trial_free_string;
114                 }
115                 
116                 for (i = 0; i < count; i++) {
117                         if (s >= e) {
118                                 fail("utf8_read_char: Buffer overflow (1)");
119                                 goto next_trial_free_string;
120                         }
121                         
122                         len = utf8_read_char(s, &c);
123                         if (len < 1 || len > 4) {
124                                 fail("utf8_read_char: Return value is not 1 thru 4.");
125                                 goto next_trial_free_string;
126                         }
127                         if (c != codepoints[i]) {
128                                 fail("utf8_read_char: Character read differs from that written.");
129                                 goto next_trial_free_string;
130                         }
131                         s += len;
132                 }
133                 if (s > e) {
134                         fail("utf8_read_char: Buffer overflow (2)");
135                         goto next_trial_free_string;
136                 }
137                 if (s < e) {
138                         fail("utf8_read_char: Did not reach end of string.");
139                         goto next_trial_free_string;
140                 }
141                 
142                 pass("Trial %d: %d characters", trial, count);
143                 
144         next_trial_free_string:
145                 free(string);
146         next_trial:;
147         }
148         
149         return exit_status();
150 }