1 /*
2         Copyright (c) 2009  Joseph A. Adams
3         All rights reserved.
4         
5         Redistribution and use in source and binary forms, with or without
6         modification, are permitted provided that the following conditions
7         are met:
8         1. Redistributions of source code must retain the above copyright
9            notice, this list of conditions and the following disclaimer.
10         2. Redistributions in binary form must reproduce the above copyright
11            notice, this list of conditions and the following disclaimer in the
12            documentation and/or other materials provided with the distribution.
13         3. The name of the author may not be used to endorse or promote products
14            derived from this software without specific prior written permission.
15         
16         THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17         IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18         OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19         IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20         INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21         NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22         DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23         THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24         (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25         THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <ccan/ccan_tokenizer/read_cnumber.c>
29 #include <ccan/ccan_tokenizer/read_cstring.c>
30 #include <ccan/ccan_tokenizer/dict.c>
31 #include <ccan/ccan_tokenizer/ccan_tokenizer.c>
32 #include <ccan/ccan_tokenizer/queue.c>
33 #include <ccan/ccan_tokenizer/charflag.c>
34
35 #include <ccan/ccan_tokenizer/ccan_tokenizer.h>
36
37 #include <ccan/tap/tap.h>
38
39 #include <math.h>
40
41 #define array_count_pair(type, ...) (const type []){__VA_ARGS__}, sizeof((const type []){__VA_ARGS__})/sizeof(type)
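/* array_count_pair(T, ...) expands to a compound-literal array of the given
   values followed by its element count; e.g. array_count_pair(int, 1, 2, 3)
   yields  (const int []){1, 2, 3}  and a sizeof expression that evaluates
   to 3, handy for filling pointer/count argument pairs in one go. */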
42
43 static void test_read_cstring(void) {
44         #define next() do {darray_free(str); darray_init(str); csp++;} while(0)
45         #define cs (*csp)
46         #define verify_quotechar(correct, correct_continuation_offset, quotechar) do { \
47                 const size_t s = sizeof(correct)-1; \
48                 p = read_cstring(&str, cs, cs ? strchr(cs, 0) : NULL, quotechar, &mq); \
49                 ok(str.size==s && str.alloc>s && str.item[s]==0 && \
50                 !memcmp(str.item, correct, s), \
51                 "\"%s: Is output correct?", cs); \
52                 ok(p == cs+correct_continuation_offset, "\"%s: Is continuation pointer correct?", cs); \
53                 next(); \
54         } while(0)
55         #define verify(correct, correct_continuation_offset) verify_quotechar(correct, correct_continuation_offset, '"')
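        /* verify_quotechar() decodes the next entry in cstrings[] with
           read_cstring(), checks the decoded bytes, the NUL terminator, and
           the returned continuation pointer, then advances csp with next().
           verify() is the common case with a double-quote terminator. */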
56         
57         const char * const cstrings[] = {
58                 NULL,
59                 "",
60                 "\"",
61                 "Hello world!\"",
62                 "Hello world!",
63                 "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\"",
64                 "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\'",
65                 "الأدب العربي\"",
66                 "Ends with \\",
67                 "Tab: '\\011' Space: '\\040' Overflow: '\\777' Ambiguous: '\\1013'\"",
68                 "\\x50\\x35\\x12\\xEF\\xFE\\x00012\\x345\""
69         };
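        /* The entries above correspond, in order, to the verify() and
           verify_quotechar() calls below. */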
70         const char * const *csp = cstrings;
71         const char *p;
72         darray_char str = darray_new();
73         tok_message_queue mq;
74         
75         queue_init(mq, NULL);
76         
77         //Check NULL input
78         verify("", 0);
79         
80         //Check an empty input
81         verify("", 0);
82         
83         //Check an empty quote-terminated string
84         verify("", 0);
85         
86         //Check a simple string
87         verify("Hello world!", 12);
88         
89         //Check a simple string without an end quote
90         verify("Hello world!", 12);
91         
92         //Check a collection of single-character sequences
93         verify("\\\f\e\b\0\a\r\nw\t\v\'\"", 26);
94         
95         //Check same collection of single-character sequences, this time using a single quote terminator
96         verify_quotechar("\\\f\e\b\0\a\r\nw\t\v\'\"", 26, '\'');
97         
98         //Check a real UTF-8 string
99         verify("\xd8\xa7\xd9\x84\xd8\xa3\xd8\xaf\xd8\xa8\x20\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", 23);
100         
101         //Check string ending in backslash
102         verify("Ends with \\", 11);
103         
104         //Check a series of octal escapes
105         verify("Tab: '\t' Space: ' ' Overflow: '\377' Ambiguous: 'A3'", 61);
106         
107         //Check a series of hex escapes
108         verify("\x50\x35\x12\xEF\xFE\x12\x45", 32);
109         
110         darray_free(str);
111         
112         //tok_message_queue_dump(&mq);
113         
114         //Verify the message queue
115         if (1)
116         {
117                 struct tok_message m;
118                 struct tok_message correct_messages[] = {
119                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
120                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
121                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
122                         {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
123                         //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_single_quote"},
124                         {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
125                         //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_double_quote"},
126                         {.level=TM_ERROR, .path="tokenize/read_cstring/ended_in_backslash"},
127                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
128                         {.level=TM_WARN, .path="tokenize/read_cstring/octal_overflow"},
129                         {.level=TM_INFO, .path="tokenize/read_cstring/ambiguous_octal"},
130                         {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
131                         {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
132                         {.level=TM_WARN, .path="tokenize/read_cstring/hex_overflow"},
133                 };
134                 size_t i, e=sizeof(correct_messages)/sizeof(*correct_messages);
135                 
136                 while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
137                         queue_skip(mq);
138                 for (i=0; i<e; i++) {
139                         if (!queue_count(mq))
140                                 break;
141                         m = dequeue(mq);
142                         if (m.level != correct_messages[i].level)
143                                 break;
144                         if (strcmp(m.path, correct_messages[i].path))
145                                 break;
146                         while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
147                                 queue_skip(mq);
148                 }
149                 if (i<e)
150                         printf("Item %zu is incorrect\n", i);
151                 ok(i==e, "Is message queue correct?");
152                 ok(!queue_count(mq), "Message queue should be empty now.");
153         }
154         
155         queue_free(mq);
156         #undef next
157         #undef cs
158         #undef verify_quotechar
159         #undef verify
160 }
161
162 #if 0
163 static void p(const char *str) {
164         if (str)
165                 puts(str);
166         else
167                 puts("(null)");
168 }
169 #endif
170
171 static void test_queue(void) {
172         #define next() do {queue_free(q); queue_init(q, NULL);} while(0)
173         
174         const char * const s[] = {
175                 "zero",
176                 "one",
177                 "two",
178                 "three",
179                 "four",
180                 "five",
181                 "six",
182                 "seven",
183                 "eight",
184                 "nine",
185                 "ten",
186                 "eleven",
187                 "twelve",
188                 "thirteen",
189                 "fourteen",
190                 "fifteen"
191         };
192         queue(const char*) q;
193         queue_init(q, NULL);
194         
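        /* Enqueue and dequeue in interleaved batches so items are removed
           while others are still pending, then drain the queue completely
           and check that dequeue_check() returns NULL once it is empty. */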
195         enqueue(q, s[0]);
196         enqueue(q, s[1]);
197         enqueue(q, s[2]);
198         enqueue(q, s[3]);
199         enqueue(q, s[4]);
200         enqueue(q, s[5]);
201         ok(queue_count(q) == 6, "Checking queue count");
202         
203         ok(dequeue_check(q)==s[0] &&
204                 dequeue_check(q)==s[1] &&
205                 dequeue_check(q)==s[2], "Dequeuing/checking 3 items");
206         ok(queue_count(q) == 3, "Checking queue count");
207         
208         enqueue(q, s[6]);
209         enqueue(q, s[7]);
210         enqueue(q, s[8]);
211         enqueue(q, s[9]);
212         enqueue(q, s[10]);
213         enqueue(q, s[11]);
214         enqueue(q, s[12]);
215         enqueue(q, s[13]);
216         enqueue(q, s[14]);
217         enqueue(q, s[15]);
218         ok(queue_count(q) == 13, "Checking queue count");
219         
220         ok(dequeue_check(q)==s[3] &&
221                 dequeue_check(q)==s[4] &&
222                 dequeue_check(q)==s[5] &&
223                 dequeue_check(q)==s[6] &&
224                 dequeue_check(q)==s[7] &&
225                 dequeue_check(q)==s[8] &&
226                 dequeue_check(q)==s[9] &&
227                 dequeue_check(q)==s[10] &&
228                 dequeue_check(q)==s[11] &&
229                 dequeue_check(q)==s[12] &&
230                 dequeue_check(q)==s[13] &&
231                 dequeue_check(q)==s[14] &&
232                 dequeue_check(q)==s[15], "Are queue items correct?");
233         ok(dequeue_check(q)==NULL && dequeue_check(q)==NULL && queue_count(q)==0, "Does queue run out correctly?");
234         
235         queue_free(q);
236         
237         #undef next
238 }
239
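/* Feed str through dict_lookup() and compare each hit against the expected
   id sequence in correct[].  A value of -100 means "no entry matches here":
   dict_lookup() must return NULL without moving the cursor, and the test
   skips a single byte instead. */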
240 #define test_dict_single() _test_dict_single(dict, str, sizeof(str)-1, correct, sizeof(correct)/sizeof(*correct))
241 static void _test_dict_single(struct dict *dict, const char *str, size_t len, int *correct, size_t correct_count) {
242         const char *s=str, *e=str+len;
243         size_t i;
244         struct dict_entry *entry;
245         
246         for (i=0; s<e && i<correct_count; i++) {
247                 const char *s_last = s;
248                 entry = dict_lookup(dict, &s, e);
249                 if (!entry) {
250                         if (s_last != s)
251                                 break; //dict_lookup should not modify *sp when it returns NULL
252                         s++;
253                         if (correct[i] != -100)
254                                 break;
255                         continue;
256                 }
257                 if (correct[i] != entry->id)
258                         break;
259                 if (!*entry->str) {
260                         if (s_last+1 != s)
261                                 break;
262                         if (s[-1] != 0)
263                                 break;
264                 } else {
265                         size_t len = strlen(entry->str);
266                         if (s_last+len != s)
267                                 break;
268                         if (strncmp(entry->str, s-len, len))
269                                 break;
270                 }
271                 //printf("Correctly read %s\n", entry->str);
272         }
273         
274         if (s!=e || i!=correct_count) {
275                 printf("Tokenization failed at ");
276                 fwrite(s, 1, e-s, stdout);
277                 printf("\n");
278         }
279         
280         ok(s==e && i==correct_count, "All of the tokens are correct");
281 }
282
283 static void test_dict(void) {
284         struct dict_entry dict_orig[] = {
285                 {-1, ""},
286                 {0, " "},
287                 {1, "it"},
288                 {2, "it's"},
289                 {3, "a"},
290                 {4, "beautiful"},
291                 {5, "be"},
292                 {6, "day"},
293                 {7, "d"},
294                 {8, "in"},
295                 {9, "the"},
296                 {10, "t"},
297                 {11, "neighborhood"},
298                 {12, "neighbor"},
299                 {13, "won't"},
300                 {14, " you"},
301                 {15, "my"},
302                 {16, "??"},
303                 {17, "item"},
304                 {18, "ip"},
305                 {19, "\xFF\xFA"},
306                 {20, "\xFF\xEE"},
307                 {21, "\x80\x12\x34"},
308                 {22, "\x80\x32"},
309                 {23, "\x80\x32\x34"}
310         };
311         struct dict *dict = dict_build(NULL, dict_orig, sizeof(dict_orig)/sizeof(*dict_orig));
312         
313         {
314                 const char *s=NULL, *e=NULL;
315                 ok(dict_lookup(dict, &s, e)==NULL && s==NULL && e==NULL, "dict_lookup does nothing and returns null on empty input");
316         }
317         
318         {
319                 const char str[] = "it's a beautiful day in the neighborhood\0won't you be my neighbor?";
320                 int correct[] = {2,0, 3,0, 4,0, 6,0, 8,0, 9,0, 11,-1, 13, 14,0, 5,0, 15,0, 12, -100};
321                 test_dict_single();
322         }
323         
324         //check equal-length tokens
325         {
326                 const char str[] = "it'sitem initip";
327                 int correct[] = {2,17,0, 8,1,18};
328                 test_dict_single();
329         }
330         
331         //check mostly invalid tokens
332         {
333                 const char str[] = "&^&beaumx yo youthx";
334                 int correct[] = {-100,-100,-100, 5,3,-100,-100,-100, 0,-100,-100, 14,10,-100,-100};
335                 test_dict_single();
336         }
337         
338         //check tokens that start with a character greater than 0x7F
339         {
340                 const char str[] = "\x80\x12\x34\x80\x32\x80\x32\x34\xFF\xFA\xFF\xEE";
341                 int correct[] = {21, 22, 23, 19, 20};
342                 test_dict_single();
343         }
344         
345         talloc_free(dict);
346         
347         //make sure dict_build doesn't blow up on an empty dictionary
348         dict = dict_build(NULL, NULL, 0);
349         talloc_free(dict);
350 }
351
352 static void test_charflag(void) {
353         char i;
354         int correct = 0;
355         
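        /* Each macro below increments `correct` only when the charflag
           predicates return exactly the combination expected for that class
           of character, so the final counts (128 for ASCII, 512 for the
           extended-range passes) show every byte was classified correctly. */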
356         #define CONTROL do { \
357                 if (ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
358                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
359                         !cextended(i) ) \
360                         correct++; \
361                 } while(0)
362         #define SPACE do { \
363                 if (!ccontrol(i) && cspace(i) && !creturn(i) && cwhite(i) && \
364                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
365                         !cextended(i) ) \
366                         correct++; \
367                 } while(0)
368         #define RETURN do { \
369                 if (!ccontrol(i) && !cspace(i) && creturn(i) && cwhite(i) && \
370                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
371                         !cextended(i) ) \
372                         correct++; \
373                 } while(0)
374         #define SYMBOL do { \
375                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
376                         !cdigit(i) && !cletter(i) && !chex(i) && csymbol(i) && \
377                         !cextended(i) ) \
378                         correct++; \
379                 } while(0)
380         #define DIGIT do { \
381                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
382                         cdigit(i) && !cletter(i) && chex(i) && !csymbol(i) && \
383                         !cextended(i) ) \
384                         correct++; \
385                 } while(0)
386         #define LETTER_HEX do { \
387                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
388                         !cdigit(i) && cletter(i) && chex(i) && !csymbol(i) && \
389                         !cextended(i) ) \
390                         correct++; \
391                 } while(0)
392         #define LETTER do { \
393                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
394                         !cdigit(i) && cletter(i) && !chex(i) && !csymbol(i) && \
395                         !cextended(i) ) \
396                         correct++; \
397                 } while(0)
398         #define EXTENDED do { \
399                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
400                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
401                         cextended(i) ) \
402                         correct++; \
403                 } while(0)
404         
405         for (i=0; i<'\t'; i++) CONTROL;
406         i = '\t'; SPACE;
407         i = '\n'; RETURN;
408         i = '\v'; SPACE;
409         i = '\f'; SPACE;
410         i = '\r'; RETURN;
411         for (i='\r'+1; i<' '; i++) CONTROL;
412         i = ' '; SPACE;
413         for (i='!'; i<='/'; i++) SYMBOL;
414         for (i='0'; i<='9'; i++) DIGIT;
415         for (i=':'; i<='@'; i++) SYMBOL;
416         for (i='A'; i<='F'; i++) LETTER_HEX;
417         for (i='G'; i<='Z'; i++) LETTER;
418         for (i='['; i<='`'; i++) SYMBOL;
419         for (i='a'; i<='f'; i++) LETTER_HEX;
420         for (i='g'; i<='z'; i++) LETTER;
421         for (i='{'; i<='~'; i++) SYMBOL;
422         i = '\x7F'; CONTROL;
423         
424         ok(correct==128, "ASCII characters have correct charflags");
425         correct = 0;
426         
427         //We do some goofy stuff here to make sure sign extension doesn't cause problems with charflags
428         {
429                 unsigned int ui;
430                 int si;
431                 
432                 for (ui=128; ui<=255; ui++) {
433                         i = ui;
434                         EXTENDED;
435                 }
436                 for (si=-128; si<0; si++) {
437                         i = si;
438                         EXTENDED;
439                 }
440         }
441         {
442                 int i;
443                 for (i=-128; i<0; i++) EXTENDED;
444         }
445         {
446                 unsigned int i;
447                 for (i=128; i<=255; i++) EXTENDED;
448         }
449         
450         ok(correct==512, "Extended characters have correct charflags");
451         
452         #undef CONTROL
453         #undef SPACE
454         #undef RETURN
455         #undef SYMBOL
456         #undef DIGIT
457         #undef LETTER_HEX
458         #undef LETTER
459         #undef EXTENDED
460 }
461
462 struct readui_test {
463         const char *txt;
464         size_t txt_size;
465         readui_base base;
466         
467         uint64_t correct_integer;
468         int correct_errno;
469         size_t correct_advance;
470 };
471
472 #define T(txt, ...) {txt, sizeof(txt)-1, __VA_ARGS__}
473 #define M (18446744073709551615ULL)
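/* T() pairs a literal with its length (excluding the NUL terminator); M is
   2^64 - 1 (UINT64_MAX), the value the tests expect for out-of-range input
   alongside errno == ERANGE. */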
474
475 struct readui_test readui_tests[] = {
476         //Basic reads
477         T("0",READUI_DEC, 0,0,1),
478         T(" \t42  ",READUI_DEC, 42,0,4),
479         
480         //Different bases
481         T("BADBEEFDEADBAT",READUI_HEX, 0xBADBEEFDEADBAULL,0,13),
482         T("7559",READUI_OCT, 0755,0,3),
483         T("01010010110012",READUI_BIN, 2649,0,13),
484         T("1000000000",0x7F, 8594754748609397887ULL,0,10),
485         
486         //Errors
487         T("",READUI_DEC, 0,EINVAL,0),
488         T("18446744073709551616",
489                 READUI_DEC,M,ERANGE,20),
490         T("1000000000000000000000000",
491                 READUI_DEC,M,ERANGE,25),
492         T("10000000000000000",
493                 READUI_HEX,M,ERANGE,17),
494         T("10000000000000000000000000000000000000000000000000000000000000000",
495                 READUI_BIN,M,ERANGE,65),
496         T("10000000000",
497                 0x7D,M,ERANGE,11),
498         T("9000000000",0x7F, M,ERANGE,10),
499         
500         //Misc
501         T("18446744073709551615",READUI_DEC, M,0,20),
502 };
503
504 static void test_readui_single(struct readui_test *test) {
505         uint64_t result_integer;
506         int result_errno;
507         size_t result_advance;
508         
509         const char *s = test->txt, *e = s+test->txt_size;
510         errno = 0;
511         result_integer = readui(&s, e, test->base);
512         result_errno = errno;
513         result_advance = s-test->txt;
514         
515         ok(result_integer == test->correct_integer &&
516            result_errno   == test->correct_errno &&
517            result_advance == test->correct_advance,
518            "Testing \"%s\"", test->txt);
519 }
520
521 static void test_readui(void) {
522         size_t i, count = sizeof(readui_tests)/sizeof(*readui_tests);
523         
524         for (i=0; i<count; i++)
525                 test_readui_single(readui_tests+i);
526 }
527
528 #undef T
529 #undef M
530
531 static void scan_number_sanity_check(const struct scan_number *sn,
532                 enum token_type type, const char *str_pipes, const char *msg) {
533         //If there is a prefix, it should follow
534         //the pattern (0 [B X b x]*0..1)
535         if (sn->prefix < sn->digits) {
536                 int len = sn->digits - sn->prefix;
537                 if (len!=1 && len!=2) {
538                         fail("%s : Prefix length is %d; should be 1 or 2",
539                                 str_pipes, len);
540                         return;
541                 }
542                 if (sn->prefix[0] != '0') {
543                         fail("%s : Prefix does not start with 0",
544                                 str_pipes);
545                         return;
546                 }
547                 if (len==2 && !strchr("BXbx", sn->prefix[1])) {
548                         fail("%s : Prefix is 0%c; should be 0, 0b, or 0x",
549                                 str_pipes, sn->prefix[1]);
550                         return;
551                 }
552                 if (len==1 && type==TOK_FLOATING) {
553                         fail("%s : Octal prefix appears on floating point number",
554                                 str_pipes);
555                         return;
556                 }
557         } else {
558         //if there is no prefix, the first digit should not be 0
559         //  unless this is a floating point number
560                 if (sn->digits < sn->exponent && sn->digits[0]=='0' &&
561                                 type==TOK_INTEGER) {
562                         fail("%s : First digit of non-prefix integer is 0",
563                                 str_pipes);
564                         return;
565                 }
566         }
567         
568         //Make sure sn->digits contains valid digits and is not empty
569         //  (unless prefix is "0")
570         {
571                 const char *s = sn->digits, *e = sn->exponent;
572                 if (sn->prefix+1 < sn->digits) {
573                         if (s >= e) {
574                                 fail("%s : 0%c not followed by any digits",
575                                         str_pipes, sn->prefix[1]);
576                                 return;
577                         }
578                         if (sn->prefix[1] == 'X' || sn->prefix[1] == 'x') {
579                                 while (s<e && strchr(
580                                         "0123456789ABCDEFabcdef.", *s)) s++;
581                         } else {
582                                 if (s[0]!='0' && s[0]!='1') {
583                                         fail("%s: Binary prefix not followed by a 0 or 1",
584                                                 str_pipes);
585                                         return;
586                                 }
587                                 while (s<e && strchr(
588                                         "0123456789.", *s)) s++;
589                         }
590                 } else {
591                         if (type==TOK_FLOATING && s >= e) {
592                                 fail("%s : sn->digits is empty in a floating point number",
593                                         str_pipes);
594                                 return;
595                         }
596                         if (sn->prefix >= sn->digits && s >= e) {
597                                 fail("%s : both sn->prefix and sn->digits are empty",
598                                         str_pipes);
599                                 return;
600                         }
601                         while (s<e && strchr("0123456789.", *s)) s++;
602                 }
603                 if (s != e) {
604                         fail("%s : sn->digits is not entirely valid", str_pipes);
605                         return;
606                 }
607         }
608         
609         //Make sure exponent follows the rules
610         if (sn->exponent < sn->suffix) {
611                 char c = sn->exponent[0];
612                 if (type==TOK_INTEGER) {
613                         fail("%s : sn->exponent is not empty in an integer", str_pipes);
614                         return;
615                 }
616                 if (sn->prefix < sn->digits && (c=='E' || c=='e')) {
617                         fail("%s : Exponent for hex/binary starts with %c", str_pipes, c);
618                         return;
619                 }
620                 if (sn->prefix >= sn->digits && (c=='P' || c=='p')) {
621                         fail("%s : Exponent for decimal starts with %c", str_pipes, c);
622                         return;
623                 }
624         }
625         
626         pass("%s%s", str_pipes, msg);
627         return;
628 }
629
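/* In str_pipes, '|' separates the expected prefix, digits, exponent, and
   suffix fields, with an optional fourth pipe marking where sn.end should
   fall; spaces are stripped before scanning, and with only three pipes the
   expected end defaults to the end of the string.  Example from the tests
   below: "0x | 50.1 | p+1 | f". */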
630 static void test_scan_number_single(const char *str_pipes,
631                                 enum token_type type, size_t dots_found) {
632         char *str = malloc(strlen(str_pipes)+1);
633         const char *expected[5];
634         struct scan_number sn;
635         enum token_type given_type;
636         
637         {
638                 const char *s = str_pipes;
639                 char *d = str;
640                 size_t pipes = 0;
641                 
642                 expected[0] = d;
643                 for (;*s;s++) {
644                         if (*s == ' ')
645                                 continue;
646                         if (*s == '|') {
647                                 if (++pipes > 4)
648                                         goto fail_too_many_pipes;
649                                 expected[pipes] = d;
650                         } else
651                                 *d++ = *s;
652                 }
653                 *d = 0;
654                 
655                 if (pipes < 3)
656                         goto fail_not_enough_pipes;
657                 if (pipes == 3)
658                         expected[4] = d;
659         }
660         
661         given_type = scan_number(&sn, str, strchr(str,0));
662         
663         if (sn.prefix != expected[0]) {
664                 fail("%s : sn.prefix is wrong", str_pipes);
665                 return;
666         }
667         if (sn.digits != expected[1]) {
668                 fail("%s : sn.digits is wrong", str_pipes);
669                 return;
670         }
671         if (sn.exponent != expected[2]) {
672                 fail("%s : sn.exponent is wrong", str_pipes);
673                 return;
674         }
675         if (sn.suffix != expected[3]) {
676                 fail("%s : sn.suffix is wrong", str_pipes);
677                 return;
678         }
679         if (sn.end != expected[4]) {
680                 fail("%s : sn.end is wrong", str_pipes);
681                 return;
682         }
683         if (given_type != type) {
684                 fail("%s : Type incorrect", str_pipes);
685                 return;
686         }
687         if (sn.dots_found != dots_found) {
688                 fail("%s : sn.dots_found is %zu; should be %zu", str_pipes,
689                         sn.dots_found, dots_found);
690                 return;
691         }
692         
693         scan_number_sanity_check(&sn, type, str_pipes, "");
694         
695         free(str);
696         return;
697         
698 fail_too_many_pipes:
699         fail("Too many pipes in the test string \"%s\"; should be 3", str_pipes);
700         return;
701 fail_not_enough_pipes:
702         fail("Not enough pipes in the test string \"%s\"; should be 3", str_pipes);
703         return;
704 }
705
706 #define T(str, type, dots_found) test_scan_number_single(str,type,dots_found)
707
708 static void test_scan_number(void) {
709         T("0x | 50.1 | p+1 | f", TOK_FLOATING, 1);
710         T("| 100 || L", TOK_INTEGER, 0);
711         T("0 ||| b21", TOK_INTEGER, 0);
712         T("0b | 101 || L", TOK_INTEGER, 0);
713         T("0X | 7Af ||| \t2", TOK_INTEGER, 0);
714         T("0|||b", TOK_INTEGER, 0);
715         T("0|||x", TOK_INTEGER, 0);
716 }
717
718 #undef T
719
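/* T() scans and reads an integer literal, checking its value, base, and
   suffix.  Q() dequeues the next message (if any) and checks its path
   against tokenize/read_cnumber/<name>.  E() asserts the message queue is
   now empty and frees it. */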
720 #define T(string, value, theBase, theSuffix) do { \
721         queue_init(mq, NULL); \
722         str = (string); \
723         type = scan_number(&sn, str, str+sizeof(string)-1); \
724         ok(type==TOK_INTEGER, "%s : type==TOK_INTEGER", str); \
725         scan_number_sanity_check(&sn, type, str, \
726                 " : scan_number_sanity_check passed"); \
727         read_integer(&integer, &sn, &mq); \
728         ok(integer.v==(value) && integer.base==(theBase) && \
729                 integer.suffix==(theSuffix), \
730                 "%s : Correct value and suffix", str); \
731         } while(0)
732 #define Q(name) do { \
733         if (queue_count(mq)) { \
734                 const char *path = dequeue(mq).path; \
735                 ok(!strcmp(path, "tokenize/read_cnumber/" #name), \
736                         "%s : Dequeued %s", str, path); \
737         } \
738         } while(0)
739 #define E() do { \
740         ok(queue_count(mq)==0, "%s : Message queue empty", str); \
741         if (queue_count(mq)) \
742                 tok_message_queue_dump(&mq); \
743         queue_free(mq); \
744         } while(0)
745
746 static void test_read_integer(void) {
747         struct scan_number sn;
748         tok_message_queue mq;
749         const char *str;
750         enum token_type type;
751         struct tok_integer integer;
752         
753         T("0b0lu", 0, 8, TOK_UL);
754         E();
755         
756         T("1", 1, 10, TOK_NOSUFFIX);
757         E();
758         
759         T("32Q", 32, 10, TOK_NOSUFFIX);
760         Q(integer_suffix_invalid);
761         E();
762         
763         T("32i", 32, 10, TOK_I);
764         E();
765         
766         T("0755f", 493, 8, TOK_NOSUFFIX);
767         Q(suffix_float_only);
768         E();
769         
770         T("0xDeadBeef", 0xDEADBEEF, 16, TOK_NOSUFFIX);
771         E();
772         
773         T("12345678901234567890$1_LONG.SUFFIX", 12345678901234567890ULL, 10, TOK_NOSUFFIX);
774         ok1(sn.end == strchr(str, 0));
775         Q(integer_suffix_invalid);
776         E();
777         
778         T("0xDEADBEEFlull", 0xDEADBEEF, 16, TOK_NOSUFFIX);
779         Q(integer_suffix_invalid);
780         E();
781         
782         T("0xBALLuu", 0xBA, 16, TOK_NOSUFFIX);
783         Q(integer_suffix_invalid);
784         E();
785         
786         T("123456789012345678901", 18446744073709551615ULL, 10, TOK_NOSUFFIX);
787         Q(integer_out_of_range);
788         E();
789         
790         T("09", 0, 8, TOK_NOSUFFIX);
791         Q(integer_invalid_digits);
792         E();
793 }
794
795 #undef T
796 #undef E
797
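/* Teq() lets the caller supply an arbitrary predicate on floating.v (such
   as isinf() or an exact-zero check); T() wraps it in an absolute-error
   tolerance of 1e-17.  The input is copied into a malloc'd buffer because
   read_floating() temporarily writes a NUL terminator (see str below). */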
798 #define Teq(string, equals, theSuffix) do { \
799         queue_init(mq, NULL); \
800         str = malloc(sizeof(string)); \
801         memcpy(str, string, sizeof(string)); \
802         type = scan_number(&sn, str, str+sizeof(string)-1); \
803         ok(type==TOK_FLOATING, "%s : type==TOK_FLOATING", str); \
804         scan_number_sanity_check(&sn, type, str, \
805                 " : scan_number_sanity_check passed"); \
806         read_floating(&floating, &sn, &mq); \
807         ok((equals) && \
808                 floating.suffix==(theSuffix), \
809                 "%s : Correct value and suffix", str); \
810         } while(0)
811 #define T(string, value, theSuffix) \
812         Teq(string, fabsl(floating.v - (value)) <= 0.00000000000000001, theSuffix)
813 #define E() do { \
814         ok(queue_count(mq)==0, "%s : Message queue empty", str); \
815         if (queue_count(mq)) \
816                 tok_message_queue_dump(&mq); \
817         queue_free(mq); \
818         free(str); \
819         } while(0)
820
821 static void test_read_floating(void) {
822         struct scan_number sn;
823         tok_message_queue mq;
824         char *str; //str is a malloced copy so read_floating can do its null terminator trick
825         enum token_type type;
826         struct tok_floating floating;
827         
828         T("1.0", 1.0, TOK_NOSUFFIX);
829         E();
830         
831         T("0.0", 0.0, TOK_NOSUFFIX);
832         E();
833         
834         T("0755e1", 7550.0, TOK_NOSUFFIX);
835         E();
836         
837         T("0xD.Bp0", 0xD.Bp0, TOK_NOSUFFIX);
838         E();
839         
840         //GCC doesn't throw any errors or warnings for this odd case,
841         //but we call it an error to be consistent with strtold
842         T("0x.p0", 0.0, TOK_NOSUFFIX);
843         Q(floating_invalid_digits);
844         E();
845         
846         T("32.0Q", 32.0, TOK_NOSUFFIX);
847         Q(floating_suffix_invalid);
848         E();
849         
850         T("32.0Li", 32.0, TOK_IMAG_L);
851         E();
852         
853         T("32.0LL", 32.0, TOK_NOSUFFIX);
854         Q(suffix_integer_only);
855         E();
856         
857         Teq("0xDEAD.BEEF", floating.v==0.0, TOK_NOSUFFIX);
858         Q(hex_float_no_exponent);
859         E();
860         
861         T("0b101.0p0", 0, TOK_NOSUFFIX);
862         Q(binary_float);
863         E();
864         
865         /* If any of the following three tests fails, consider increasing
866            the e+ and e- values. */
867         
868         Teq("1.e+4933", isinf(floating.v), TOK_NOSUFFIX);
869         Q(floating_out_of_range);
870         E();
871         
872         /* For some reason, strtold sets errno=EDOM on x86; on my
873            PowerPC G4 running Fedora 10, the same phenomenon occurs,
874            but the exponents involved are e+309, e-324, and e-325. */
875         Teq("1.e-4951", floating.v==0.0, TOK_NOSUFFIX);
876         Q(floating_out_of_range);
877         E();
878         
879         Teq("1.e-4952", floating.v==0.0, TOK_NOSUFFIX);
880         Q(floating_out_of_range);
881         E();
882         
883 }
884
885 #undef Teq
886 #undef T
887 #undef Q
888 #undef E
889
890 struct tokenizer_test {
891         const char *txt;
892         size_t txt_size;
893         
894         const struct token *tokens;
895         size_t token_count;
896 };
897
898 #define T(txt, ...) {txt, sizeof(txt)-1, array_count_pair(struct token, __VA_ARGS__)}
899 #define string(txt) {.string=(darray_char[1]){{.item = (char *)(txt), .size = sizeof(txt)-1}}}
900 #define opkw(v) {.opkw = (v)}
901 #define txt(t) .txt = (t), .txt_size = sizeof(t)-1
902 #define integer(...) {.integer={__VA_ARGS__}}
903 #define floating(...) {.floating={__VA_ARGS__}}
904 #define space {.type = TOK_WHITE, .txt = " ", .txt_size = 1}
905 #define startline {.type = TOK_STARTLINE}
906 #define include(str) {.include = (char *)(str)}
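/* Helpers for building the expected token arrays: string() wraps a literal
   in a darray_char, txt() records the exact source text and its length,
   space and startline are shorthand for whitespace/start-of-line tokens,
   and include() holds the path carried by an #include token. */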
907
908 struct tokenizer_msg_test {
909         struct tokenizer_test test;
910         
911         const char * const *messages;
912         size_t message_count;
913 };
914
915 #define M(...) array_count_pair(const char *, __VA_ARGS__)
916
917 struct tokenizer_test tokenizer_tests[] = {
918         { "", 0, 0 },
919         T("\n",
920                 {.type = TOK_WHITE, txt("\n")}
921         ),
922         T("\na",
923                 {.type = TOK_WHITE, txt("\n")},
924                 startline,
925                 {.type = TOK_IDENTIFIER, txt("a")}
926         ),
927         T("int n = c++;",
928                 {.type = TOK_KEYWORD,
929                         opkw(INT),
930                         txt("int")
931                 }, space,
932                 {.type = TOK_IDENTIFIER,
933                         txt("n")
934                 }, space,
935                 {.type = TOK_OPERATOR,
936                         opkw('='),
937                         txt("=")
938                 }, space,
939                 {.type = TOK_IDENTIFIER,
940                         txt("c")
941                 },
942                 {.type = TOK_OPERATOR,
943                         opkw(INC_OP),
944                         txt("++")
945                 },
946                 {.type = TOK_OPERATOR,
947                         opkw(';'),
948                         txt(";")
949                 }
950         ),
951         T(".5 42 ",
952                 {.type = TOK_FLOATING,
953                         floating(.5, TOK_NOSUFFIX),
954                         txt(".5")
955                 }, space,
956                 {.type = TOK_INTEGER,
957                         integer(42, 10, TOK_NOSUFFIX),
958                         txt("42")
959                 }, space,
960         ),
961         //Make sure TOK_STRAY doesn't take over the universe
962         T("``AS IS'' AND",
963                 {.type = TOK_STRAY,
964                         txt("``")
965                 },
966                 {.type = TOK_IDENTIFIER,
967                         txt("AS")
968                 }, space,
969                 {.type = TOK_IDENTIFIER,
970                         txt("IS")
971                 },
972                 {.type = TOK_CHAR,
973                         string(""),
974                         txt("\'\'")
975                 }, space,
976                 {.type = TOK_IDENTIFIER,
977                         txt("AND")
978                 }
979         ),
980         //Make sure starting with 0 doesn't result in skipping whitespace
981         T("0 .05 0 500",
982                 {.type = TOK_INTEGER,
983                         integer(0, 8, TOK_NOSUFFIX),
984                         txt("0")
985                 }, space,
986                 {.type = TOK_FLOATING,
987                         floating(.05, TOK_NOSUFFIX),
988                         txt(".05")
989                 }, space,
990                 {.type = TOK_INTEGER,
991                         integer(0, 8, TOK_NOSUFFIX),
992                         txt("0")
993                 }, space,
994                 {.type = TOK_INTEGER,
995                         integer(500, 10, TOK_NOSUFFIX),
996                         txt("500")
997                 }
998         ),
999         //Make sure a simple preprocessor directive works
1000         T("\t/*comment*/ #include \"include.h\"\n",
1001                 {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
1002                 {.flags={1,0}, .type=TOK_CCOMMENT, txt("/*comment*/")},
1003                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1004                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1005                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(INCLUDE), txt("include")},
1006                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1007                 {.flags={1,0}, .type=TOK_STRING_IQUOTE, include("include.h"), txt("\"include.h\"")},
1008                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")}
1009         ),
1010         //Make sure __VA_ARGS__ is lexed correctly
1011         T("if #define __VA_ARGS__=0X5FULL;\n"
1012           " #define __VA_ARGS__(__VA_ARGS__, ...\t)__VA_ARGS__ bar int define",
1013                 {.type=TOK_KEYWORD, opkw(IF), txt("if")},
1014                 space,
1015                 {.type=TOK_OPERATOR, opkw('#'), txt("#")},
1016                 {.type=TOK_IDENTIFIER, txt("define")},
1017                 space,
1018                 {.type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1019                 {.type=TOK_OPERATOR, opkw('='), txt("=")},
1020                 {.type=TOK_INTEGER, integer(0x5F,16,TOK_ULL), txt("0X5FULL")},
1021                 {.type=TOK_OPERATOR, opkw(';'), txt(";")},
1022                 {.type=TOK_WHITE, txt("\n")},
1023                 {.flags={1,0}, .type=TOK_STARTLINE},
1024                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1025                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1026                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1027                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1028                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1029                 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1030                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1031                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(','), txt(",")},
1032                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1033                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(ELLIPSIS), txt("...")},
1034                 {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
1035                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
1036                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(VA_ARGS), txt("__VA_ARGS__")},
1037                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1038                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1039                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1040                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1041                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1042                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
1043         ),
1044         //__VA_ARGS__ is an identifier if no ... operator is in the parameter list or if there is no parameter list
1045         T("#define foo __VA_ARGS__ bar int define\n#define foo() __VA_ARGS__ bar int define",
1046                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1047                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1048                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1049                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1050                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1051                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1052                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1053                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1054                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1055                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1056                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1057                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
1058                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1059                 
1060                 {.flags={1,0}, .type=TOK_STARTLINE},
1061                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1062                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1063                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1064                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1065                 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1066                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
1067                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1068                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1069                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1070                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1071                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1072                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1073                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1074                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")}
1075         ),
1076         
1077         //Test various integer suffixen
1078         T("1 1u 1l 1ul 1lu 1ll 1ull 1llu 1U 1L 1UL 1LU 1LL 1ULL 1LLU "
1079           "1uq 1lq 1llq 1ulq 1luq 1f 1i",
1080                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1")}, space,
1081                 {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1u")}, space,
1082                 {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1l")}, space,
1083                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1ul")}, space,
1084                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1lu")}, space,
1085                 {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1ll")}, space,
1086                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ull")}, space,
1087                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1llu")}, space,
1088                 {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1U")}, space,
1089                 {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1L")}, space,
1090                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1UL")}, space,
1091                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1LU")}, space,
1092                 {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1LL")}, space,
1093                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ULL")}, space,
1094                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1LLU")}, space,
1095                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1uq")}, space,
1096                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1lq")}, space,
1097                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1llq")}, space,
1098                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1ulq")}, space,
1099                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1luq")}, space,
1100                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1f")}, space,
1101                 {.type=TOK_INTEGER, integer(1, 10, TOK_I), txt("1i")}
1102         ),
1103         //Test non-standard newlines
1104         T("\n\r\n \r\n\rint",
1105                 {.type=TOK_WHITE, txt("\n\r")}, startline,
1106                 {.type=TOK_WHITE, txt("\n")}, startline,
1107                 space,
1108                 {.type=TOK_WHITE, txt("\r\n")}, startline,
1109                 {.type=TOK_WHITE, txt("\r")}, startline,
1110                 {.type=TOK_KEYWORD, opkw(INT), txt("int")}
1111         ),
1112         //Test backslash-broken lines
1113         T("oner\\ \nous",
1114                 {.type=TOK_IDENTIFIER, txt("onerous")}
1115         ),
1116         T("\\\n\\\n\\\n\\",
1117                 {.type=TOK_STRAY, txt("\\")}
1118         ),
1119         T("in\\\nt i\\;\nf\\ \r\nor (i=0; i<10; i++) {\\",
1120                 {.type=TOK_KEYWORD, opkw(INT), txt("int")}, space,
1121                 {.type=TOK_IDENTIFIER, txt("i")},
1122                 {.type=TOK_STRAY, txt("\\")},
1123                 {.type=TOK_OPERATOR, opkw(';'), txt(";")},
1124                 {.type=TOK_WHITE, txt("\n")},
1125                 
1126                 startline,
1127                 {.type=TOK_KEYWORD, opkw(FOR), txt("for")}, space,
1128                 {.type=TOK_OPERATOR, opkw('('), txt("(")},
1129                 {.type=TOK_IDENTIFIER, txt("i")},
1130                 {.type=TOK_OPERATOR, opkw('='), txt("=")},
1131                 {.type=TOK_INTEGER, integer(0,8,0), txt("0")},
1132                 {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
1133                 {.type=TOK_IDENTIFIER, txt("i")},
1134                 {.type=TOK_OPERATOR, opkw('<'), txt("<")},
1135                 {.type=TOK_INTEGER, integer(10,10,0), txt("10")},
1136                 {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
1137                 {.type=TOK_IDENTIFIER, txt("i")},
1138                 {.type=TOK_OPERATOR, opkw(INC_OP), txt("++")},
1139                 {.type=TOK_OPERATOR, opkw(')'), txt(")")}, space,
1140                 {.type=TOK_OPERATOR, opkw('{'), txt("{")},
1141                 {.type=TOK_STRAY, txt("\\")}
1142         ),
1143         //More preprocessor directive tests
1144         T("#apple\n#pragma\n#const\n#define \t\n#define foo(x",
1145                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1146                 {.flags={1,1}, .type=TOK_IDENTIFIER, txt("apple")},
1147                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1148                 
1149                 {.flags={1,0}, .type=TOK_STARTLINE},
1150                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1151                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(PRAGMA), txt("pragma")},
1152                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1153                 
1154                 {.flags={1,0}, .type=TOK_STARTLINE},
1155                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1156                 {.flags={1,1}, .type=TOK_IDENTIFIER, txt("const")},
1157                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1158                 
1159                 {.flags={1,0}, .type=TOK_STARTLINE},
1160                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1161                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1162                 {.flags={1,0}, .type=TOK_WHITE, txt(" \t")},
1163                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1164                 
1165                 {.flags={1,0}, .type=TOK_STARTLINE},
1166                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1167                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1168                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1169                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1170                 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1171                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("x")}
1172         ),
1173         T("#define",
1174                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1175                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")}
1176         ),
1177         T("#define foo",
1178                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1179                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1180                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1181                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")}
1182         ),
1183         T("`#define foo",
1184                 {.type=TOK_STRAY, txt("`")},
1185                 {.type=TOK_OPERATOR, opkw('#'), txt("#")},
1186                 {.type=TOK_IDENTIFIER, txt("define")},
1187                 space,
1188                 {.type=TOK_IDENTIFIER, txt("foo")}
1189         )
1190 };
1191
1192 struct tokenizer_msg_test tokenizer_msg_tests[] = {
1193         {T("/* Unterminated C comment",
1194                 {.type=TOK_CCOMMENT, txt("/* Unterminated C comment")}
1195         ), M(
1196                 "unterminated_comment"
1197         )},
1198         {T("\"\n\"\"\n",
1199                 {.type=TOK_STRING, string("\n"), txt("\"\n\"")},
1200                 {.type=TOK_STRING, string("\n"), txt("\"\n")}
1201         ), M(
1202                 "read_cstring/quote_newlines",
1203                 "read_cstring/missing_endquote"
1204         )},
1205 };
1206
1207 #undef T
1208 #undef string
1209 #undef opkw
1210 #undef txt
1211 #undef integer
1212 #undef floating
1213 #undef M
1214 #undef include
1215
1216 static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *mq) {
1217         struct token_list *tl;
1218         size_t i, count = t->token_count, gen_count;
1219         const struct token *tok_gen, *tok_correct;
1220         int success = 1;
1221         char *txt = talloc_memdup(NULL, t->txt, t->txt_size);
1222         size_t txt_size = t->txt_size;
1223         #define failed(fmt, ...) do { \
1224                 printf("Error: " fmt "\n", ##__VA_ARGS__); \
1225                 success = 0; \
1226                 goto done; \
1227         } while(0)
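        /* failed() prints the error, marks this test unsuccessful, and jumps
           to done:, which reports the result and dumps the generated token
           list for inspection on failure. */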
1228         
1229         tl = tokenize(txt, txt, txt_size, mq);
1230         
1231         if (tl->orig != txt || tl->orig_size != txt_size)
1232                 failed("tokenize() did not replicate orig/orig_size from arguments");
1233         if (!token_list_sanity_check(tl, stdout))
1234                 failed("Sanity check failed");
1235         
1236         gen_count = token_list_count(tl);
1237         if (gen_count != count+1)
1238                 failed("Incorrect number of tokens (%zu, should be %zu)\n",
1239                         gen_count, count+1);
1240         
1241         tok_gen = tl->first->next; //skip the beginning TOK_STARTLINE
1242         tok_correct = t->tokens;
1243         for (i=0; i<count; i++, tok_gen=tok_gen->next, tok_correct++) {
1244                 if (tok_gen->type != tok_correct->type)
1245                         failed("Token \"%s\": Incorrect type", tok_correct->txt);
1246                 {
1247                         struct token_flags g=tok_gen->flags, c=tok_correct->flags;
1248                         if (g.pp!=c.pp || g.pp_directive!=c.pp_directive)
1249                                 failed("Token \"%s\": Incorrect flags", tok_correct->txt);
1250                 }
1251                 switch (tok_gen->type) {
1252                         case TOK_INTEGER:
1253                                 if (tok_gen->integer.v != tok_correct->integer.v ||
1254                                     tok_gen->integer.base != tok_correct->integer.base ||
1255                                     tok_gen->integer.suffix != tok_correct->integer.suffix)
1256                                         failed("Token \"%s\": Integer value/base/suffix incorrect", tok_correct->txt);
1257                                 break;
1258                         case TOK_FLOATING:
1259                                 if (fabsl(tok_gen->floating.v - tok_correct->floating.v) > 0.00000000000000001 ||
1260                                     tok_gen->floating.suffix != tok_correct->floating.suffix)
1261                                         failed("Token \"%s\": Floating point value/suffix incorrect", tok_correct->txt);
1262                                 break;
1263                         case TOK_OPERATOR:
1264                                 if (tok_gen->opkw != tok_correct->opkw)
1265                                         failed("Token \"%s\": Operator opkw incorrect", tok_correct->txt);
1266                                 break;
1267                         case TOK_KEYWORD:
1268                                 if (tok_gen->opkw != tok_correct->opkw)
1269                                         failed("Token \"%s\": Keyword opkw incorrect", tok_correct->txt);
1270                                 break;
1271                         case TOK_CHAR:
1272                         case TOK_STRING:
1273                                 //anything using string
1274                                 if (tok_gen->string->size != tok_correct->string->size ||
1275                                         memcmp(tok_gen->string->item, tok_correct->string->item,
1276                                         tok_gen->string->size) ||
1277                                         tok_gen->string->item[tok_gen->string->size] != 0 )
1278                                         failed("Token \"%s\": String value incorrect", tok_correct->txt);
1279                                 break;
1280                         case TOK_STRING_IQUOTE:
1281                         case TOK_STRING_IANGLE:
1282                                 if (strcmp(tok_gen->include, tok_correct->include))
1283                                         failed("Token \"%s\": #include string incorrect", tok_correct->txt);
1284                                 break;
1285                         case TOK_IDENTIFIER:
1286                         case TOK_CCOMMENT:
1287                         case TOK_CPPCOMMENT:
1288                         case TOK_WHITE:
1289                         case TOK_STARTLINE:
1290                         case TOK_STRAY:
1291                                 break;
1292                 }
1293                 if (tok_gen->type!=TOK_STARTLINE && (
1294                         tok_gen->txt_size != tok_correct->txt_size ||
1295                         memcmp(tok_gen->txt, tok_correct->txt, tok_gen->txt_size))
1296                         )
1297                         failed("Token \"%s\": txt incorrect", tok_correct->txt);
1298         }
1299         
1300         #undef failed
1301 done:
1302         ok(success==1, "Tokenize %s", t->txt);
1303         
1304         if (!success)
1305                 token_list_dump(tl, stdout);
1306         
1307         talloc_free(txt);
1308 }
1309
1310 static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
1311         FILE *f = fopen(file_name, "rb");
1312         darray_char *text = talloc_darray(NULL);
1313         const size_t inc = 1024;
1314         struct token_list *tl;
1315         
1316         if (!f) {
1317                 fail("Could not read file '%s': %s", file_name, strerror(errno));
1318                 goto end;
1319         }
1320         
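             //Read the file in inc-sized chunks, keeping the buffer NUL-terminated after each read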
1321         for (;;) {
1322                 size_t read_len;
1323                 
1324                 darray_realloc(*text, text->size+inc+1);
1325                 read_len = fread(text->item+text->size, 1, inc, f);
1326                 text->size += read_len;
1327                 text->item[text->size] = 0;
1328                 
1329                 if (read_len < inc)
1330                         break;
1331                 
1332         }
1333         if (ferror(f)) {
1334                 fail("Error reading file '%s': %s", file_name, strerror(errno));
1335                 goto end;
1336         }
1337         
1338         tl = tokenize(text, text->item, text->size, mq);
1339         tl->filename = file_name;
1340         
1341         //printf("File '%s' has %zu tokens\n", file_name, token_list_count(tl));
1342         //token_list_dump(tl, stdout);
1343         
1344         if (!token_list_sanity_check(tl, stdout)) {
1345                 fail("Sanity check failed for file '%s'", file_name);
1346                 goto end;
1347         }
1348         
1349         pass("File '%s' has %zu tokens", file_name, token_list_count(tl));
1350         
1351         /*while (queue_count(*mq)) {
1352                 struct tok_message msg = dequeue(*mq);
1353                 tok_message_print(&msg, tl);
1354         }*/
1355         
1356 end:
1357         talloc_free(text);
1358         if (f)
1359                 fclose(f);
1360 }
1361
1362 static void test_tokenizer(void) {
1363         tok_message_queue mq;
1364         size_t i, count;
1365         int has_warn_or_worse = 0;
1366         
1367         queue_init(mq, NULL);
1368         
1369         count = sizeof(tokenizer_tests)/sizeof(*tokenizer_tests);
1370         for (i=0; i<count; i++) {
1371                 test_tokenizer_single(tokenizer_tests+i, &mq);
1372                 while (queue_count(mq)) {
1373                         struct tok_message msg = dequeue(mq);
1374                         (void) msg;
1375                         //tok_message_dump(&msg);
1376                 }
1377         }
1378         
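             //Each tokenizer_msg_tests entry pairs a test case with the message paths tokenize() should queue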
1379         count = sizeof(tokenizer_msg_tests)/sizeof(*tokenizer_msg_tests);
1380         for (i=0; i<count; i++) {
1381                 size_t j;
1382                 test_tokenizer_single(&tokenizer_msg_tests[i].test, &mq);
1383                 
1384                 if (queue_count(mq) != tokenizer_msg_tests[i].message_count) {
1385                         fail("Incorrect number of messages from tokenize()");
1386                         while (queue_count(mq))
1387                                 (void) dequeue(mq);
1388                         goto msg_fail;
1389                 }
1390                 
1391                 for (j=0; queue_count(mq); j++) {
1392                         struct tok_message msg = dequeue(mq);
1393                         const char *base = "tokenize/";
1394                         size_t baselen = strlen(base);
1395                         //tok_message_dump(&msg);
1396                         
1397                         if (strncmp(msg.path, base, baselen)) {
1398                                 fail("Message from tokenize() doesn't start with \"%s\"",
1399                                         base);
1400                                 goto msg_fail;
1401                         }
1402                         if (strcmp(msg.path+baselen,
1403                                         tokenizer_msg_tests[i].messages[j])) {
1404                                 fail("Incorrect message %s, should be %s",
1405                                         msg.path+baselen, tokenizer_msg_tests[i].messages[j]);
1406                                 goto msg_fail;
1407                         }
1408                 }
1409                 
1410                 pass("Messages from tokenize() are correct");
1411         msg_fail:;
1412         }
1413         
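             //Tokenize this test file itself; warnings or worse make the final test fail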
1414         test_tokenizer_file("test/run.c", &mq);
1415         
1416         while (queue_count(mq)) {
1417                 struct tok_message msg = dequeue(mq);
1418                 if (msg.level >= TM_WARN) {
1419                         has_warn_or_worse = 1;
1420                         tok_message_dump(&msg);
1421                 }
1422                 //else tok_message_dump(&msg);
1423         }
1424         
1425         ok(has_warn_or_worse==0, "Tokenizing run.c generated%s warnings, errors, or bugs",
1426                 has_warn_or_worse ? "" : " no");
1427         
1428         queue_free(mq);
1429 }
1430
1431 #include <unistd.h>
1432
1433 int main(void)
1434 {
1435         plan_tests(195);
1436         
1437         diag("* Checking queue...");
1438         test_queue();
1439         
1440         diag("* Checking read_cstring...");
1441         test_read_cstring();
1442         
1443         diag("* Checking dict...");
1444         test_dict();
1445         
1446         diag("* Checking charflag...");
1447         test_charflag();
1448         
1449         diag("* Checking readui...");
1450         test_readui();
1451         
1452         diag("* Checking scan_number...");
1453         test_scan_number();
1454         
1455         diag("* Checking read_integer...");
1456         test_read_integer();
1457         
1458         diag("* Checking read_floating...");
1459         test_read_floating();
1460         
1461         diag("* Checking tokenizer...");
1462         test_tokenizer();
1463         
1464         /* This exits depending on whether all tests passed */
1465         return exit_status();
1466 }