2 Copyright (c) 2009 Joseph A. Adams
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
8 1. Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 2. Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 3. The name of the author may not be used to endorse or promote products
14 derived from this software without specific prior written permission.
16 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include <ccan/ccan_tokenizer/read_cnumber.c>
29 #include <ccan/ccan_tokenizer/read_cstring.c>
30 #include <ccan/ccan_tokenizer/dict.c>
31 #include <ccan/ccan_tokenizer/ccan_tokenizer.c>
32 #include <ccan/ccan_tokenizer/queue.c>
33 #include <ccan/ccan_tokenizer/charflag.c>
35 #include <ccan/ccan_tokenizer/ccan_tokenizer.h>
37 #include <ccan/tap/tap.h>
/* Expands to "array, count": a compound-literal array of `type` built from
 * the variadic arguments, followed by its element count.  Lets a single
 * macro argument list supply both a pointer and a length to a function. */
41 #define array_count_pair(type, ...) (const type []){__VA_ARGS__}, sizeof((const type []){__VA_ARGS__})/sizeof(type)
/* Exercises read_cstring(): decodes a table of escaped C string literals,
 * checking the decoded bytes, the returned continuation pointer, and the
 * diagnostics pushed onto the message queue.
 * NOTE(review): this extract appears truncated — several lines of this
 * function (including some macro closers) are elided; verify against the
 * original source before building. */
43 static void test_read_cstring(void) {
/* Frees the output buffer and advances to the next test literal. */
44 #define next() do {darray_free(str); darray_init(str); csp++;} while(0)
46 #define verify_quotechar(correct, correct_continuation_offset, quotechar) do { \
47 const size_t s = sizeof(correct)-1; \
48 p = read_cstring(&str, cs, cs ? strchr(cs, 0) : NULL, quotechar, &mq); \
49 ok(str.size==s && str.alloc>s && str.item[s]==0 && \
50 !memcmp(str.item, correct, s), \
51 "\"%s: Is output correct?", cs); \
52 ok(p == cs+correct_continuation_offset, "\"%s: Is continuation pointer correct?", cs); \
55 #define verify(correct, correct_continuation_offset) verify_quotechar(correct, correct_continuation_offset, '"')
/* Raw (escaped) input literals; each expected decoding appears in the
 * verify() calls below.  Some table entries are elided in this extract. */
57 const char * const cstrings[] = {
63 "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\"",
64 "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\'",
67 "Tab: '\\011' Space: '\\040' Overflow: '\\777' Ambiguous: '\\1013'\"",
68 "\\x50\\x35\\x12\\xEF\\xFE\\x00012\\x345\""
70 const char * const *csp = cstrings;
72 darray_char str = darray_new();
80 //Check an empty input
83 //Check an empty quote-terminated string
86 //Check a simple string
87 verify("Hello world!", 12);
89 //Check a simple string without an end quote
90 verify("Hello world!", 12);
92 //Check a collection of single-character sequences
93 verify("\\\f\e\b\0\a\r\nw\t\v\'\"", 26);
95 //Check same collection of single-character sequences, this time using a single quote terminator
96 verify_quotechar("\\\f\e\b\0\a\r\nw\t\v\'\"", 26, '\'');
98 //Check a real UTF-8 string
99 verify("\xd8\xa7\xd9\x84\xd8\xa3\xd8\xaf\xd8\xa8\x20\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", 23);
101 //Check string ending in backslash
102 verify("Ends with \\", 11);
104 //Check a series of octal escapes
105 verify("Tab: '\t' Space: ' ' Overflow: '\377' Ambiguous: 'A3'", 61);
107 //Check a series of hex escapes
108 verify("\x50\x35\x12\xEF\xFE\x12\x45", 32);
112 //tok_message_queue_dump(&mq);
114 //Verify the message queue
/* Expected diagnostics, in the order the inputs above should produce them. */
117 struct tok_message m;
118 struct tok_message correct_messages[] = {
119 {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
120 {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
121 {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
122 {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
123 //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_single_quote"},
124 {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
125 //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_double_quote"},
126 {.level=TM_ERROR, .path="tokenize/read_cstring/ended_in_backslash"},
127 {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
128 {.level=TM_WARN, .path="tokenize/read_cstring/octal_overflow"},
129 {.level=TM_INFO, .path="tokenize/read_cstring/ambiguous_octal"},
130 {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
131 {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
132 {.level=TM_WARN, .path="tokenize/read_cstring/hex_overflow"},
134 size_t i, e=sizeof(correct_messages)/sizeof(*correct_messages);
/* Skip leading debug-level messages, then match each queued message's
 * level and path against the expected table. */
136 while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
138 for (i=0; i<e; i++) {
139 if (!queue_count(mq))
142 if (m.level != correct_messages[i].level)
144 if (strcmp(m.path, correct_messages[i].path))
146 while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
150 printf("Item %zu is incorrect\n", i);
151 ok(i==e, "Is message queue correct?");
152 ok(!queue_count(mq), "Message queue should be empty now.");
158 #undef verify_quotechar
/* NOTE(review): body elided in this extract — presumably a small helper
 * taking a string (used by the queue tests below?); confirm against the
 * original source. */
163 static void p(const char *str) {
/* Exercises the queue module: enqueues string pointers from s[], then
 * checks counts and dequeued pointer identity (==, not strcmp).
 * NOTE(review): several lines (the enqueue calls, parts of s[]) are elided
 * in this extract. */
171 static void test_queue(void) {
/* Resets the queue between sub-tests. */
172 #define next() do {queue_free(q); queue_init(q, NULL);} while(0)
174 const char * const s[] = {
192 queue(const char*) q;
201 ok(queue_count(q) == 6, "Checking queue count");
203 ok(dequeue_check(q)==s[0] &&
204 dequeue_check(q)==s[1] &&
205 dequeue_check(q)==s[2], "Dequeuing/checking 3 items");
206 ok(queue_count(q) == 3, "Checking queue count");
218 ok(queue_count(q) == 13, "Checking queue count");
/* Drain the remaining items; identity must match the source array. */
220 ok(dequeue_check(q)==s[3] &&
221 dequeue_check(q)==s[4] &&
222 dequeue_check(q)==s[5] &&
223 dequeue_check(q)==s[6] &&
224 dequeue_check(q)==s[7] &&
225 dequeue_check(q)==s[8] &&
226 dequeue_check(q)==s[9] &&
227 dequeue_check(q)==s[10] &&
228 dequeue_check(q)==s[11] &&
229 dequeue_check(q)==s[12] &&
230 dequeue_check(q)==s[13] &&
231 dequeue_check(q)==s[14] &&
232 dequeue_check(q)==s[15], "Are queue items correct?");
233 ok(dequeue_check(q)==NULL && dequeue_check(q)==NULL && queue_count(q)==0, "Does queue run out correctly?");
/* Convenience wrapper: runs _test_dict_single on the local str/correct
 * arrays, computing sizes in place. */
240 #define test_dict_single() _test_dict_single(dict, str, sizeof(str)-1, correct, sizeof(correct)/sizeof(*correct))
/* Tokenizes [str, str+len) with dict_lookup and compares each entry id
 * against correct[]; -100 apparently marks "expect lookup failure" and the
 * matched text is re-checked against entry->str.
 * NOTE(review): several lines of this function are elided in this extract. */
241 static void _test_dict_single(struct dict *dict, const char *str, size_t len, int *correct, size_t correct_count) {
242 const char *s=str, *e=str+len;
244 struct dict_entry *entry;
246 for (i=0; s<e && i<correct_count; i++) {
247 const char *s_last = s;
248 entry = dict_lookup(dict, &s, e);
251 break; //dict_lookup should not modify *sp when it returns NULL
253 if (correct[i] != -100)
257 if (correct[i] != entry->id)
/* Verify the consumed text actually matches the entry's string. */
265 size_t len = strlen(entry->str);
268 if (strncmp(entry->str, s-len, len))
271 //printf("Correctly read %s\n", entry->str);
274 if (s!=e || i!=correct_count) {
275 printf("Tokenization failed at ");
276 fwrite(s, 1, e-s, stdout);
280 ok(s==e && i==correct_count, "All of the tokens are correct");
/* Builds a dictionary from dict_orig and runs several tokenization
 * scenarios through test_dict_single: normal text, equal-length tokens,
 * mostly-invalid input, and tokens starting with bytes > 0x7F.
 * NOTE(review): most of dict_orig and the scenario scaffolding are elided
 * in this extract. */
283 static void test_dict(void) {
284 struct dict_entry dict_orig[] = {
297 {11, "neighborhood"},
307 {21, "\x80\x12\x34"},
311 struct dict *dict = dict_build(NULL, dict_orig, sizeof(dict_orig)/sizeof(*dict_orig));
/* Empty input must be a harmless no-op. */
314 const char *s=NULL, *e=NULL;
315 ok(dict_lookup(dict, &s, e)==NULL && s==NULL && e==NULL, "dict_lookup does nothing and returns null on empty input");
319 const char str[] = "it's a beautiful day in the neighborhood\0won't you be my neighbor?";
320 int correct[] = {2,0, 3,0, 4,0, 6,0, 8,0, 9,0, 11,-1, 13, 14,0, 5,0, 15,0, 12, -100};
324 //check equal-length tokens
326 const char str[] = "it'sitem initip";
327 int correct[] = {2,17,0, 8,1,18};
331 //check mostly invalid tokens
333 const char str[] = "&^&beaumx yo youthx";
334 int correct[] = {-100,-100,-100, 5,3,-100,-100,-100, 0,-100,-100, 14,10,-100,-100};
338 //check tokens that start with a character greater than 0x7F
340 const char str[] = "\x80\x12\x34\x80\x32\x80\x32\x34\xFF\xFA\xFF\xEE";
341 int correct[] = {21, 22, 23, 19, 20};
347 //make sure dict_build doesn't blow up on an empty dictionary
348 dict = dict_build(NULL, NULL, 0);
/* Verifies the charflag classification macros over all 256 byte values
 * plus their sign-extended negatives: each CONTROL/SPACE/.../EXTENDED
 * helper asserts that exactly the expected set of c*() predicates holds
 * for character i (and, presumably, bumps `correct` — the counting lines
 * are elided in this extract). */
352 static void test_charflag(void) {
356 #define CONTROL do { \
357 if (ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
358 !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
363 if (!ccontrol(i) && cspace(i) && !creturn(i) && cwhite(i) && \
364 !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
368 #define RETURN do { \
369 if (!ccontrol(i) && !cspace(i) && creturn(i) && cwhite(i) && \
370 !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
374 #define SYMBOL do { \
375 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
376 !cdigit(i) && !cletter(i) && !chex(i) && csymbol(i) && \
381 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
382 cdigit(i) && !cletter(i) && chex(i) && !csymbol(i) && \
386 #define LETTER_HEX do { \
387 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
388 !cdigit(i) && cletter(i) && chex(i) && !csymbol(i) && \
392 #define LETTER do { \
393 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
394 !cdigit(i) && cletter(i) && !chex(i) && !csymbol(i) && \
398 #define EXTENDED do { \
399 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
400 !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
405 for (i=0; i<'\t'; i++) CONTROL;
411 for (i='\r'+1; i<' '; i++) CONTROL;
413 for (i='!'; i<='/'; i++) SYMBOL;
414 for (i='0'; i<='9'; i++) DIGIT;
415 for (i=':'; i<='@'; i++) SYMBOL;
416 for (i='A'; i<='F'; i++) LETTER_HEX;
417 for (i='G'; i<='Z'; i++) LETTER;
418 for (i='['; i<='`'; i++) SYMBOL;
419 for (i='a'; i<='f'; i++) LETTER_HEX;
420 for (i='g'; i<='z'; i++) LETTER;
421 for (i='{'; i<='~'; i++) SYMBOL;
/* 128 ASCII values, each expected to classify correctly exactly once. */
424 ok(correct==128, "ASCII characters have correct charflags");
427 //We do some goofy stuff here to make sure sign extension doesn't cause problems with charflags
/* Probe the high half both as unsigned (128..255) and as signed
 * negatives (-128..-1) so sign extension cannot change classification. */
432 for (ui=128; ui<=255; ui++) {
436 for (si=-128; si<0; si++) {
443 for (i=-128; i<0; i++) EXTENDED;
447 for (i=128; i<=255; i++) EXTENDED;
/* 128 extended values probed four ways = 512 expected passes. */
450 ok(correct==512, "Extended characters have correct charflags");
/* Fields of struct readui_test (the struct header and remaining fields —
 * txt, txt_size, base, correct_errno — are elided in this extract). */
467 uint64_t correct_integer;
469 size_t correct_advance;
/* T(): builds a test entry from a literal, precomputing its length.
 * M: UINT64_MAX, the saturation value readui is expected to return on
 * overflow. */
472 #define T(txt, ...) {txt, sizeof(txt)-1, __VA_ARGS__}
473 #define M (18446744073709551615ULL)
/* Table of readui() cases: input text, base, expected value, expected
 * errno, and expected number of characters consumed. */
475 struct readui_test readui_tests[] = {
477 T("0",READUI_DEC, 0,0,1),
478 T(" \t42 ",READUI_DEC, 42,0,4),
481 T("BADBEEFDEADBAT",READUI_HEX, 0xBADBEEFDEADBAULL,0,13),
482 T("7559",READUI_OCT, 0755,0,3),
483 T("01010010110012",READUI_BIN, 2649,0,13),
484 T("1000000000",0x7F, 8594754748609397887ULL,0,10),
/* Error cases: empty input (EINVAL) and out-of-range values (ERANGE,
 * saturating to M). */
487 T("",READUI_DEC, 0,EINVAL,0),
488 T("18446744073709551616",
489 READUI_DEC,M,ERANGE,20),
490 T("1000000000000000000000000",
491 READUI_DEC,M,ERANGE,25),
492 T("10000000000000000",
493 READUI_HEX,M,ERANGE,17),
494 T("10000000000000000000000000000000000000000000000000000000000000000",
495 READUI_BIN,M,ERANGE,65),
498 T("9000000000",0x7F, M,ERANGE,10),
501 T("18446744073709551615",READUI_DEC, M,0,20),
/* Runs one readui_test: calls readui() on the test text and checks the
 * returned value, the resulting errno, and how far the cursor advanced. */
504 static void test_readui_single(struct readui_test *test) {
505 uint64_t result_integer;
507 size_t result_advance;
509 const char *s = test->txt, *e = s+test->txt_size;
511 result_integer = readui(&s, e, test->base);
/* Capture errno immediately — later library calls could overwrite it. */
512 result_errno = errno;
513 result_advance = s-test->txt;
515 ok(result_integer == test->correct_integer &&
516 result_errno == test->correct_errno &&
517 result_advance == test->correct_advance,
518 "Testing \"%s\"", test->txt);
/* Runs every entry of readui_tests[] through test_readui_single. */
521 static void test_readui(void) {
522 size_t i, count = sizeof(readui_tests)/sizeof(*readui_tests);
524 for (i=0; i<count; i++)
525 test_readui_single(readui_tests+i);
/* Structural sanity checks on a scan_number result: validates the prefix
 * (0 / 0b / 0x), the digit run, and the exponent against the reported
 * token type, emitting fail()/pass() tap results tagged with str_pipes.
 * NOTE(review): several lines (returns after fail(), some closers) are
 * elided in this extract. */
531 static void scan_number_sanity_check(const struct scan_number *sn,
532 enum token_type type, const char *str_pipes, const char *msg) {
533 //If there is a prefix, it should follow
534 //the pattern (0 [B X b x]*0..1)
535 if (sn->prefix < sn->digits) {
536 int len = sn->digits - sn->prefix;
537 if (len!=1 && len!=2) {
538 fail("%s : Prefix length is %d; should be 1 or 2",
542 if (sn->prefix[0] != '0') {
543 fail("%s : Prefix does not start with 0",
547 if (len==2 && !strchr("BXbx", sn->prefix[1])) {
548 fail("%s : Prefix is 0%c; should be 0, 0b, or 0x",
549 str_pipes, sn->prefix[1]);
/* A bare "0" prefix means octal, which is meaningless for floats. */
552 if (len==1 && type==TOK_FLOATING) {
553 fail("%s : Octal prefix appears on floating point number",
558 //if there is no prefix, the first digit should not be 0
559 // unless this is a floating point number
560 if (sn->digits < sn->exponent && sn->digits[0]=='0' &&
562 fail("%s : First digit of non-prefix integer is 0",
568 //Make sure sn->digits contains valid digits and is not empty
569 // (unless prefix is "0")
571 const char *s = sn->digits, *e = sn->exponent;
572 if (sn->prefix+1 < sn->digits) {
574 fail("%s : 0%c not followed by any digits",
575 str_pipes, sn->prefix[1]);
/* Hex allows hex digits and '.', binary only 0/1, decimal 0-9 and '.'. */
578 if (sn->prefix[1] == 'X' || sn->prefix[1] == 'x') {
579 while (s<e && strchr(
580 "0123456789ABCDEFabcdef.", *s)) s++;
582 if (s[0]!='0' && s[0]!='1') {
583 fail("%s: Binary prefix not followed by a 0 or 1",
587 while (s<e && strchr(
588 "0123456789.", *s)) s++;
591 if (type==TOK_FLOATING && s >= e) {
592 fail("%s : sn->digits is empty in a floating point number",
596 if (sn->prefix >= sn->digits && s >= e) {
597 fail("%s : both sn->prefix and sn->digits are empty",
601 while (s<e && strchr("0123456789.", *s)) s++;
604 fail("%s : sn->digits is not entirely valid", str_pipes);
609 //Make sure exponent follows the rules
610 if (sn->exponent < sn->suffix) {
611 char c = sn->exponent[0];
612 if (type==TOK_INTEGER) {
613 fail("%s : sn->exponent is not empty in an integer", str_pipes);
/* E/e exponents belong to decimal floats, P/p to hex/binary floats. */
616 if (sn->prefix < sn->digits && (c=='E' || c=='e')) {
617 fail("%s : Exponent for hex/binary starts with %c", str_pipes, c);
620 if (sn->prefix >= sn->digits && (c=='P' || c=='p')) {
621 fail("%s : Exponent for decimal starts with %c", str_pipes, c);
626 pass("%s%s", str_pipes, msg);
/* Parses a pipe-annotated test string (str_pipes contains exactly 3 '|'
 * separators marking the expected prefix/digits/exponent/suffix
 * boundaries), runs scan_number() on the stripped copy, and compares
 * every boundary pointer, the token type, and dots_found.
 * NOTE(review): the pipe-stripping loop and several returns are elided
 * in this extract. */
630 static void test_scan_number_single(const char *str_pipes,
631 enum token_type type, size_t dots_found) {
632 char *str = malloc(strlen(str_pipes)+1);
633 const char *expected[5];
634 struct scan_number sn;
635 enum token_type given_type;
638 const char *s = str_pipes;
648 goto fail_too_many_pipes;
656 goto fail_not_enough_pipes;
661 given_type = scan_number(&sn, str, strchr(str,0));
/* Compare each scan_number output pointer against the pipe-derived
 * expected positions. */
663 if (sn.prefix != expected[0]) {
664 fail("%s : sn.prefix is wrong", str_pipes);
667 if (sn.digits != expected[1]) {
668 fail("%s : sn.digits is wrong", str_pipes);
671 if (sn.exponent != expected[2]) {
672 fail("%s : sn.exponent is wrong", str_pipes);
675 if (sn.suffix != expected[3]) {
676 fail("%s : sn.suffix is wrong", str_pipes);
679 if (sn.end != expected[4]) {
680 fail("%s : sn.end is wrong", str_pipes);
683 if (given_type != type) {
684 fail("%s : Type incorrect", str_pipes);
687 if (sn.dots_found != dots_found) {
688 fail("%s : sn.dots_found is %zu; should be %zu", str_pipes,
689 sn.dots_found, dots_found);
693 scan_number_sanity_check(&sn, type, str_pipes, "");
699 fail("Too many pipes in the test string \"%s\"; should be 3", str_pipes);
701 fail_not_enough_pipes:
702 fail("Not enough pipes in the test string \"%s\"; should be 3", str_pipes);
/* Shorthand for one pipe-annotated scan_number case. */
706 #define T(str, type, dots_found) test_scan_number_single(str,type,dots_found)
/* Drives test_scan_number_single with representative prefix/digit/
 * exponent/suffix layouts (pipes mark the expected boundaries). */
708 static void test_scan_number(void) {
709 T("0x | 50.1 | p+1 | f", TOK_FLOATING, 1);
710 T("| 100 || L", TOK_INTEGER, 0);
711 T("0 ||| b21", TOK_INTEGER, 0);
712 T("0b | 101 || L", TOK_INTEGER, 0);
713 T("0X | 7Af ||| \t2", TOK_INTEGER, 0);
714 T("0|||b", TOK_INTEGER, 0);
715 T("0|||x", TOK_INTEGER, 0);
/* T(): scans `string` as an integer token, sanity-checks the scan, then
 * runs read_integer() and asserts value/base/suffix.  Q(name): dequeues
 * one expected message path "tokenize/read_cnumber/<name>".
 * NOTE(review): these macros are backslash-continued and several interior
 * lines are elided in this extract — do not edit without the original. */
720 #define T(string, value, theBase, theSuffix) do { \
721 queue_init(mq, NULL); \
723 type = scan_number(&sn, str, str+sizeof(string)-1); \
724 ok(type==TOK_INTEGER, "%s : type==TOK_INTEGER", str); \
725 scan_number_sanity_check(&sn, type, str, \
726 " : scan_number_sanity_check passed"); \
727 read_integer(&integer, &sn, &mq); \
728 ok(integer.v==(value) && integer.base==(theBase) && \
729 integer.suffix==(theSuffix), \
730 "%s : Correct value and suffix", str); \
732 #define Q(name) do { \
733 if (queue_count(mq)) { \
734 const char *path = dequeue(mq).path; \
735 ok(!strcmp(path, "tokenize/read_cnumber/" #name), \
736 "%s : Dequeued %s", str, path); \
740 ok(queue_count(mq)==0, "%s : Message queue empty", str); \
741 if (queue_count(mq)) \
742 tok_message_queue_dump(&mq); \
/* Exercises read_integer() via the T()/Q() macros above: valid literals
 * in each base, every suffix form, and the diagnostics for invalid
 * suffixes, out-of-range values, and bad octal digits. */
746 static void test_read_integer(void) {
747 struct scan_number sn;
748 tok_message_queue mq;
750 enum token_type type;
751 struct tok_integer integer;
753 T("0b0lu", 0, 8, TOK_UL);
756 T("1", 1, 10, TOK_NOSUFFIX);
/* Invalid suffix "Q" is ignored but reported. */
759 T("32Q", 32, 10, TOK_NOSUFFIX);
760 Q(integer_suffix_invalid);
763 T("32i", 32, 10, TOK_I);
/* "f" is a float-only suffix; on an integer it is reported. */
766 T("0755f", 493, 8, TOK_NOSUFFIX);
767 Q(suffix_float_only);
770 T("0xDeadBeef", 0xDEADBEEF, 16, TOK_NOSUFFIX);
773 T("12345678901234567890$1_LONG.SUFFIX", 12345678901234567890ULL, 10, TOK_NOSUFFIX);
774 ok1(sn.end == strchr(str, 0));
775 Q(integer_suffix_invalid);
778 T("0xDEADBEEFlull", 0xDEADBEEF, 16, TOK_NOSUFFIX);
779 Q(integer_suffix_invalid);
782 T("0xBALLuu", 0xBA, 16, TOK_NOSUFFIX);
783 Q(integer_suffix_invalid);
/* Out of range saturates to UINT64_MAX and reports. */
786 T("123456789012345678901", 18446744073709551615ULL, 10, TOK_NOSUFFIX);
787 Q(integer_out_of_range);
/* '9' is not a valid octal digit. */
790 T("09", 0, 8, TOK_NOSUFFIX);
791 Q(integer_invalid_digits);
/* Teq(): scans `string` (from a fresh malloced copy, since read_floating
 * apparently needs a writable buffer) as a floating token, sanity-checks
 * the scan, runs read_floating(), and asserts the caller-supplied
 * equality expression plus the suffix.  T() specializes Teq() with an
 * absolute-tolerance comparison on floating.v.
 * NOTE(review): these macros are backslash-continued and interior lines
 * are elided in this extract — do not edit without the original. */
798 #define Teq(string, equals, theSuffix) do { \
799 queue_init(mq, NULL); \
800 str = malloc(sizeof(string)); \
801 memcpy(str, string, sizeof(string)); \
802 type = scan_number(&sn, str, str+sizeof(string)-1); \
803 ok(type==TOK_FLOATING, "%s : type==TOK_FLOATING", str); \
804 scan_number_sanity_check(&sn, type, str, \
805 " : scan_number_sanity_check passed"); \
806 read_floating(&floating, &sn, &mq); \
808 floating.suffix==(theSuffix), \
809 "%s : Correct value and suffix", str); \
811 #define T(string, value, theSuffix) \
812 Teq(string, fabsl(floating.v - (value)) <= 0.00000000000000001, theSuffix)
814 ok(queue_count(mq)==0, "%s : Message queue empty", str); \
815 if (queue_count(mq)) \
816 tok_message_queue_dump(&mq); \
/* Exercises read_floating() via the Teq()/T() macros above: decimal and
 * hex floats, suffix handling, invalid-digit diagnostics, and overflow/
 * underflow saturation near long-double range limits. */
821 static void test_read_floating(void) {
822 struct scan_number sn;
823 tok_message_queue mq;
824 char *str; //str is a malloced copy so read_floating can do its null terminator trick
825 enum token_type type;
826 struct tok_floating floating;
828 T("1.0", 1.0, TOK_NOSUFFIX);
831 T("0.0", 0.0, TOK_NOSUFFIX);
/* Leading 0 on a decimal float is not octal. */
834 T("0755e1", 7550.0, TOK_NOSUFFIX);
837 T("0xD.Bp0", 0xD.Bp0, TOK_NOSUFFIX);
840 //GCC doesn't throw any errors or warnings for this odd case,
841 //but we call it an error to be consistent with strtold
842 T("0x.p0", 0.0, TOK_NOSUFFIX);
843 Q(floating_invalid_digits);
846 T("32.0Q", 32.0, TOK_NOSUFFIX);
847 Q(floating_suffix_invalid);
850 T("32.0Li", 32.0, TOK_IMAG_L);
/* "LL" is integer-only; reported on a float. */
853 T("32.0LL", 32.0, TOK_NOSUFFIX);
854 Q(suffix_integer_only);
857 Teq("0xDEAD.BEEF", floating.v==0.0, TOK_NOSUFFIX);
858 Q(hex_float_no_exponent);
861 T("0b101.0p0", 0, TOK_NOSUFFIX);
865 /* If any of the following three tests fails, consider increasing
866 the e+ and e- values. */
868 Teq("1.e+4933", isinf(floating.v), TOK_NOSUFFIX);
869 Q(floating_out_of_range);
872 /* for some reason, strtold sets errno=EDOM on x86, and
873 on my PowerPC G4 on Fedora 10, the same phenomenon occurs
874 but the exponents are e+309, e-324, and e-325 */
875 Teq("1.e-4951", floating.v==0.0, TOK_NOSUFFIX);
876 Q(floating_out_of_range);
879 Teq("1.e-4952", floating.v==0.0, TOK_NOSUFFIX);
880 Q(floating_out_of_range);
/* One tokenizer scenario: input text plus the expected token sequence
 * (some fields, e.g. txt/txt_size/token_count, are elided in this
 * extract). */
890 struct tokenizer_test {
894 const struct token *tokens;
/* Builders for test entries and expected tokens: T() pairs input text
 * with an array_count_pair of expected tokens; string/opkw/integer/
 * floating/include fill the corresponding token payload; space and
 * startline are common whole-token literals. */
898 #define T(txt, ...) {txt, sizeof(txt)-1, array_count_pair(struct token, __VA_ARGS__)}
899 #define string(txt) {.string=(darray_char[1]){{.item = (txt), .size = sizeof(txt)-1}}}
900 #define opkw(v) {.opkw = (v)}
901 #define txt(t) .txt = (t), .txt_size = sizeof(t)-1
902 #define integer(...) {.integer={__VA_ARGS__}}
903 #define floating(...) {.floating={__VA_ARGS__}}
904 #define space {.type = TOK_WHITE, .txt = " ", .txt_size = 1}
905 #define startline {.type = TOK_STARTLINE}
906 #define include(str) {.include = (str)}
/* A tokenizer scenario that should also emit specific messages. */
908 struct tokenizer_msg_test {
909 struct tokenizer_test test;
911 const char * const *messages;
912 size_t message_count;
/* Message-list counterpart of array_count_pair. */
915 #define M(...) array_count_pair(const char *, __VA_ARGS__)
/* Master table of tokenizer scenarios: each T() entry is raw input text
 * plus the exact expected token stream (type, payload, source text, and
 * preprocessor flags {pp, pp_directive}).
 * NOTE(review): many entries in this table have lines elided in this
 * extract; compare with the original before editing. */
917 struct tokenizer_tests[] = {
920 {.type = TOK_WHITE, txt("\n")}
923 {.type = TOK_WHITE, txt("\n")},
925 {.type = TOK_IDENTIFIER, txt("a")}
928 {.type = TOK_KEYWORD,
932 {.type = TOK_IDENTIFIER,
935 {.type = TOK_OPERATOR,
939 {.type = TOK_IDENTIFIER,
942 {.type = TOK_OPERATOR,
946 {.type = TOK_OPERATOR,
952 {.type = TOK_FLOATING,
953 floating(.5, TOK_NOSUFFIX),
956 {.type = TOK_INTEGER,
957 integer(42, 10, TOK_NOSUFFIX),
961 //Make sure TOK_STRAY doesn't take over the universe
966 {.type = TOK_IDENTIFIER,
969 {.type = TOK_IDENTIFIER,
976 {.type = TOK_IDENTIFIER,
980 //Make sure starting with 0 doesn't result in skipping whitespace
982 {.type = TOK_INTEGER,
983 integer(0, 8, TOK_NOSUFFIX),
986 {.type = TOK_FLOATING,
987 floating(.05, TOK_NOSUFFIX),
990 {.type = TOK_INTEGER,
991 integer(0, 8, TOK_NOSUFFIX),
994 {.type = TOK_INTEGER,
995 integer(500, 10, TOK_NOSUFFIX),
999 //Make sure a simple preprocessor directive works
1000 T("\t/*comment*/ #include \"include.h\"\n",
1001 {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
1002 {.flags={1,0}, .type=TOK_CCOMMENT, txt("/*comment*/")},
1003 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1004 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1005 {.flags={1,1}, .type=TOK_KEYWORD, opkw(INCLUDE), txt("include")},
1006 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1007 {.flags={1,0}, .type=TOK_STRING_IQUOTE, include("include.h"), txt("\"include.h\"")},
1008 {.flags={1,0}, .type=TOK_WHITE, txt("\n")}
1010 //Make sure __VA_ARGS__ is lexed correctly
1011 T("if #define __VA_ARGS__=0X5FULL;\n"
1012 " #define __VA_ARGS__(__VA_ARGS__, ...\t)__VA_ARGS__ bar int define",
1013 {.type=TOK_KEYWORD, opkw(IF), txt("if")},
1015 {.type=TOK_OPERATOR, opkw('#'), txt("#")},
1016 {.type=TOK_IDENTIFIER, txt("define")},
1018 {.type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1019 {.type=TOK_OPERATOR, opkw('='), txt("=")},
1020 {.type=TOK_INTEGER, integer(0x5F,16,TOK_ULL), txt("0X5FULL")},
1021 {.type=TOK_OPERATOR, opkw(';'), txt(";")},
1022 {.type=TOK_WHITE, txt("\n")},
1023 {.flags={1,0}, .type=TOK_STARTLINE},
1024 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1025 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1026 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1027 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1028 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1029 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1030 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1031 {.flags={1,0}, .type=TOK_OPERATOR, opkw(','), txt(",")},
1032 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1033 {.flags={1,0}, .type=TOK_OPERATOR, opkw(ELLIPSIS), txt("...")},
1034 {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
1035 {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
1036 {.flags={1,0}, .type=TOK_KEYWORD, opkw(VA_ARGS), txt("__VA_ARGS__")},
1037 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1038 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1039 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1040 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1041 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1042 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
1044 //__VA_ARGS__ is an identifier if no ... operator is in the parameter list or if there is no parameter list
1045 T("#define foo __VA_ARGS__ bar int define\n#define foo() __VA_ARGS__ bar int define",
1046 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1047 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1048 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1049 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1050 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1051 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1052 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1053 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1054 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1055 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1056 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1057 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
1058 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1060 {.flags={1,0}, .type=TOK_STARTLINE},
1061 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1062 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1063 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1064 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1065 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1066 {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
1067 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1068 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1069 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1070 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1071 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1072 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1073 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1074 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")}
1077 //Test various integer suffixen
1078 T("1 1u 1l 1ul 1lu 1ll 1ull 1llu 1U 1L 1UL 1LU 1LL 1ULL 1LLU "
1079 "1uq 1lq 1llq 1ulq 1luq 1f 1i",
1080 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1")}, space,
1081 {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1u")}, space,
1082 {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1l")}, space,
1083 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1ul")}, space,
1084 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1lu")}, space,
1085 {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1ll")}, space,
1086 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ull")}, space,
1087 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1llu")}, space,
1088 {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1U")}, space,
1089 {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1L")}, space,
1090 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1UL")}, space,
1091 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1LU")}, space,
1092 {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1LL")}, space,
1093 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ULL")}, space,
1094 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1LLU")}, space,
1095 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1uq")}, space,
1096 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1lq")}, space,
1097 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1llq")}, space,
1098 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1ulq")}, space,
1099 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1luq")}, space,
1100 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1f")}, space,
1101 {.type=TOK_INTEGER, integer(1, 10, TOK_I), txt("1i")}
1103 //Test non-standard newlines
1104 T("\n\r\n \r\n\rint",
1105 {.type=TOK_WHITE, txt("\n\r")}, startline,
1106 {.type=TOK_WHITE, txt("\n")}, startline,
1108 {.type=TOK_WHITE, txt("\r\n")}, startline,
1109 {.type=TOK_WHITE, txt("\r")}, startline,
1110 {.type=TOK_KEYWORD, opkw(INT), txt("int")}
1112 //Test backslash-broken lines
1114 {.type=TOK_IDENTIFIER, txt("onerous")}
1117 {.type=TOK_STRAY, txt("\\")}
1119 T("in\\\nt i\\;\nf\\ \r\nor (i=0; i<10; i++) {\\",
1120 {.type=TOK_KEYWORD, opkw(INT), txt("int")}, space,
1121 {.type=TOK_IDENTIFIER, txt("i")},
1122 {.type=TOK_STRAY, txt("\\")},
1123 {.type=TOK_OPERATOR, opkw(';'), txt(";")},
1124 {.type=TOK_WHITE, txt("\n")},
1127 {.type=TOK_KEYWORD, opkw(FOR), txt("for")}, space,
1128 {.type=TOK_OPERATOR, opkw('('), txt("(")},
1129 {.type=TOK_IDENTIFIER, txt("i")},
1130 {.type=TOK_OPERATOR, opkw('='), txt("=")},
1131 {.type=TOK_INTEGER, integer(0,8,0), txt("0")},
1132 {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
1133 {.type=TOK_IDENTIFIER, txt("i")},
1134 {.type=TOK_OPERATOR, opkw('<'), txt("<")},
1135 {.type=TOK_INTEGER, integer(10,10,0), txt("10")},
1136 {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
1137 {.type=TOK_IDENTIFIER, txt("i")},
1138 {.type=TOK_OPERATOR, opkw(INC_OP), txt("++")},
1139 {.type=TOK_OPERATOR, opkw(')'), txt(")")}, space,
1140 {.type=TOK_OPERATOR, opkw('{'), txt("{")},
1141 {.type=TOK_STRAY, txt("\\")}
1143 //More preprocessor directive tests
1144 T("#apple\n#pragma\n#const\n#define \t\n#define foo(x",
1145 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1146 {.flags={1,1}, .type=TOK_IDENTIFIER, txt("apple")},
1147 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1149 {.flags={1,0}, .type=TOK_STARTLINE},
1150 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1151 {.flags={1,1}, .type=TOK_KEYWORD, opkw(PRAGMA), txt("pragma")},
1152 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1154 {.flags={1,0}, .type=TOK_STARTLINE},
1155 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1156 {.flags={1,1}, .type=TOK_IDENTIFIER, txt("const")},
1157 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1159 {.flags={1,0}, .type=TOK_STARTLINE},
1160 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1161 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1162 {.flags={1,0}, .type=TOK_WHITE, txt(" \t")},
1163 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1165 {.flags={1,0}, .type=TOK_STARTLINE},
1166 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1167 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1168 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1169 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1170 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1171 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("x")}
1174 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1175 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")}
1178 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1179 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1180 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1181 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")}
1184 {.type=TOK_STRAY, txt("`")},
1185 {.type=TOK_OPERATOR, opkw('#'), txt("#")},
1186 {.type=TOK_IDENTIFIER, txt("define")},
1188 {.type=TOK_IDENTIFIER, txt("foo")}
/* Scenarios that must both tokenize as given AND emit the listed message
 * paths (suffixes under "tokenize/"): an unterminated C comment, and a
 * string literal containing raw newlines with a missing end quote. */
1192 struct tokenizer_msg_test tokenizer_msg_tests[] = {
1193 {T("/* Unterminated C comment",
1194 {.type=TOK_CCOMMENT, txt("/* Unterminated C comment")}
1196 "unterminated_comment"
1199 {.type=TOK_STRING, string("\n"), txt("\"\n\"")},
1200 {.type=TOK_STRING, string("\n"), txt("\"\n")}
1202 "read_cstring/quote_newlines",
1203 "read_cstring/missing_endquote"
/* Runs one tokenizer_test: tokenizes t->txt, sanity-checks the token
 * list, then walks the generated tokens against the expected array,
 * comparing type, pp flags, and the type-specific payload (integer,
 * float, opkw, string, include) plus the raw source text.
 * NOTE(review): several lines (success flag setup, break/cleanup paths)
 * are elided in this extract. */
1216 static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *mq) {
1217 struct token_list *tl;
1218 size_t i, count = t->token_count, gen_count;
1219 const struct token *tok_gen, *tok_correct;
1221 char *txt = talloc_memdup(NULL, t->txt, t->txt_size);
1222 size_t txt_size = t->txt_size;
1223 #define failed(fmt, ...) do { \
1224 printf("Error: " fmt "\n", ##__VA_ARGS__); \
1229 tl = tokenize(txt, txt, txt_size, mq);
1231 if (tl->orig != txt || tl->orig_size != txt_size)
1232 failed("tokenize() did not replicate orig/orig_size from arguments");
1233 if (!token_list_sanity_check(tl, stdout))
1234 failed("Sanity check failed");
/* +1 accounts for the implicit leading TOK_STARTLINE. */
1236 gen_count = token_list_count(tl);
1237 if (gen_count != count+1)
1238 failed("Incorrect number of tokens (%zu, should be %zu)\n",
1239 gen_count, count+1);
1241 tok_gen = tl->first->next; //skip the beginning TOK_STARTLINE
1242 tok_correct = t->tokens;
1243 for (i=0; i<count; i++, tok_gen=tok_gen->next, tok_correct++) {
1244 if (tok_gen->type != tok_correct->type)
1245 failed("Token \"%s\": Incorrect type", tok_correct->txt);
1247 struct token_flags g=tok_gen->flags, c=tok_correct->flags;
1248 if (g.pp!=c.pp || g.pp_directive!=c.pp_directive)
1249 failed("Token \"%s\": Incorrect flags", tok_correct->txt);
/* Payload comparison dispatches on token type. */
1251 switch (tok_gen->type) {
1253 if (tok_gen->integer.v != tok_correct->integer.v ||
1254 tok_gen->integer.base != tok_correct->integer.base ||
1255 tok_gen->integer.suffix != tok_correct->integer.suffix)
1256 failed("Token \"%s\": Integer value/base/suffix incorrect", tok_correct->txt);;
/* Floats compared with a small absolute tolerance. */
1259 if (fabsl(tok_gen->floating.v - tok_correct->floating.v) > 0.00000000000000001 ||
1260 tok_gen->floating.suffix != tok_correct->floating.suffix)
1261 failed("Token \"%s\": Floating point value/suffix incorrect", tok_correct->txt);
1264 if (tok_gen->opkw != tok_correct->opkw)
1265 failed("Token \"%s\": Operator opkw incorrect", tok_correct->txt);
1268 if (tok_gen->opkw != tok_correct->opkw)
1269 failed("Token \"%s\": Keyword opkw incorrect", tok_correct->txt);
1273 //anything using string
/* String payload must match byte-for-byte and be NUL-terminated. */
1274 if (tok_gen->string->size != tok_correct->string->size ||
1275 memcmp(tok_gen->string->item, tok_correct->string->item,
1276 tok_gen->string->size) ||
1277 tok_gen->string->item[tok_gen->string->size] != 0 )
1278 failed("Token \"%s\": String value incorrect", tok_correct->txt);
1280 case TOK_STRING_IQUOTE:
1281 case TOK_STRING_IANGLE:
1282 if (strcmp(tok_gen->include, tok_correct->include))
1283 failed("Token \"%s\": #include string incorrect", tok_correct->txt);
1285 case TOK_IDENTIFIER:
1287 case TOK_CPPCOMMENT:
/* Finally, the raw source text (TOK_STARTLINE has no text to check). */
1293 if (tok_gen->type!=TOK_STARTLINE && (
1294 tok_gen->txt_size != tok_correct->txt_size ||
1295 memcmp(tok_gen->txt, tok_correct->txt, tok_gen->txt_size))
1297 failed("Token \"%s\": txt incorrect", tok_correct->txt);
1302 ok(success==1, "Tokenize %s", t->txt);
1305 token_list_dump(tl, stdout);
/*
 * Tokenize an entire file from disk and run the token-list sanity check on
 * the result.  Messages are enqueued on *mq for the caller to examine.
 *
 * NOTE(review): elided view -- the read loop's header/terminator and the
 * declaration of `read_len` are not visible; comments describe only the
 * visible lines.
 */
1310 static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
/* Binary mode so byte counts are exact on all platforms. */
1311 FILE *f = fopen(file_name, "rb");
1312 darray_char *text = talloc_darray(NULL);
1313 const size_t inc = 1024;
1314 struct token_list *tl;
1317 fail("Could not read file '%s': %s", file_name, strerror(errno));
/* Grow by inc plus one spare byte, read a chunk, and keep the buffer
 * NUL-terminated after every read. */
1324 darray_realloc(*text, text->size+inc+1);
1325 read_len = fread(text->item+text->size, 1, inc, f);
1326 text->size += read_len;
1327 text->item[text->size] = 0;
1334 fail("Error reading file '%s': %s", file_name, strerror(errno));
/* The darray itself serves as `orig`; its items/size are the source text. */
1338 tl = tokenize(text, text->item, text->size, mq);
1339 tl->filename = file_name;
1341 //printf("File '%s' has %zu tokens\n", file_name, token_list_count(tl));
1342 //token_list_dump(tl, stdout);
1344 if (!token_list_sanity_check(tl, stdout)) {
1345 fail("Sanity check failed for file '%s'", file_name);
1349 pass("File '%s' has %zu tokens", file_name, token_list_count(tl));
/* Message-printing loop left disabled by the original author. */
1351 /*while (queue_count(*mq)) {
1352 struct tok_message msg = dequeue(*mq);
1353 tok_message_print(&msg, tl);
/*
 * Top-level tokenizer test driver, in three phases:
 *   1. run every tokenizer_tests[] entry, draining messages after each;
 *   2. run every tokenizer_msg_tests[] entry and verify both the count and
 *      the "tokenize/"-relative paths of the messages produced;
 *   3. tokenize this test file itself and require no TM_WARN-or-worse
 *      messages.
 *
 * NOTE(review): elided view -- declarations of `i`, `j`, `count` and several
 * closing braces are not visible here; confirm against the complete file.
 */
1362 static void test_tokenizer(void) {
1363 tok_message_queue mq;
1365 int has_warn_or_worse = 0;
1367 queue_init(mq, NULL);
/* Phase 1: token-stream correctness tests. */
1369 count = sizeof(tokenizer_tests)/sizeof(*tokenizer_tests);
1370 for (i=0; i<count; i++) {
1371 test_tokenizer_single(tokenizer_tests+i, &mq);
/* Drain (and optionally dump) whatever messages the test produced. */
1372 while (queue_count(mq)) {
1373 struct tok_message msg = dequeue(mq);
1375 //tok_message_dump(&msg);
/* Phase 2: expected-message tests. */
1379 count = sizeof(tokenizer_msg_tests)/sizeof(*tokenizer_msg_tests);
1380 for (i=0; i<count; i++) {
1382 test_tokenizer_single(&tokenizer_msg_tests[i].test, &mq);
/* The number of queued messages must match the table exactly;
 * on mismatch, drain the queue (drain body elided from view). */
1384 if (queue_count(mq) != tokenizer_msg_tests[i].message_count) {
1385 fail("Incorrect number of messages from tokenize()");
1386 while (queue_count(mq))
/* Check each message path: must start with "tokenize/" and the
 * remainder must equal the table's expected string, in order. */
1391 for (j=0; queue_count(mq); j++) {
1392 struct tok_message msg = dequeue(mq);
1393 const char *base = "tokenize/";
1394 size_t baselen = strlen(base);
1395 //tok_message_dump(&msg);
1397 if (strncmp(msg.path, base, baselen)) {
1398 fail("Message from tokenize() doesn't start with \"%s\"",
1402 if (strcmp(msg.path+baselen,
1403 tokenizer_msg_tests[i].messages[j])) {
1404 fail("Incorrect message %s, should be %s",
1405 msg.path+baselen, tokenizer_msg_tests[i].messages[j]);
1410 pass("Messages from tokenize() are correct");
/* Phase 3: self-test -- tokenize this very test file. */
1414 test_tokenizer_file("test/run.c", &mq);
1416 while (queue_count(mq)) {
1417 struct tok_message msg = dequeue(mq);
/* Only warnings and above count as a failure; dump them for diagnosis. */
1418 if (msg.level >= TM_WARN) {
1419 has_warn_or_worse = 1;
1420 tok_message_dump(&msg);
1422 //else tok_message_dump(&msg);
1425 ok(has_warn_or_worse==0, "Tokenizing run.c generated%s warnings, errors, or bugs",
1426 has_warn_or_worse ? "" : " no");
/* Tail of the test entry point: announce and run each unit-test group via
 * the TAP diag() helper.  NOTE(review): the function header and most of the
 * test_*() calls (queue, dict, charflag, readui, scan_number, tokenizer) are
 * elided from this view -- presumably each diag() is followed by the matching
 * call, as shown for read_cstring/read_integer/read_floating below. */
1437 diag("* Checking queue...");
1440 diag("* Checking read_cstring...");
1441 test_read_cstring();
1443 diag("* Checking dict...");
1446 diag("* Checking charflag...");
1449 diag("* Checking readui...");
1452 diag("* Checking scan_number...");
1455 diag("* Checking read_integer...");
1456 test_read_integer();
1458 diag("* Checking read_floating...");
1459 test_read_floating();
1461 diag("* Checking tokenizer...");
1464 /* This exits depending on whether all tests passed */
1465 return exit_status();