1 /*
2         Copyright (c) 2009  Joseph A. Adams
3         All rights reserved.
4         
5         Redistribution and use in source and binary forms, with or without
6         modification, are permitted provided that the following conditions
7         are met:
8         1. Redistributions of source code must retain the above copyright
9            notice, this list of conditions and the following disclaimer.
10         2. Redistributions in binary form must reproduce the above copyright
11            notice, this list of conditions and the following disclaimer in the
12            documentation and/or other materials provided with the distribution.
13         3. The name of the author may not be used to endorse or promote products
14            derived from this software without specific prior written permission.
15         
16         THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17         IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18         OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19         IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20         INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21         NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22         DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23         THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24         (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25         THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <ccan/ccan_tokenizer/read_cnumber.c>
29 #include <ccan/ccan_tokenizer/read_cstring.c>
30 #include <ccan/ccan_tokenizer/dict.c>
31 #include <ccan/ccan_tokenizer/ccan_tokenizer.c>
32 #include <ccan/ccan_tokenizer/queue.c>
33 #include <ccan/ccan_tokenizer/charflag.c>
34
35 #include <ccan/ccan_tokenizer/ccan_tokenizer.h>
36
37 #include <ccan/tap/tap.h>
38
39 #include <math.h>
40
41 #define array_count_pair(type, ...) (const type []){__VA_ARGS__}, sizeof((const type []){__VA_ARGS__})/sizeof(type)
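/* array_count_pair(T, ...) expands to a compound-literal array of the given
   values followed by its element count; e.g. array_count_pair(int, 1, 2, 3)
   yields  (const int []){1, 2, 3}  and a sizeof expression that evaluates
   to 3, handy for filling pointer/count argument pairs in one go. */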
42
43 static void test_read_cstring(void) {
44         #define next() do {darray_free(str); darray_init(str); csp++;} while(0)
45         #define cs (*csp)
46         #define verify_quotechar(correct, correct_continuation_offset, quotechar) do { \
47                 const size_t s = sizeof(correct)-1; \
48                 p = read_cstring(&str, cs, cs ? strchr(cs, 0) : NULL, quotechar, &mq); \
49                 ok(str.size==s && str.alloc>s && str.item[s]==0 && \
50                 !memcmp(str.item, correct, s), \
51                 "\"%s: Is output correct?", cs); \
52                 ok(p == cs+correct_continuation_offset, "\"%s: Is continuation pointer correct?", cs); \
53                 next(); \
54         } while(0)
55         #define verify(correct, correct_continuation_offset) verify_quotechar(correct, correct_continuation_offset, '"')
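        /* verify_quotechar() decodes the next entry in cstrings[] with
           read_cstring(), checks the decoded bytes, the NUL terminator, and
           the returned continuation pointer, then advances csp with next().
           verify() is the common case with a double-quote terminator. */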
56         
57         const char * const cstrings[] = {
58                 NULL,
59                 "",
60                 "\"",
61                 "Hello world!\"",
62                 "Hello world!",
63                 "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\"",
64                 "\\\\\\f\\e\\b\\0\\a\\r\\n\\w\\t\\v\\\'\\\"\'",
65                 "الأدب العربي\"",
66                 "Ends with \\",
67                 "Tab: '\\011' Space: '\\040' Overflow: '\\777' Ambiguous: '\\1013'\"",
68                 "\\x50\\x35\\x12\\xEF\\xFE\\x00012\\x345\""
69         };
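        /* The entries above correspond, in order, to the verify() and
           verify_quotechar() calls below. */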
70         const char * const *csp = cstrings;
71         const char *p;
72         darray_char str = darray_new();
73         tok_message_queue mq;
74         
75         queue_init(mq, NULL);
76         
77         //Check NULL input
78         verify("", 0);
79         
80         //Check an empty input
81         verify("", 0);
82         
83         //Check an empty quote-terminated string
84         verify("", 0);
85         
86         //Check a simple string
87         verify("Hello world!", 12);
88         
89         //Check a simple string without an end quote
90         verify("Hello world!", 12);
91         
92         //Check a collection of single-character sequences
93         verify("\\\f\e\b\0\a\r\nw\t\v\'\"", 26);
94         
95         //Check same collection of single-character sequences, this time using a single quote terminator
96         verify_quotechar("\\\f\e\b\0\a\r\nw\t\v\'\"", 26, '\'');
97         
98         //Check a real UTF-8 string
99         verify("\xd8\xa7\xd9\x84\xd8\xa3\xd8\xaf\xd8\xa8\x20\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", 23);
100         
101         //Check string ending in backslash
102         verify("Ends with \\", 11);
103         
104         //Check a series of octal escapes
105         verify("Tab: '\t' Space: ' ' Overflow: '\377' Ambiguous: 'A3'", 61);
106         
107         //Check a series of hex escapes
108         verify("\x50\x35\x12\xEF\xFE\x12\x45", 32);
109         
110         darray_free(str);
111         
112         //tok_message_queue_dump(&mq);
113         
114         //Verify the message queue
115         if (1)
116         {
117                 struct tok_message m;
118                 struct tok_message correct_messages[] = {
119                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
120                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
121                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
122                         {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
123                         //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_single_quote"},
124                         {.level=TM_WARN, .path="tokenize/read_cstring/unknown_escape"},
125                         //{.level=TM_INFO, .path="tokenize/read_cstring/escaped_double_quote"},
126                         {.level=TM_ERROR, .path="tokenize/read_cstring/ended_in_backslash"},
127                         {.level=TM_ERROR, .path="tokenize/read_cstring/missing_endquote"},
128                         {.level=TM_WARN, .path="tokenize/read_cstring/octal_overflow"},
129                         {.level=TM_INFO, .path="tokenize/read_cstring/ambiguous_octal"},
130                         {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
131                         {.level=TM_WARN, .path="tokenize/read_cstring/ambiguous_hex"},
132                         {.level=TM_WARN, .path="tokenize/read_cstring/hex_overflow"},
133                 };
134                 size_t i, e=sizeof(correct_messages)/sizeof(*correct_messages);
135                 
136                 while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
137                         queue_skip(mq);
138                 for (i=0; i<e; i++) {
139                         if (!queue_count(mq))
140                                 break;
141                         m = dequeue(mq);
142                         if (m.level != correct_messages[i].level)
143                                 break;
144                         if (strcmp(m.path, correct_messages[i].path))
145                                 break;
146                         while(queue_count(mq) && queue_next(mq).level==TM_DEBUG)
147                                 queue_skip(mq);
148                 }
149                 if (i<e)
150                         printf("Item %zu is incorrect\n", i);
151                 ok(i==e, "Is message queue correct?");
152                 ok(!queue_count(mq), "Message queue should be empty now.");
153         }
154         
155         queue_free(mq);
156         #undef next
157         #undef cs
158         #undef verify_quotechar
159         #undef verify
160 }
161
162 #if 0
163 static void p(const char *str) {
164         if (str)
165                 puts(str);
166         else
167                 puts("(null)");
168 }
169 #endif
170
171 static void test_queue(void) {
172         #define next() do {queue_free(q); queue_init(q, NULL);} while(0)
173         
174         const char * const s[] = {
175                 "zero",
176                 "one",
177                 "two",
178                 "three",
179                 "four",
180                 "five",
181                 "six",
182                 "seven",
183                 "eight",
184                 "nine",
185                 "ten",
186                 "eleven",
187                 "twelve",
188                 "thirteen",
189                 "fourteen",
190                 "fifteen"
191         };
192         queue(const char*) q;
193         queue_init(q, NULL);
194         
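        /* Enqueue and dequeue in interleaved batches so items are removed
           while others are still pending, then drain the queue completely
           and check that dequeue_check() returns NULL once it is empty. */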
195         enqueue(q, s[0]);
196         enqueue(q, s[1]);
197         enqueue(q, s[2]);
198         enqueue(q, s[3]);
199         enqueue(q, s[4]);
200         enqueue(q, s[5]);
201         ok(queue_count(q) == 6, "Checking queue count");
202         
203         ok(dequeue_check(q)==s[0] &&
204                 dequeue_check(q)==s[1] &&
205                 dequeue_check(q)==s[2], "Dequeuing/checking 3 items");
206         ok(queue_count(q) == 3, "Checking queue count");
207         
208         enqueue(q, s[6]);
209         enqueue(q, s[7]);
210         enqueue(q, s[8]);
211         enqueue(q, s[9]);
212         enqueue(q, s[10]);
213         enqueue(q, s[11]);
214         enqueue(q, s[12]);
215         enqueue(q, s[13]);
216         enqueue(q, s[14]);
217         enqueue(q, s[15]);
218         ok(queue_count(q) == 13, "Checking queue count");
219         
220         ok(dequeue_check(q)==s[3] &&
221                 dequeue_check(q)==s[4] &&
222                 dequeue_check(q)==s[5] &&
223                 dequeue_check(q)==s[6] &&
224                 dequeue_check(q)==s[7] &&
225                 dequeue_check(q)==s[8] &&
226                 dequeue_check(q)==s[9] &&
227                 dequeue_check(q)==s[10] &&
228                 dequeue_check(q)==s[11] &&
229                 dequeue_check(q)==s[12] &&
230                 dequeue_check(q)==s[13] &&
231                 dequeue_check(q)==s[14] &&
232                 dequeue_check(q)==s[15], "Are queue items correct?");
233         ok(dequeue_check(q)==NULL && dequeue_check(q)==NULL && queue_count(q)==0, "Does queue run out correctly?");
234         
235         queue_free(q);
236         
237         #undef next
238 }
239
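/* Feed str through dict_lookup() and compare each hit against the expected
   id sequence in correct[].  A value of -100 means "no entry matches here":
   dict_lookup() must return NULL without moving the cursor, and the test
   skips a single byte instead. */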
240 #define test_dict_single() _test_dict_single(dict, str, sizeof(str)-1, correct, sizeof(correct)/sizeof(*correct))
241 static void _test_dict_single(struct dict *dict, const char *str, size_t len, int *correct, size_t correct_count) {
242         const char *s=str, *e=str+len;
243         size_t i;
244         struct dict_entry *entry;
245         
246         for (i=0; s<e && i<correct_count; i++) {
247                 const char *s_last = s;
248                 entry = dict_lookup(dict, &s, e);
249                 if (!entry) {
250                         if (s_last != s)
251                                 break; //dict_lookup should not modify *sp when it returns NULL
252                         s++;
253                         if (correct[i] != -100)
254                                 break;
255                         continue;
256                 }
257                 if (correct[i] != entry->id)
258                         break;
259                 if (!*entry->str) {
260                         if (s_last+1 != s)
261                                 break;
262                         if (s[-1] != 0)
263                                 break;
264                 } else {
265                         size_t len = strlen(entry->str);
266                         if (s_last+len != s)
267                                 break;
268                         if (strncmp(entry->str, s-len, len))
269                                 break;
270                 }
271                 //printf("Correctly read %s\n", entry->str);
272         }
273         
274         if (s!=e || i!=correct_count) {
275                 printf("Tokenization failed at ");
276                 fwrite(s, 1, e-s, stdout);
277                 printf("\n");
278         }
279         
280         ok(s==e && i==correct_count, "All of the tokens are correct");
281 }
282
283 static void test_dict(void) {
284         struct dict_entry dict_orig[] = {
285                 {-1, ""},
286                 {0, " "},
287                 {1, "it"},
288                 {2, "it's"},
289                 {3, "a"},
290                 {4, "beautiful"},
291                 {5, "be"},
292                 {6, "day"},
293                 {7, "d"},
294                 {8, "in"},
295                 {9, "the"},
296                 {10, "t"},
297                 {11, "neighborhood"},
298                 {12, "neighbor"},
299                 {13, "won't"},
300                 {14, " you"},
301                 {15, "my"},
302                 {16, "??"},
303                 {17, "item"},
304                 {18, "ip"},
305                 {19, "\xFF\xFA"},
306                 {20, "\xFF\xEE"},
307                 {21, "\x80\x12\x34"},
308                 {22, "\x80\x32"},
309                 {23, "\x80\x32\x34"}
310         };
311         struct dict *dict = dict_build(NULL, dict_orig, sizeof(dict_orig)/sizeof(*dict_orig));
312         
313         {
314                 const char *s=NULL, *e=NULL;
315                 ok(dict_lookup(dict, &s, e)==NULL && s==NULL && e==NULL, "dict_lookup does nothing and returns null on empty input");
316         }
317         
318         {
319                 const char str[] = "it's a beautiful day in the neighborhood\0won't you be my neighbor?";
320                 int correct[] = {2,0, 3,0, 4,0, 6,0, 8,0, 9,0, 11,-1, 13, 14,0, 5,0, 15,0, 12, -100};
321                 test_dict_single();
322         }
323         
324         //check equal-length tokens
325         {
326                 const char str[] = "it'sitem initip";
327                 int correct[] = {2,17,0, 8,1,18};
328                 test_dict_single();
329         }
330         
331         //check mostly invalid tokens
332         {
333                 const char str[] = "&^&beaumx yo youthx";
334                 int correct[] = {-100,-100,-100, 5,3,-100,-100,-100, 0,-100,-100, 14,10,-100,-100};
335                 test_dict_single();
336         }
337         
338         //check tokens that start with a character greater than 0x7F
339         {
340                 const char str[] = "\x80\x12\x34\x80\x32\x80\x32\x34\xFF\xFA\xFF\xEE";
341                 int correct[] = {21, 22, 23, 19, 20};
342                 test_dict_single();
343         }
344         
345         talloc_free(dict);
346         
347         //make sure dict_build doesn't blow up on an empty dictionary
348         dict = dict_build(NULL, NULL, 0);
349         talloc_free(dict);
350 }
351
352 static void test_charflag(void) {
353         char i;
354         int correct = 0;
355         
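        /* Each macro below increments `correct` only when the charflag
           predicates return exactly the combination expected for that class
           of character, so the final counts (128 for ASCII, 512 for the
           extended-range passes) show every byte was classified correctly. */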
356         #define CONTROL do { \
357                 if (ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
358                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
359                         !cextended(i) ) \
360                         correct++; \
361                 } while(0)
362         #define SPACE do { \
363                 if (!ccontrol(i) && cspace(i) && !creturn(i) && cwhite(i) && \
364                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
365                         !cextended(i) ) \
366                         correct++; \
367                 } while(0)
368         #define RETURN do { \
369                 if (!ccontrol(i) && !cspace(i) && creturn(i) && cwhite(i) && \
370                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
371                         !cextended(i) ) \
372                         correct++; \
373                 } while(0)
374         #define SYMBOL do { \
375                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
376                         !cdigit(i) && !cletter(i) && !chex(i) && csymbol(i) && \
377                         !cextended(i) ) \
378                         correct++; \
379                 } while(0)
380         #define DIGIT do { \
381                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
382                         cdigit(i) && !cletter(i) && chex(i) && !csymbol(i) && \
383                         !cextended(i) ) \
384                         correct++; \
385                 } while(0)
386         #define LETTER_HEX do { \
387                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
388                         !cdigit(i) && cletter(i) && chex(i) && !csymbol(i) && \
389                         !cextended(i) ) \
390                         correct++; \
391                 } while(0)
392         #define LETTER do { \
393                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
394                         !cdigit(i) && cletter(i) && !chex(i) && !csymbol(i) && \
395                         !cextended(i) ) \
396                         correct++; \
397                 } while(0)
398         #define EXTENDED do { \
399                 if (!ccontrol(i) && !cspace(i) && !creturn(i) && !cwhite(i) && \
400                         !cdigit(i) && !cletter(i) && !chex(i) && !csymbol(i) && \
401                         cextended(i) ) \
402                         correct++; \
403                 } while(0)
404         
405         for (i=0; i<'\t'; i++) CONTROL;
406         i = '\t'; SPACE;
407         i = '\n'; RETURN;
408         i = '\v'; SPACE;
409         i = '\f'; SPACE;
410         i = '\r'; RETURN;
411         for (i='\r'+1; i<' '; i++) CONTROL;
412         i = ' '; SPACE;
413         for (i='!'; i<='/'; i++) SYMBOL;
414         for (i='0'; i<='9'; i++) DIGIT;
415         for (i=':'; i<='@'; i++) SYMBOL;
416         for (i='A'; i<='F'; i++) LETTER_HEX;
417         for (i='G'; i<='Z'; i++) LETTER;
418         for (i='['; i<='`'; i++) SYMBOL;
419         for (i='a'; i<='f'; i++) LETTER_HEX;
420         for (i='g'; i<='z'; i++) LETTER;
421         for (i='{'; i<='~'; i++) SYMBOL;
422         i = '\x7F'; CONTROL;
423         
424         ok(correct==128, "ASCII characters have correct charflags");
425         correct = 0;
426         
427         //We do some goofy stuff here to make sure sign extension doesn't cause problems with charflags
428         {
429                 unsigned int ui;
430                 int si;
431                 
432                 for (ui=128; ui<=255; ui++) {
433                         i = ui;
434                         EXTENDED;
435                 }
436                 for (si=-128; si<0; si++) {
437                         i = si;
438                         EXTENDED;
439                 }
440         }
441         {
442                 int i;
443                 for (i=-128; i<0; i++) EXTENDED;
444         }
445         {
446                 unsigned int i;
447                 for (i=128; i<=255; i++) EXTENDED;
448         }
449         
450         ok(correct==512, "Extended characters have correct charflags");
451         
452         #undef CONTROL
453         #undef SPACE
454         #undef RETURN
455         #undef SYMBOL
456         #undef DIGIT
457         #undef LETTER_HEX
458         #undef LETTER
459         #undef EXTENDED
460 }
461
462 struct readui_test {
463         const char *txt;
464         size_t txt_size;
465         readui_base base;
466         
467         uint64_t correct_integer;
468         int correct_errno;
469         size_t correct_advance;
470 };
471
472 #define T(txt, ...) {txt, sizeof(txt)-1, __VA_ARGS__}
473 #define M (18446744073709551615ULL)
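/* T() pairs a literal with its length (excluding the NUL terminator); M is
   2^64 - 1 (UINT64_MAX), the value the tests expect for out-of-range input
   alongside errno == ERANGE. */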
474
475 struct readui_test readui_tests[] = {
476         //Basic reads
477         T("0",READUI_DEC, 0,0,1),
478         T(" \t42  ",READUI_DEC, 42,0,4),
479         
480         //Different bases
481         T("BADBEEFDEADBAT",READUI_HEX, 0xBADBEEFDEADBAULL,0,13),
482         T("7559",READUI_OCT, 0755,0,3),
483         T("01010010110012",READUI_BIN, 2649,0,13),
484         T("1000000000",0x7F, 8594754748609397887ULL,0,10),
485         
486         //Errors
487         T("",READUI_DEC, 0,EINVAL,0),
488         T("18446744073709551616",
489                 READUI_DEC,M,ERANGE,20),
490         T("1000000000000000000000000",
491                 READUI_DEC,M,ERANGE,25),
492         T("10000000000000000",
493                 READUI_HEX,M,ERANGE,17),
494         T("10000000000000000000000000000000000000000000000000000000000000000",
495                 READUI_BIN,M,ERANGE,65),
496         T("10000000000",
497                 0x7D,M,ERANGE,11),
498         T("9000000000",0x7F, M,ERANGE,10),
499         
500         //Misc
501         T("18446744073709551615",READUI_DEC, M,0,20),
502 };
503
504 static void test_readui_single(struct readui_test *test) {
505         uint64_t result_integer;
506         int result_errno;
507         size_t result_advance;
508         
509         const char *s = test->txt, *e = s+test->txt_size;
510         errno = 0;
511         result_integer = readui(&s, e, test->base);
512         result_errno = errno;
513         result_advance = s-test->txt;
514         
515         ok(result_integer == test->correct_integer &&
516            result_errno   == test->correct_errno &&
517            result_advance == test->correct_advance,
518            "Testing \"%s\"", test->txt);
519 }
520
521 static void test_readui(void) {
522         size_t i, count = sizeof(readui_tests)/sizeof(*readui_tests);
523         
524         for (i=0; i<count; i++)
525                 test_readui_single(readui_tests+i);
526 }
527
528 #undef T
529 #undef M
530
531 static void scan_number_sanity_check(const struct scan_number *sn,
532                 enum token_type type, const char *str_pipes, const char *msg) {
533         //If there is a prefix, it should follow
534         //the pattern (0 [B X b x]*0..1)
535         if (sn->prefix < sn->digits) {
536                 int len = sn->digits - sn->prefix;
537                 if (len!=1 && len!=2) {
538                         fail("%s : Prefix length is %d; should be 1 or 2",
539                                 str_pipes, len);
540                         return;
541                 }
542                 if (sn->prefix[0] != '0') {
543                         fail("%s : Prefix does not start with 0",
544                                 str_pipes);
545                         return;
546                 }
547                 if (len==2 && !strchr("BXbx", sn->prefix[1])) {
548                         fail("%s : Prefix is 0%c; should be 0, 0b, or 0x",
549                                 str_pipes, sn->prefix[1]);
550                         return;
551                 }
552                 if (len==1 && type==TOK_FLOATING) {
553                         fail("%s : Octal prefix appears on floating point number",
554                                 str_pipes);
555                         return;
556                 }
557         } else {
558         //if there is no prefix, the first digit should not be 0
559         //  unless this is a floating point number
560                 if (sn->digits < sn->exponent && sn->digits[0]=='0' &&
561                                 type==TOK_INTEGER) {
562                         fail("%s : First digit of non-prefix integer is 0",
563                                 str_pipes);
564                         return;
565                 }
566         }
567         
568         //Make sure sn->digits contains valid digits and is not empty
569         //  (unless prefix is "0")
570         {
571                 const char *s = sn->digits, *e = sn->exponent;
572                 if (sn->prefix+1 < sn->digits) {
573                         if (s >= e) {
574                                 fail("%s : 0%c not followed by any digits",
575                                         str_pipes, sn->prefix[1]);
576                                 return;
577                         }
578                         if (sn->prefix[1] == 'X' || sn->prefix[1] == 'x') {
579                                 while (s<e && strchr(
580                                         "0123456789ABCDEFabcdef.", *s)) s++;
581                         } else {
582                                 if (s[0]!='0' && s[0]!='1') {
583                                         fail("%s: Binary prefix not followed by a 0 or 1",
584                                                 str_pipes);
585                                         return;
586                                 }
587                                 while (s<e && strchr(
588                                         "0123456789.", *s)) s++;
589                         }
590                 } else {
591                         if (type==TOK_FLOATING && s >= e) {
592                                 fail("%s : sn->digits is empty in a floating point number",
593                                         str_pipes);
594                                 return;
595                         }
596                         if (sn->prefix >= sn->digits && s >= e) {
597                                 fail("%s : both sn->prefix and sn->digits are empty",
598                                         str_pipes);
599                                 return;
600                         }
601                         while (s<e && strchr("0123456789.", *s)) s++;
602                 }
603                 if (s != e) {
604                         fail("%s : sn->digits is not entirely valid", str_pipes);
605                         return;
606                 }
607         }
608         
609         //Make sure exponent follows the rules
610         if (sn->exponent < sn->suffix) {
611                 char c = sn->exponent[0];
612                 if (type==TOK_INTEGER) {
613                         fail("%s : sn->exponent is not empty in an integer", str_pipes);
614                         return;
615                 }
616                 if (sn->prefix < sn->digits && (c=='E' || c=='e')) {
617                         fail("%s : Exponent for hex/binary starts with %c", str_pipes, c);
618                         return;
619                 }
620                 if (sn->prefix >= sn->digits && (c=='P' || c=='p')) {
621                         fail("%s : Exponent for decimal starts with %c", str_pipes, c);
622                         return;
623                 }
624         }
625         
626         pass("%s%s", str_pipes, msg);
627         return;
628 }
629
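/* In str_pipes, '|' separates the expected prefix, digits, exponent, and
   suffix fields, with an optional fourth pipe marking where sn.end should
   fall; spaces are stripped before scanning, and with only three pipes the
   expected end defaults to the end of the string.  Example from the tests
   below: "0x | 50.1 | p+1 | f". */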
630 static void test_scan_number_single(const char *str_pipes,
631                                 enum token_type type, size_t dots_found) {
632         char *str = malloc(strlen(str_pipes)+1);
633         const char *expected[5];
634         struct scan_number sn;
635         enum token_type given_type;
636         
637         {
638                 const char *s = str_pipes;
639                 char *d = str;
640                 size_t pipes = 0;
641                 
642                 expected[0] = d;
643                 for (;*s;s++) {
644                         if (*s == ' ')
645                                 continue;
646                         if (*s == '|') {
647                                 if (++pipes > 4)
648                                         goto fail_too_many_pipes;
649                                 expected[pipes] = d;
650                         } else
651                                 *d++ = *s;
652                 }
653                 *d = 0;
654                 
655                 if (pipes < 3)
656                         goto fail_not_enough_pipes;
657                 if (pipes == 3)
658                         expected[4] = d;
659         }
660         
661         given_type = scan_number(&sn, str, strchr(str,0));
662         
663         if (sn.prefix != expected[0]) {
664                 fail("%s : sn.prefix is wrong", str_pipes);
665                 return;
666         }
667         if (sn.digits != expected[1]) {
668                 fail("%s : sn.digits is wrong", str_pipes);
669                 return;
670         }
671         if (sn.exponent != expected[2]) {
672                 fail("%s : sn.exponent is wrong", str_pipes);
673                 return;
674         }
675         if (sn.suffix != expected[3]) {
676                 fail("%s : sn.suffix is wrong", str_pipes);
677                 return;
678         }
679         if (sn.end != expected[4]) {
680                 fail("%s : sn.end is wrong", str_pipes);
681                 return;
682         }
683         if (given_type != type) {
684                 fail("%s : Type incorrect", str_pipes);
685                 return;
686         }
687         if (sn.dots_found != dots_found) {
688                 fail("%s : sn.dots_found is %zu; should be %zu", str_pipes,
689                         sn.dots_found, dots_found);
690                 return;
691         }
692         
693         scan_number_sanity_check(&sn, type, str_pipes, "");
694         
695         free(str);
696         return;
697         
698 fail_too_many_pipes:
699         fail("Too many pipes in the test string \"%s\"; should be 3", str_pipes);
700         return;
701 fail_not_enough_pipes:
702         fail("Not enough pipes in the test string \"%s\"; should be 3", str_pipes);
703         return;
704 }
705
706 #define T(str, type, dots_found) test_scan_number_single(str,type,dots_found)
707
708 static void test_scan_number(void) {
709         T("0x | 50.1 | p+1 | f", TOK_FLOATING, 1);
710         T("| 100 || L", TOK_INTEGER, 0);
711         T("0 ||| b21", TOK_INTEGER, 0);
712         T("0b | 101 || L", TOK_INTEGER, 0);
713         T("0X | 7Af ||| \t2", TOK_INTEGER, 0);
714         T("0|||b", TOK_INTEGER, 0);
715         T("0|||x", TOK_INTEGER, 0);
716 }
717
718 #undef T
719
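/* T() scans and reads an integer literal, checking its value, base, and
   suffix.  Q() dequeues the next message (if any) and checks its path
   against tokenize/read_cnumber/<name>.  E() asserts the message queue is
   now empty and frees it. */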
720 #define T(string, value, theBase, theSuffix) do { \
721         queue_init(mq, NULL); \
722         str = (string); \
723         type = scan_number(&sn, str, str+sizeof(string)-1); \
724         ok(type==TOK_INTEGER, "%s : type==TOK_INTEGER", str); \
725         scan_number_sanity_check(&sn, type, str, \
726                 " : scan_number_sanity_check passed"); \
727         read_integer(&integer, &sn, &mq); \
728         ok(integer.v==(value) && integer.base==(theBase) && \
729                 integer.suffix==(theSuffix), \
730                 "%s : Correct value and suffix", str); \
731         } while(0)
732 #define Q(name) do { \
733         if (queue_count(mq)) { \
734                 const char *path = dequeue(mq).path; \
735                 ok(!strcmp(path, "tokenize/read_cnumber/" #name), \
736                         "%s : Dequeued %s", str, path); \
737         } \
738         } while(0)
739 #define E() do { \
740         ok(queue_count(mq)==0, "%s : Message queue empty", str); \
741         if (queue_count(mq)) \
742                 tok_message_queue_dump(&mq); \
743         queue_free(mq); \
744         } while(0)
745
746 static void test_read_integer(void) {
747         struct scan_number sn;
748         tok_message_queue mq;
749         const char *str;
750         enum token_type type;
751         struct tok_integer integer;
752         
753         T("0b0lu", 0, 8, TOK_UL);
754         E();
755         
756         T("1", 1, 10, TOK_NOSUFFIX);
757         E();
758         
759         T("32Q", 32, 10, TOK_NOSUFFIX);
760         Q(integer_suffix_invalid);
761         E();
762         
763         T("32i", 32, 10, TOK_I);
764         E();
765         
766         T("0755f", 493, 8, TOK_NOSUFFIX);
767         Q(suffix_float_only);
768         E();
769         
770         T("0xDeadBeef", 0xDEADBEEF, 16, TOK_NOSUFFIX);
771         E();
772         
773         T("12345678901234567890$1_LONG.SUFFIX", 12345678901234567890ULL, 10, TOK_NOSUFFIX);
774         ok1(sn.end == strchr(str, 0));
775         Q(integer_suffix_invalid);
776         E();
777         
778         T("0xDEADBEEFlull", 0xDEADBEEF, 16, TOK_NOSUFFIX);
779         Q(integer_suffix_invalid);
780         E();
781         
782         T("0xBALLuu", 0xBA, 16, TOK_NOSUFFIX);
783         Q(integer_suffix_invalid);
784         E();
785         
786         T("123456789012345678901", 18446744073709551615ULL, 10, TOK_NOSUFFIX);
787         Q(integer_out_of_range);
788         E();
789         
790         T("09", 0, 8, TOK_NOSUFFIX);
791         Q(integer_invalid_digits);
792         E();
793 }
794
795 #undef T
796 #undef E
797
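/* Teq() lets the caller supply an arbitrary predicate on floating.v (such
   as isinf() or an exact-zero check); T() wraps it in an absolute-error
   tolerance of 1e-17.  The input is copied into a malloc'd buffer because
   read_floating() temporarily writes a NUL terminator (see str below). */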
798 #define Teq(string, equals, theSuffix) do { \
799         queue_init(mq, NULL); \
800         str = malloc(sizeof(string)); \
801         memcpy(str, string, sizeof(string)); \
802         type = scan_number(&sn, str, str+sizeof(string)-1); \
803         ok(type==TOK_FLOATING, "%s : type==TOK_FLOATING", str); \
804         scan_number_sanity_check(&sn, type, str, \
805                 " : scan_number_sanity_check passed"); \
806         read_floating(&floating, &sn, &mq); \
807         ok((equals) && \
808                 floating.suffix==(theSuffix), \
809                 "%s : Correct value and suffix", str); \
810         } while(0)
811 #define T(string, value, theSuffix) \
812         Teq(string, fabsl(floating.v - (value)) <= 0.00000000000000001, theSuffix)
813 #define E() do { \
814         ok(queue_count(mq)==0, "%s : Message queue empty", str); \
815         if (queue_count(mq)) \
816                 tok_message_queue_dump(&mq); \
817         queue_free(mq); \
818         free(str); \
819         } while(0)
820
821 static void test_read_floating(void) {
822         struct scan_number sn;
823         tok_message_queue mq;
824         char *str; //str is a malloced copy so read_floating can do its null terminator trick
825         enum token_type type;
826         struct tok_floating floating;
827         
828         T("1.0", 1.0, TOK_NOSUFFIX);
829         E();
830         
831         T("0.0", 0.0, TOK_NOSUFFIX);
832         E();
833         
834         T("0755e1", 7550.0, TOK_NOSUFFIX);
835         E();
836         
837         T("0xD.Bp0", 0xD.Bp0, TOK_NOSUFFIX);
838         E();
839         
840         //GCC doesn't throw any errors or warnings for this odd case,
841         //but we call it an error to be consistent with strtold
842         T("0x.p0", 0.0, TOK_NOSUFFIX);
843         Q(floating_invalid_digits);
844         E();
845         
846         T("32.0Q", 32.0, TOK_NOSUFFIX);
847         Q(floating_suffix_invalid);
848         E();
849         
850         T("32.0Li", 32.0, TOK_IMAG_L);
851         E();
852         
853         T("32.0LL", 32.0, TOK_NOSUFFIX);
854         Q(suffix_integer_only);
855         E();
856         
857         Teq("0xDEAD.BEEF", floating.v==0.0, TOK_NOSUFFIX);
858         Q(hex_float_no_exponent);
859         E();
860         
861         T("0b101.0p0", 0, TOK_NOSUFFIX);
862         Q(binary_float);
863         E();
864         
865         /* If any of the following three tests fails, consider increasing
866            the e+ and e- values. */
867         
868         Teq("1.e+4933", isinf(floating.v), TOK_NOSUFFIX);
869         Q(floating_out_of_range);
870         E();
871         
872         /* For some reason, strtold sets errno=EDOM on x86; on my
873            PowerPC G4 running Fedora 10, the same phenomenon occurs,
874            but the exponents involved are e+309, e-324, and e-325. */
875         Teq("1.e-4951", floating.v==0.0, TOK_NOSUFFIX);
876         Q(floating_out_of_range);
877         E();
878         
879         Teq("1.e-4952", floating.v==0.0, TOK_NOSUFFIX);
880         Q(floating_out_of_range);
881         E();
882         
883 }
884
885 #undef Teq
886 #undef T
887 #undef Q
888 #undef E
889
890 struct tokenizer_test {
891         const char *txt;
892         size_t txt_size;
893         
894         const struct token *tokens;
895         size_t token_count;
896 };
897
898 #define T(txt, ...) {txt, sizeof(txt)-1, array_count_pair(struct token, __VA_ARGS__)}
899 #define string(txt) {.string=(darray_char[1]){{.item = (char *)(txt), .size = sizeof(txt)-1}}}
900 #define opkw(v) {.opkw = (v)}
901 #define txt(t) .txt = (t), .txt_size = sizeof(t)-1
902 #define integer(...) {.integer={__VA_ARGS__}}
903 #define floating(...) {.floating={__VA_ARGS__}}
904 #define space {.type = TOK_WHITE, .txt = " ", .txt_size = 1}
905 #define startline {.type = TOK_STARTLINE}
906 #define include(str) {.include = (char *)(str)}
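/* Helpers for building the expected token arrays: string() wraps a literal
   in a darray_char, txt() records the exact source text and its length,
   space and startline are shorthand for whitespace/start-of-line tokens,
   and include() holds the path carried by an #include token. */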
907
908 struct tokenizer_msg_test {
909         struct tokenizer_test test;
910         
911         const char * const *messages;
912         size_t message_count;
913 };
914
915 #define M(...) array_count_pair(const char *, __VA_ARGS__)
916
917 struct tokenizer_test tokenizer_tests[] = {
918         { "", 0, 0 },
919         T("\n",
920                 {.type = TOK_WHITE, txt("\n")}
921         ),
922         T("\na",
923                 {.type = TOK_WHITE, txt("\n")},
924                 startline,
925                 {.type = TOK_IDENTIFIER, txt("a")}
926         ),
927         T("int n = c++;",
928                 {.type = TOK_KEYWORD,
929                         opkw(INT),
930                         txt("int")
931                 }, space,
932                 {.type = TOK_IDENTIFIER,
933                         txt("n")
934                 }, space,
935                 {.type = TOK_OPERATOR,
936                         opkw('='),
937                         txt("=")
938                 }, space,
939                 {.type = TOK_IDENTIFIER,
940                         txt("c")
941                 },
942                 {.type = TOK_OPERATOR,
943                         opkw(INC_OP),
944                         txt("++")
945                 },
946                 {.type = TOK_OPERATOR,
947                         opkw(';'),
948                         txt(";")
949                 }
950         ),
951         T(".5 42 ",
952                 {.type = TOK_FLOATING,
953                         floating(.5, TOK_NOSUFFIX),
954                         txt(".5")
955                 }, space,
956                 {.type = TOK_INTEGER,
957                         integer(42, 10, TOK_NOSUFFIX),
958                         txt("42")
959                 }, space,
960         ),
961         //Make sure TOK_STRAY doesn't take over the universe
962         T("``AS IS'' AND",
963                 {.type = TOK_STRAY,
964                         txt("``")
965                 },
966                 {.type = TOK_IDENTIFIER,
967                         txt("AS")
968                 }, space,
969                 {.type = TOK_IDENTIFIER,
970                         txt("IS")
971                 },
972                 {.type = TOK_CHAR,
973                         string(""),
974                         txt("\'\'")
975                 }, space,
976                 {.type = TOK_IDENTIFIER,
977                         txt("AND")
978                 }
979         ),
980         //Make sure starting with 0 doesn't result in skipping whitespace
981         T("0 .05 0 500",
982                 {.type = TOK_INTEGER,
983                         integer(0, 8, TOK_NOSUFFIX),
984                         txt("0")
985                 }, space,
986                 {.type = TOK_FLOATING,
987                         floating(.05, TOK_NOSUFFIX),
988                         txt(".05")
989                 }, space,
990                 {.type = TOK_INTEGER,
991                         integer(0, 8, TOK_NOSUFFIX),
992                         txt("0")
993                 }, space,
994                 {.type = TOK_INTEGER,
995                         integer(500, 10, TOK_NOSUFFIX),
996                         txt("500")
997                 }
998         ),
999         //Make sure a simple preprocessor directive works
1000         T("\t/*comment*/ #include \"include.h\"\n",
1001                 {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
1002                 {.flags={1,0}, .type=TOK_CCOMMENT, txt("/*comment*/")},
1003                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1004                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1005                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(INCLUDE), txt("include")},
1006                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1007                 {.flags={1,0}, .type=TOK_STRING_IQUOTE, include("include.h"), txt("\"include.h\"")},
1008                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")}
1009         ),
1010         //Make sure __VA_ARGS__ is lexed correctly
1011         T("if #define __VA_ARGS__=0X5FULL;\n"
1012           " #define __VA_ARGS__(__VA_ARGS__, ...\t)__VA_ARGS__ bar int define",
1013                 {.type=TOK_KEYWORD, opkw(IF), txt("if")},
1014                 space,
1015                 {.type=TOK_OPERATOR, opkw('#'), txt("#")},
1016                 {.type=TOK_IDENTIFIER, txt("define")},
1017                 space,
1018                 {.type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1019                 {.type=TOK_OPERATOR, opkw('='), txt("=")},
1020                 {.type=TOK_INTEGER, integer(0x5F,16,TOK_ULL), txt("0X5FULL")},
1021                 {.type=TOK_OPERATOR, opkw(';'), txt(";")},
1022                 {.type=TOK_WHITE, txt("\n")},
1023                 {.flags={1,0}, .type=TOK_STARTLINE},
1024                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1025                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1026                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1027                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1028                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1029                 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1030                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1031                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(','), txt(",")},
1032                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1033                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(ELLIPSIS), txt("...")},
1034                 {.flags={1,0}, .type=TOK_WHITE, txt("\t")},
1035                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
1036                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(VA_ARGS), txt("__VA_ARGS__")},
1037                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1038                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1039                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1040                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1041                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1042                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
1043         ),
1044         //__VA_ARGS__ is an identifier if no ... operator is in the parameter list or if there is no parameter list
1045         T("#define foo __VA_ARGS__ bar int define\n#define foo() __VA_ARGS__ bar int define",
1046                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1047                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1048                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1049                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1050                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1051                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1052                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1053                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1054                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1055                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1056                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1057                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")},
1058                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1059                 
1060                 {.flags={1,0}, .type=TOK_STARTLINE},
1061                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1062                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1063                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1064                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1065                 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1066                 {.flags={1,0}, .type=TOK_OPERATOR, opkw(')'), txt(")")},
1067                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1068                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("__VA_ARGS__")},
1069                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1070                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("bar")},
1071                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1072                 {.flags={1,0}, .type=TOK_KEYWORD, opkw(INT), txt("int")},
1073                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1074                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("define")}
1075         ),
1076         
1077         //Test various integer suffixen
1078         T("1 1u 1l 1ul 1lu 1ll 1ull 1llu 1U 1L 1UL 1LU 1LL 1ULL 1LLU "
1079           "1uq 1lq 1llq 1ulq 1luq 1f 1i",
1080                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1")}, space,
1081                 {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1u")}, space,
1082                 {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1l")}, space,
1083                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1ul")}, space,
1084                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1lu")}, space,
1085                 {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1ll")}, space,
1086                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ull")}, space,
1087                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1llu")}, space,
1088                 {.type=TOK_INTEGER, integer(1, 10, TOK_U), txt("1U")}, space,
1089                 {.type=TOK_INTEGER, integer(1, 10, TOK_L), txt("1L")}, space,
1090                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1UL")}, space,
1091                 {.type=TOK_INTEGER, integer(1, 10, TOK_UL), txt("1LU")}, space,
1092                 {.type=TOK_INTEGER, integer(1, 10, TOK_LL), txt("1LL")}, space,
1093                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1ULL")}, space,
1094                 {.type=TOK_INTEGER, integer(1, 10, TOK_ULL), txt("1LLU")}, space,
1095                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1uq")}, space,
1096                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1lq")}, space,
1097                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1llq")}, space,
1098                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1ulq")}, space,
1099                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1luq")}, space,
1100                 {.type=TOK_INTEGER, integer(1, 10, TOK_NOSUFFIX), txt("1f")}, space,
1101                 {.type=TOK_INTEGER, integer(1, 10, TOK_I), txt("1i")}
1102         ),
1103         //Test non-standard newlines
1104         T("\n\r\n \r\n\rint",
1105                 {.type=TOK_WHITE, txt("\n\r")}, startline,
1106                 {.type=TOK_WHITE, txt("\n")}, startline,
1107                 space,
1108                 {.type=TOK_WHITE, txt("\r\n")}, startline,
1109                 {.type=TOK_WHITE, txt("\r")}, startline,
1110                 {.type=TOK_KEYWORD, opkw(INT), txt("int")}
1111         ),
1112         //Test backslash-broken lines
1113         T("oner\\ \nous",
1114                 {.type=TOK_IDENTIFIER, txt("onerous")}
1115         ),
1116         T("\\\n\\\n\\\n\\",
1117                 {.type=TOK_STRAY, txt("\\")}
1118         ),
1119         T("in\\\nt i\\;\nf\\ \r\nor (i=0; i<10; i++) {\\",
1120                 {.type=TOK_KEYWORD, opkw(INT), txt("int")}, space,
1121                 {.type=TOK_IDENTIFIER, txt("i")},
1122                 {.type=TOK_STRAY, txt("\\")},
1123                 {.type=TOK_OPERATOR, opkw(';'), txt(";")},
1124                 {.type=TOK_WHITE, txt("\n")},
1125                 
1126                 startline,
1127                 {.type=TOK_KEYWORD, opkw(FOR), txt("for")}, space,
1128                 {.type=TOK_OPERATOR, opkw('('), txt("(")},
1129                 {.type=TOK_IDENTIFIER, txt("i")},
1130                 {.type=TOK_OPERATOR, opkw('='), txt("=")},
1131                 {.type=TOK_INTEGER, integer(0,8,0), txt("0")},
1132                 {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
1133                 {.type=TOK_IDENTIFIER, txt("i")},
1134                 {.type=TOK_OPERATOR, opkw('<'), txt("<")},
1135                 {.type=TOK_INTEGER, integer(10,10,0), txt("10")},
1136                 {.type=TOK_OPERATOR, opkw(';'), txt(";")}, space,
1137                 {.type=TOK_IDENTIFIER, txt("i")},
1138                 {.type=TOK_OPERATOR, opkw(INC_OP), txt("++")},
1139                 {.type=TOK_OPERATOR, opkw(')'), txt(")")}, space,
1140                 {.type=TOK_OPERATOR, opkw('{'), txt("{")},
1141                 {.type=TOK_STRAY, txt("\\")}
1142         ),
1143         //More preprocessor directive tests
1144         T("#apple\n#pragma\n#const\n#define \t\n#define foo(x",
1145                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1146                 {.flags={1,1}, .type=TOK_IDENTIFIER, txt("apple")},
1147                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1148                 
1149                 {.flags={1,0}, .type=TOK_STARTLINE},
1150                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1151                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(PRAGMA), txt("pragma")},
1152                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1153                 
1154                 {.flags={1,0}, .type=TOK_STARTLINE},
1155                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1156                 {.flags={1,1}, .type=TOK_IDENTIFIER, txt("const")},
1157                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1158                 
1159                 {.flags={1,0}, .type=TOK_STARTLINE},
1160                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1161                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1162                 {.flags={1,0}, .type=TOK_WHITE, txt(" \t")},
1163                 {.flags={1,0}, .type=TOK_WHITE, txt("\n")},
1164                 
1165                 {.flags={1,0}, .type=TOK_STARTLINE},
1166                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1167                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1168                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1169                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")},
1170                 {.flags={1,0}, .type=TOK_OPERATOR, opkw('('), txt("(")},
1171                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("x")}
1172         ),
1173         T("#define",
1174                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1175                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")}
1176         ),
1177         T("#define foo",
1178                 {.flags={1,0}, .type=TOK_LEADING_POUND, txt("#")},
1179                 {.flags={1,1}, .type=TOK_KEYWORD, opkw(DEFINE), txt("define")},
1180                 {.flags={1,0}, .type=TOK_WHITE, txt(" ")},
1181                 {.flags={1,0}, .type=TOK_IDENTIFIER, txt("foo")}
1182         ),
1183         T("`#define foo",
1184                 {.type=TOK_STRAY, txt("`")},
1185                 {.type=TOK_OPERATOR, opkw('#'), txt("#")},
1186                 {.type=TOK_IDENTIFIER, txt("define")},
1187                 space,
1188                 {.type=TOK_IDENTIFIER, txt("foo")}
1189         )
1190 };
1191
1192 struct tokenizer_msg_test tokenizer_msg_tests[] = {
1193         {T("/* Unterminated C comment",
1194                 {.type=TOK_CCOMMENT, txt("/* Unterminated C comment")}
1195         ), M(
1196                 "unterminated_comment"
1197         )},
1198         {T("\"\n\"\"\n",
1199                 {.type=TOK_STRING, string("\n"), txt("\"\n\"")},
1200                 {.type=TOK_STRING, string("\n"), txt("\"\n")}
1201         ), M(
1202                 "read_cstring/quote_newlines",
1203                 "read_cstring/missing_endquote"
1204         )},
1205 };
1206
1207 #undef T
1208 #undef string
1209 #undef opkw
1210 #undef txt
1211 #undef integer
1212 #undef floating
1213 #undef M
1214 #undef include
1215
1216 static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *mq) {
1217         struct token_list *tl;
1218         size_t i, count = t->token_count, gen_count;
1219         const struct token *tok_gen, *tok_correct;
1220         int success = 1;
1221         char *txt = talloc_memdup(NULL, t->txt, t->txt_size);
1222         size_t txt_size = t->txt_size;
1223         #define failed(fmt, ...) do { \
1224                 printf("Error: " fmt "\n", ##__VA_ARGS__); \
1225                 success = 0; \
1226                 goto done; \
1227         } while(0)
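        /* failed() prints the error, marks this test unsuccessful, and jumps
           to done:, which reports the result and dumps the generated token
           list for inspection on failure. */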
1228         
1229         tl = tokenize(txt, txt, txt_size, mq);
1230         
1231         if (tl->orig != txt || tl->orig_size != txt_size)
1232                 failed("tokenize() did not replicate orig/orig_size from arguments");
1233         if (!token_list_sanity_check(tl, stdout))
1234                 failed("Sanity check failed");
1235         
1236         gen_count = token_list_count(tl);
1237         if (gen_count != count+1)
1238                 failed("Incorrect number of tokens (%zu, should be %zu)\n",
1239                         gen_count, count+1);
1240         
1241         tok_gen = tl->first->next; //skip the beginning TOK_STARTLINE
1242         tok_correct = t->tokens;
1243         for (i=0; i<count; i++, tok_gen=tok_gen->next, tok_correct++) {
1244                 if (tok_gen->type != tok_correct->type)
1245                         failed("Token \"%s\": Incorrect type", tok_correct->txt);
1246                 {
1247                         struct token_flags g=tok_gen->flags, c=tok_correct->flags;
1248                         if (g.pp!=c.pp || g.pp_directive!=c.pp_directive)
1249                                 failed("Token \"%s\": Incorrect flags", tok_correct->txt);
1250                 }
1251                 switch (tok_gen->type) {
1252                         case TOK_INTEGER:
1253                                 if (tok_gen->integer.v != tok_correct->integer.v ||
1254                                     tok_gen->integer.base != tok_correct->integer.base ||
1255                                     tok_gen->integer.suffix != tok_correct->integer.suffix)
1256                                         failed("Token \"%s\": Integer value/base/suffix incorrect", tok_correct->txt);
1257                                 break;
1258                         case TOK_FLOATING:
1259                                 if (fabsl(tok_gen->floating.v - tok_correct->floating.v) > 0.00000000000000001 ||
1260                                     tok_gen->floating.suffix != tok_correct->floating.suffix)
1261                                         failed("Token \"%s\": Floating point value/suffix incorrect", tok_correct->txt);
1262                                 break;
1263                         case TOK_OPERATOR:
1264                                 if (tok_gen->opkw != tok_correct->opkw)
1265                                         failed("Token \"%s\": Operator opkw incorrect", tok_correct->txt);
1266                                 break;
1267                         case TOK_KEYWORD:
1268                                 if (tok_gen->opkw != tok_correct->opkw)
1269                                         failed("Token \"%s\": Keyword opkw incorrect", tok_correct->txt);
1270                                 break;
1271                         case TOK_CHAR:
1272                         case TOK_STRING:
1273                                 //anything using string
1274                                 if (tok_gen->string->size != tok_correct->string->size ||
1275                                         memcmp(tok_gen->string->item, tok_correct->string->item,
1276                                         tok_gen->string->size) ||
1277                                         tok_gen->string->item[tok_gen->string->size] != 0 )
1278                                         failed("Token \"%s\": String value incorrect", tok_correct->txt);
1279                                 break;
1280                         case TOK_STRING_IQUOTE:
1281                         case TOK_STRING_IANGLE:
1282                                 if (strcmp(tok_gen->include, tok_correct->include))
1283                                         failed("Token \"%s\": #include string incorrect", tok_correct->txt);
1284                                 break;
1285                         case TOK_IDENTIFIER:
1286                         case TOK_CCOMMENT:
1287                         case TOK_CPPCOMMENT:
1288                         case TOK_WHITE:
1289                         case TOK_STARTLINE:
1290                         case TOK_STRAY:
1291                                 break;
1292                 }
1293                 if (tok_gen->type!=TOK_STARTLINE && (
1294                         tok_gen->txt_size != tok_correct->txt_size ||
1295                         memcmp(tok_gen->txt, tok_correct->txt, tok_gen->txt_size))
1296                         )
1297                         failed("Token \"%s\": txt incorrect", tok_correct->txt);
1298         }
1299         
1300         #undef failed
1301 done:
1302         ok(success==1, "Tokenize %s", t->txt);
1303         
1304         if (!success)
1305                 token_list_dump(tl, stdout);
1306         
1307         talloc_free(txt);
1308 }
1309
1310 static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
1311         FILE *f = fopen(file_name, "rb");
1312         darray_char *text = talloc_darray(NULL);
1313         const size_t inc = 1024;
1314         struct token_list *tl;
1315         
1316         if (!f) {
1317                 fail("Could not read file '%s': %s", file_name, strerror(errno));
1318                 goto end;
1319         }
1320         
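             //Read the file in inc-sized chunks, keeping the buffer NUL-terminated after each read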
1321         for (;;) {
1322                 size_t read_len;
1323                 
1324                 darray_realloc(*text, text->size+inc+1);
1325                 read_len = fread(text->item+text->size, 1, inc, f);
1326                 text->size += read_len;
1327                 text->item[text->size] = 0;
1328                 
1329                 if (read_len < inc)
1330                         break;
1331                 
1332         }
1333         if (ferror(f)) {
1334                 fail("Error reading file '%s': %s", file_name, strerror(errno));
1335                 goto end;
1336         }
1337         
1338         tl = tokenize(text, text->item, text->size, mq);
1339         tl->filename = file_name;
1340         
1341         //printf("File '%s' has %zu tokens\n", file_name, token_list_count(tl));
1342         //token_list_dump(tl, stdout);
1343         
1344         if (!token_list_sanity_check(tl, stdout)) {
1345                 fail("Sanity check failed for file '%s'", file_name);
1346                 goto end;
1347         }
1348         
1349         pass("File '%s' has %zu tokens", file_name, token_list_count(tl));
1350         
1351         /*while (queue_count(*mq)) {
1352                 struct tok_message msg = dequeue(*mq);
1353                 tok_message_print(&msg, tl);
1354         }*/
1355         
1356 end:
1357         talloc_free(text);
1358         if (f)
1359                 fclose(f);
1360 }
1361
1362 static void test_tokenizer(void) {
1363         tok_message_queue mq;
1364         size_t i, count;
1365         int has_warn_or_worse = 0;
1366         
1367         queue_init(mq, NULL);
1368         
1369         count = sizeof(tokenizer_tests)/sizeof(*tokenizer_tests);
1370         for (i=0; i<count; i++) {
1371                 test_tokenizer_single(tokenizer_tests+i, &mq);
1372                 while (queue_count(mq)) {
1373                         struct tok_message msg = dequeue(mq);
1374                         (void) msg;
1375                         //tok_message_dump(&msg);
1376                 }
1377         }
1378         
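             //Each tokenizer_msg_tests entry pairs a test case with the message paths tokenize() should queue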
1379         count = sizeof(tokenizer_msg_tests)/sizeof(*tokenizer_msg_tests);
1380         for (i=0; i<count; i++) {
1381                 size_t j;
1382                 test_tokenizer_single(&tokenizer_msg_tests[i].test, &mq);
1383                 
1384                 if (queue_count(mq) != tokenizer_msg_tests[i].message_count) {
1385                         fail("Incorrect number of messages from tokenize()");
1386                         while (queue_count(mq))
1387                                 (void) dequeue(mq);
1388                         goto msg_fail;
1389                 }
1390                 
1391                 for (j=0; queue_count(mq); j++) {
1392                         struct tok_message msg = dequeue(mq);
1393                         const char *base = "tokenize/";
1394                         size_t baselen = strlen(base);
1395                         //tok_message_dump(&msg);
1396                         
1397                         if (strncmp(msg.path, base, baselen)) {
1398                                 fail("Message from tokenize() doesn't start with \"%s\"",
1399                                         base);
1400                                 goto msg_fail;
1401                         }
1402                         if (strcmp(msg.path+baselen,
1403                                         tokenizer_msg_tests[i].messages[j])) {
1404                                 fail("Incorrect message %s, should be %s",
1405                                         msg.path+baselen, tokenizer_msg_tests[i].messages[j]);
1406                                 goto msg_fail;
1407                         }
1408                 }
1409                 
1410                 pass("Messages from tokenize() are correct");
1411         msg_fail:;
1412         }
1413         
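             //Tokenize this test file itself; warnings or worse make the final test fail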
1414         test_tokenizer_file("test/run.c", &mq);
1415         
1416         while (queue_count(mq)) {
1417                 struct tok_message msg = dequeue(mq);
1418                 if (msg.level >= TM_WARN) {
1419                         has_warn_or_worse = 1;
1420                         tok_message_dump(&msg);
1421                 }
1422                 //else tok_message_dump(&msg);
1423         }
1424         
1425         ok(has_warn_or_worse==0, "Tokenizing run.c generated%s warnings, errors, or bugs",
1426                 has_warn_or_worse ? "" : " no");
1427         
1428         queue_free(mq);
1429 }
1430
1431 #include <unistd.h>
1432
1433 int main(void)
1434 {
1435         plan_tests(195);
1436         
1437         diag("* Checking queue...");
1438         test_queue();
1439         
1440         diag("* Checking read_cstring...");
1441         test_read_cstring();
1442         
1443         diag("* Checking dict...");
1444         test_dict();
1445         
1446         diag("* Checking charflag...");
1447         test_charflag();
1448         
1449         diag("* Checking readui...");
1450         test_readui();
1451         
1452         diag("* Checking scan_number...");
1453         test_scan_number();
1454         
1455         diag("* Checking read_integer...");
1456         test_read_integer();
1457         
1458         diag("* Checking read_floating...");
1459         test_read_floating();
1460         
1461         diag("* Checking tokenizer...");
1462         test_tokenizer();
1463         
1464         /* This exits depending on whether all tests passed */
1465         return exit_status();
1466 }