From: Joey Adams Date: Sun, 12 Jul 2009 03:10:39 +0000 (-0400) Subject: Make tokenizer throw error on empty char literal, along with some slight cleanups... X-Git-Url: https://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=2356c14ecbb57ae1e335eb46b3c8ea78ea3f28bb Make tokenizer throw error on empty char literal, along with some slight cleanups in ccan_tokenizer.h --- diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.c b/ccan/ccan_tokenizer/ccan_tokenizer.c index 7d29e025..52858fea 100644 --- a/ccan/ccan_tokenizer/ccan_tokenizer.c +++ b/ccan/ccan_tokenizer/ccan_tokenizer.c @@ -473,12 +473,19 @@ struct token_list *tokenize(const char *orig, size_t orig_size, add(.type = type, {.include = include}); + } else if (c=='\'' || c=='\"') { //character or string literal array_char string = array_new(tl); s = read_cstring(&string, s, e, c, mq); if (s=TOK_KEYWORD && (type)<=TOK_IDENTIFIER) TOK_KEYWORD, //keyword (e.g. char, _Bool, ifdef) TOK_IDENTIFIER, //identifier or unprocessed keyword (e.g. int, token, pp_conditions) + TOK_CHAR, //character literal (e.g. 'a' or even '1234') TOK_STRING, //string literal (e.g. "hello" or "zero\0inside") TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include) @@ -54,6 +57,7 @@ enum token_type { TOK_CCOMMENT, //C comment (e.g. /* comment */) TOK_CPPCOMMENT, //C++ comment (e.g. //comment ) TOK_WHITE, //whitespace (span of \t\n\v\f\r and space) + TOK_STARTLINE, //beginning of line (txt/txtsize is always empty) TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be }; @@ -192,6 +196,11 @@ struct token { size_t line, col; }; +//keywords such as int, long, etc. may be defined over, making them identifiers in a sense +static inline int token_is_identifier(const struct token *tok) { + return token_type_is_identifier(tok->type); +} + static inline int token_is_ignored(const struct token *tok) { return token_type_is_ignored(tok->type); } @@ -204,6 +213,11 @@ static inline int token_is_kw(const struct token *tok, int opkw) { return tok->type==TOK_KEYWORD && tok->opkw==opkw; } +static inline int token_txt_is(const struct token *tok, const char *str) { + size_t len = strlen(str); + return tok->txt_size==len && !memcmp(tok->txt, str, len); +} + struct token_list { struct token *first, *last; diff --git a/ccan/ccan_tokenizer/todo b/ccan/ccan_tokenizer/todo index 0203d054..4b6d90f0 100644 --- a/ccan/ccan_tokenizer/todo +++ b/ccan/ccan_tokenizer/todo @@ -1,3 +1,8 @@ +Write test for empty_char_constant + +defined cannot be used as a macro name +Add "defined" and only accept it in appropriate circumstances + Update that simple tokenizer compulsory test so things will compile Handle cases like escaped question marks and pound symbols that I don't understand yet.