X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fccan_tokenizer%2Fccan_tokenizer.h;h=9c40ae20973c026f162a8e8fecbadd76c6b12565;hp=7634501f84f455774364dfe9aabb0961c0f087a7;hb=f725bbb1987284933e0f21dfb8f2ce7a1f0806e5;hpb=69cc1b45b4921c0be738902fe0d5225f135e2aae diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.h b/ccan/ccan_tokenizer/ccan_tokenizer.h index 7634501f..9c40ae20 100644 --- a/ccan/ccan_tokenizer/ccan_tokenizer.h +++ b/ccan/ccan_tokenizer/ccan_tokenizer.h @@ -28,7 +28,7 @@ #ifndef CCAN_TOKENIZER_H #define CCAN_TOKENIZER_H -#include +#include #include "charflag.h" #include "dict.h" #include "queue.h" @@ -41,8 +41,11 @@ enum token_type { TOK_INTEGER, //integer (e.g. 5, 1000L, 0x5) TOK_FLOATING, //floating point number (e.g. 5.0, 7.0f, etc.) TOK_OPERATOR, //operator (e.g. +, -, (, ), ++, etc.) + + #define token_type_is_identifier(type) ((type)>=TOK_KEYWORD && (type)<=TOK_IDENTIFIER) TOK_KEYWORD, //keyword (e.g. char, _Bool, ifdef) TOK_IDENTIFIER, //identifier or unprocessed keyword (e.g. int, token, pp_conditions) + TOK_CHAR, //character literal (e.g. 'a' or even '1234') TOK_STRING, //string literal (e.g. "hello" or "zero\0inside") TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include) @@ -54,6 +57,7 @@ enum token_type { TOK_CCOMMENT, //C comment (e.g. /* comment */) TOK_CPPCOMMENT, //C++ comment (e.g. //comment ) TOK_WHITE, //whitespace (span of \t\n\v\f\r and space) + TOK_STARTLINE, //beginning of line (txt/txtsize is always empty) TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be }; @@ -176,7 +180,7 @@ struct token { struct tok_integer integer; struct tok_floating floating; int opkw; //operator or keyword ID (e.g. '+', INC_OP (++), ADD_ASSIGN (+=)) - array_char string; //applies to TOK_CHAR and TOK_STRING + darray_char *string; //applies to TOK_CHAR and TOK_STRING char *include; //applies to TOK_STRING_IQUOTE and TOK_STRING_IANGLE }; @@ -192,6 +196,11 @@ struct token { size_t line, col; }; +//keywords such as int, long, etc. may be defined over, making them identifiers in a sense +static inline int token_is_identifier(const struct token *tok) { + return token_type_is_identifier(tok->type); +} + static inline int token_is_ignored(const struct token *tok) { return token_type_is_ignored(tok->type); } @@ -204,6 +213,11 @@ static inline int token_is_kw(const struct token *tok, int opkw) { return tok->type==TOK_KEYWORD && tok->opkw==opkw; } +static inline int token_txt_is(const struct token *tok, const char *str) { + size_t len = strlen(str); + return tok->txt_size==len && !memcmp(tok->txt, str, len); +} + struct token_list { struct token *first, *last; @@ -231,8 +245,8 @@ extern struct dict *tokenizer_dict; typedef queue(struct tok_message) tok_message_queue; -//the token_list is allocated as a child of orig -struct token_list *tokenize(const char *orig, size_t orig_size, tok_message_queue *mq); +//the token_list is allocated as a child of tcontext +struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size, tok_message_queue *mq); size_t token_list_count(const struct token_list *tl); @@ -286,7 +300,7 @@ void tok_message_queue_dump(const tok_message_queue *mq); /* Miscellaneous internal components */ -char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq); +char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq); char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq);