#ifndef CCAN_TOKENIZER_H
#define CCAN_TOKENIZER_H
-#include <ccan/array/array.h>
+#include <ccan/darray/darray.h>
#include "charflag.h"
#include "dict.h"
#include "queue.h"
TOK_INTEGER, //integer (e.g. 5, 1000L, 0x5)
TOK_FLOATING, //floating point number (e.g. 5.0, 7.0f, etc.)
TOK_OPERATOR, //operator (e.g. +, -, (, ), ++, etc.)
+
+	//True when a token type lies in the inclusive range TOK_KEYWORD..TOK_IDENTIFIER.
+	//The argument is expanded twice, so pass an expression without side effects.
+	#define token_type_is_identifier(type) (TOK_KEYWORD<=(type) && TOK_IDENTIFIER>=(type))
TOK_KEYWORD, //keyword (e.g. char, _Bool, ifdef)
TOK_IDENTIFIER, //identifier or unprocessed keyword (e.g. int, token, pp_conditions)
+
TOK_CHAR, //character literal (e.g. 'a' or even '1234')
TOK_STRING, //string literal (e.g. "hello" or "zero\0inside")
TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include)
TOK_CCOMMENT, //C comment (e.g. /* comment */)
TOK_CPPCOMMENT, //C++ comment (e.g. //comment )
TOK_WHITE, //whitespace (span of \t\n\v\f\r and space)
+
TOK_STARTLINE, //beginning of line (txt/txtsize is always empty)
TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be
};
struct tok_integer integer;
struct tok_floating floating;
int opkw; //operator or keyword ID (e.g. '+', INC_OP (++), ADD_ASSIGN (+=))
- array_char string; //applies to TOK_CHAR and TOK_STRING
+ darray_char *string; //applies to TOK_CHAR and TOK_STRING
char *include; //applies to TOK_STRING_IQUOTE and TOK_STRING_IANGLE
};
size_t line, col;
};
+//A keyword such as int or long may be defined over, which in a sense makes it
+//an identifier, so keyword tokens are accepted here alongside plain identifiers.
+//Returns 1 when tok->type passes token_type_is_identifier(), otherwise 0.
+static inline int token_is_identifier(const struct token *tok) {
+	if (token_type_is_identifier(tok->type))
+		return 1;
+	return 0;
+}
+
//Returns the result of token_type_is_ignored() applied to tok->type.
static inline int token_is_ignored(const struct token *tok) {
	const int ignored = token_type_is_ignored(tok->type);

	return ignored;
}
return tok->type==TOK_KEYWORD && tok->opkw==opkw;
}
+//Checks whether the token's text is exactly the NUL-terminated string str:
+//tok->txt_size must equal strlen(str) and the bytes must compare equal.
+//Returns 1 on a match, 0 otherwise.
+static inline int token_txt_is(const struct token *tok, const char *str) {
+	const size_t want = strlen(str);
+
+	if (tok->txt_size != want)
+		return 0;
+	return memcmp(tok->txt, str, want) == 0;
+}
+
struct token_list {
struct token *first, *last;
typedef queue(struct tok_message) tok_message_queue;
-//the token_list is allocated as a child of orig
-struct token_list *tokenize(const char *orig, size_t orig_size, tok_message_queue *mq);
+//the token_list is allocated as a child of tcontext
+struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size, tok_message_queue *mq);
size_t token_list_count(const struct token_list *tl);
/* Miscellaneous internal components */
-char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
+char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq);