--- /dev/null
+../../licenses/BSD-MIT
\ No newline at end of file
* err(1, "Could not read file %s", argv[1]);
*
* //tokenize the contents
- * tl = tokenize(file, len, &mq);
+ * tl = tokenize(file, file, len, &mq);
*
* //print warnings, errors, etc.
* while (queue_count(mq)) {
*
* return 0;
* }
+ *
+ * License: BSD
*/
int main(int argc, char *argv[])
{
return 1;
if (strcmp(argv[1], "depends") == 0) {
- printf("ccan/array\n");
+ printf("ccan/darray\n");
+ printf("ccan/talloc\n");
return 0;
}
#endif
+static int talloc_darray_destructor(void *ptr);
+
+/*
+ * darray(T) *talloc_darray(const void *context);
+ *
+ * Create a new, empty darray whose header is a talloc child of context.
+ * A destructor is registered on the header, so freeing the returned
+ * pointer (or its talloc parent) also releases the darray's item buffer.
+ *
+ * Returns NULL if the talloc allocation fails.
+ */
+static void *talloc_darray(const void *context)
+{
+	void *ret = talloc(context, darray(void));
+
+	/* Bail out before darray_init() dereferences a failed allocation. */
+	if (ret == NULL)
+		return NULL;
+
+	darray_init(*(darray(void)*)ret);
+	talloc_set_destructor(ret, talloc_darray_destructor);
+	return ret;
+}
+
+/*
+ * talloc destructor for a darray header: releases the item buffer with
+ * free() and returns 0.
+ */
+static int talloc_darray_destructor(void *ptr)
+{
+	darray(void) *header = (darray(void) *)ptr;
+
+	/* Only the item buffer is released here; the header itself is not. */
+	free(header->item);
+	return 0;
+}
+
#define MESSAGE_PATH "tokenize/"
static void unbreak_backslash_broken_lines(struct token_list *tl, tok_message_queue *mq) {
const char *s = tl->orig, *e = s+tl->orig_size;
- array_char txt = array_new(tl);
- array(const char*) olines = array_new(tl);
- array(const char*) tlines = array_new(tl);
+ darray_char *txt = talloc_darray(tl);
+ darray(const char*) *olines = talloc_darray(tl);
+ darray(const char*) *tlines = talloc_darray(tl);
do {
const char *line_start = s, *line_end;
const char *lnw; //last non-white
- size_t start_offset = txt.size;
+ size_t start_offset = txt->size;
//scan to the next line and find the last non-white character in the line
while (s<e && !creturn(*s)) s++;
//add the backslash-break-free version of the text
if (lnw>line_start && lnw[-1]=='\\' && line_end<e) {
- array_append_items(txt, line_start, lnw-1-line_start);
+ darray_append_items(*txt, line_start, lnw-1-line_start);
if (lnw<e && cspace(*lnw)) {
tok_msg_warn(spaces_after_backslash_break, lnw,
"Trailing spaces after backslash-broken line");
}
} else
- array_append_items(txt, line_start, s-line_start);
+ darray_append_items(*txt, line_start, s-line_start);
//add the line starts for this line
- array_append(olines, line_start);
- array_append(tlines, (const char*)start_offset);
+ darray_append(*olines, line_start);
+ darray_append(*tlines, (const char*)start_offset);
//Since the txt buffer moves when expanded, we're storing offsets
// for now. Once we're done building txt, we can add the base
// of it to all the offsets to make them pointers.
} while (s<e);
//stick a null terminator at the end of the text
- array_realloc(txt, txt.size+1);
- txt.item[txt.size] = 0;
+ darray_realloc(*txt, txt->size+1);
+ txt->item[txt->size] = 0;
//convert the line start offsets to pointers
- array_for_t(i, tlines, const char *, *i = txt.item + (size_t)*i);
-
- tl->olines = olines.item;
- tl->olines_size = olines.size;
- tl->txt = txt.item;
- tl->txt_size = txt.size;
- tl->tlines = tlines.item;
- tl->tlines_size = tlines.size;
+ {
+ const char **i;
+ darray_foreach(i, *tlines)
+ *i = txt->item + (size_t)(*i);
+ }
+
+ tl->olines = olines->item;
+ tl->olines_size = olines->size;
+ tl->txt = txt->item;
+ tl->txt_size = txt->size;
+ tl->tlines = tlines->item;
+ tl->tlines_size = tlines->size;
}
static void normal_keyword(struct token *tok) {
struct token tok = {__VA_ARGS__}; \
tok.txt = orig; \
tok.txt_size = s-orig; \
- array_append(array, tok); \
+ darray_append(*arr, tok); \
} while (0)
#define cstray(c) (ccontrol(c) || cextended(c) || (c)=='@' || (c)=='`' || (c)=='\\')
talloc_free(tokenizer_dict);
}
-struct token_list *tokenize(const char *orig, size_t orig_size,
+struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size,
tok_message_queue *mq) {
- struct token_list *tl = talloc(orig, struct token_list);
+ struct token_list *tl = talloc(tcontext, struct token_list);
const char *s, *e;
size_t stray_count=0, cr_count=0;
- array(struct token) array = array_new(tl);
+ darray(struct token) *arr = talloc_darray(tl);
int only_pound_include = 0;
if (!tokenizer_dict) {
s = tl->txt;
e = s + tl->txt_size;
- array_appends_t(array, struct token, {
+ darray_appends_t(*arr, struct token, {
.type = TOK_STARTLINE,
.txt = s,
.txt_size = 0
s = read_cnumber(&tok, s-1, e, mq);
tok.txt = orig;
tok.txt_size = s-orig;
- array_append(array, tok);
+ darray_append(*arr, tok);
} else if (csymbol(c) || cident(c)) {
if (only_pound_include && (c=='"' || c=='<')) { //include string
{.include = include});
} else if (c=='\'' || c=='\"') { //character or string literal
- array_char string = array_new(tl);
- s = read_cstring(&string, s, e, c, mq);
+ darray_char *string = talloc_darray(tl);
+ s = read_cstring(string, s, e, c, mq);
if (s<e) s++; //advance past endquote (if available)
add(.type = c=='\'' ? TOK_CHAR : TOK_STRING,
{.string = string});
- if (c=='\'' && string.size==0) {
+ if (c=='\'' && string->size==0) {
tok_msg_error(empty_char_constant, orig,
"Empty character constant");
}
{.opkw = ent->id});
if (ent->id == INCLUDE) {
//hacky way to lex #include string properly
- struct token *ts = array.item;
- struct token *tp = ts+array.size-1;
+ struct token *ts = arr->item;
+ struct token *tp = ts+arr->size-1;
while (tp>ts && token_is_ignored(tp-1))
tp--;
if (tp>ts && token_is_op(tp-1, '#')) {
"Text contains non-standard line terminators");
}
- finalize(tl, array.item, array.item+array.size);
+ finalize(tl, arr->item, arr->item+arr->size);
return tl;
}
}
}
-static char *escape_string(array_char *buf, const char *str, size_t size) {
+static char *escape_string(darray_char *buf, const char *str, size_t size) {
const char *s = str, *e = s+size;
- array_from_lit(*buf, "");
+ darray_from_lit(*buf, "");
for (;s<e;s++) {
char buffer[8];
buffer[0] = c;
buffer[1] = 0;
}
- array_append_string(*buf, esc);
+ darray_append_string(*buf, esc);
}
return buf->item;
//Make sure txt and orig match exactly except for backslash line breaks
if (!txt_orig_matches(i->txt, i->txt_size, i->orig, i->orig_size)) {
- array_char buf = array_new(NULL);
+ darray_char buf = darray_new();
fprintf(err,
"txt and orig do not match:\n"
"\ttxt = \"%s\"\n",
fprintf(err, "\torig = \"%s\"\n",
escape_string(&buf, i->orig, i->orig_size) );
- array_free(buf);
+ darray_free(buf);
return 0;
}
void token_list_dump(const struct token_list *tl, FILE *f) {
struct token *tok;
- array_char buf = array_new(NULL);
+ darray_char buf = darray_new();
size_t i = 0;
char buf2[8];
const char *token_type_str[] = {
#endif
}
- array_free(buf);
+ darray_free(buf);
}
void tok_message_print(struct tok_message *m, struct token_list *tl) {
#ifndef CCAN_TOKENIZER_H
#define CCAN_TOKENIZER_H
-#include <ccan/array/array.h>
+#include <ccan/darray/darray.h>
#include "charflag.h"
#include "dict.h"
#include "queue.h"
struct tok_integer integer;
struct tok_floating floating;
int opkw; //operator or keyword ID (e.g. '+', INC_OP (++), ADD_ASSIGN (+=))
- array_char string; //applies to TOK_CHAR and TOK_STRING
+ darray_char *string; //applies to TOK_CHAR and TOK_STRING
char *include; //applies to TOK_STRING_IQUOTE and TOK_STRING_IANGLE
};
typedef queue(struct tok_message) tok_message_queue;
-//the token_list is allocated as a child of orig
-struct token_list *tokenize(const char *orig, size_t orig_size, tok_message_queue *mq);
+//the token_list is allocated as a child of tcontext
+struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size, tok_message_queue *mq);
size_t token_list_count(const struct token_list *tl);
/* Miscellaneous internal components */
-char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
+char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq);
//Reads a C string starting at s until quoteChar is found or e is reached
// Returns the pointer to the terminating quote character or e if none was found
-char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
+char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
const char * const tokstart = s;
const char *p;
int has_endquote=0, has_newlines=0;
//tok_msg_debug(called, s, "Called read_cstring on `%s`", s);
- #define append(startptr,endptr) array_append_items(*out, startptr, (endptr)-(startptr))
- #define append_char(theChar) array_append(*out, theChar)
- #define append_zero() do {array_append(*out, 0); out->size--;} while(0)
+ #define append(startptr,endptr) darray_append_items(*out, startptr, (endptr)-(startptr))
+ #define append_char(theChar) darray_append(*out, theChar)
+ #define append_zero() do {darray_append(*out, 0); out->size--;} while(0)
p = s;
while (p<e) {
char *string = talloc_strdup(NULL, orig);
unsigned int i;
- toks = tokenize(string, strlen(string), MQ);
+ toks = tokenize(string, string, strlen(string), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == strlen(string)/size + 1);
char *string = spacify(orig, size);
unsigned int i;
- toks = tokenize(string, strlen(string), MQ);
+ toks = tokenize(string, string, strlen(string), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == strlen(orig)/size*2 + 1);
const char *string = backslashify(orig);
unsigned int i;
- toks = tokenize(string, strlen(string), MQ);
+ toks = tokenize(string, string, strlen(string), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == strlen(orig)/size + 1);
/* char literal */
str = talloc_strdup(NULL, char_token);
- toks = tokenize(str, strlen(str), MQ);
+ toks = tokenize(str, str, strlen(str), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == 2);
ok1(item(0).type == TOK_STARTLINE);
/* string literal */
str = talloc_strdup(NULL, string_token);
- toks = tokenize(str, strlen(str), MQ);
+ toks = tokenize(str, str, strlen(str), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == 2);
ok1(item(0).type == TOK_STARTLINE);
/* Identifiers */
str = talloc_strdup(NULL, ident_tokens);
- toks = tokenize(str, strlen(str), MQ);
+ toks = tokenize(str, str, strlen(str), MQ);
ok1(token_list_sanity_check(toks, stdout));
token_list_dump(toks, stdout);
ok1(token_list_count(toks) == 10);
/* Identifiers */
backslashed_idents = backslashify(ident_tokens);
- toks = tokenize(backslashed_idents, strlen(backslashed_idents), MQ);
+ toks = tokenize(backslashed_idents, backslashed_idents, strlen(backslashed_idents), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == 10);
ok1(item(0).type == TOK_STARTLINE);
#define array_count_pair(type, ...) (const type []){__VA_ARGS__}, sizeof((const type []){__VA_ARGS__})/sizeof(type)
static void test_read_cstring(void) {
- #define next() do {array_free(str); array_init(str, NULL); csp++;} while(0)
+ #define next() do {darray_free(str); darray_init(str); csp++;} while(0)
#define cs (*csp)
#define verify_quotechar(correct, correct_continuation_offset, quotechar) do { \
const size_t s = sizeof(correct)-1; \
};
const char * const *csp = cstrings;
const char *p;
- array_char str = array_new(NULL);
+ darray_char str = darray_new();
tok_message_queue mq;
queue_init(mq, NULL);
//Check a series of hex escapes
verify("\x50\x35\x12\xEF\xFE\x12\x45", 32);
- array_free(str);
+ darray_free(str);
//tok_message_queue_dump(&mq);
};
#define T(txt, ...) {txt, sizeof(txt)-1, array_count_pair(struct token, __VA_ARGS__)}
-#define string(txt) {.string={.item = (txt), .size = sizeof(txt)-1}}
+#define string(txt) {.string=(darray_char[1]){{.item = (txt), .size = sizeof(txt)-1}}}
#define opkw(v) {.opkw = (v)}
#define txt(t) .txt = (t), .txt_size = sizeof(t)-1
#define integer(...) {.integer={__VA_ARGS__}}
goto done; \
} while(0)
- tl = tokenize(txt, txt_size, mq);
+ tl = tokenize(txt, txt, txt_size, mq);
if (tl->orig != txt || tl->orig_size != txt_size)
failed("tokenize() did not replicate orig/orig_size from arguments");
case TOK_CHAR:
case TOK_STRING:
//anything using string
- if (tok_gen->string.size != tok_correct->string.size ||
- memcmp(tok_gen->string.item, tok_correct->string.item,
- tok_gen->string.size) ||
- tok_gen->string.item[tok_gen->string.size] != 0 )
+ if (tok_gen->string->size != tok_correct->string->size ||
+ memcmp(tok_gen->string->item, tok_correct->string->item,
+ tok_gen->string->size) ||
+ tok_gen->string->item[tok_gen->string->size] != 0 )
failed("Token \"%s\": String value incorrect", tok_correct->txt);
break;
case TOK_STRING_IQUOTE:
static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
FILE *f = fopen(file_name, "rb");
- array_char text = array_new(NULL);
+ darray_char *text = talloc_darray(NULL);
const size_t inc = 1024;
struct token_list *tl;
for (;;) {
size_t read_len;
- array_realloc(text, text.size+inc+1);
- read_len = fread(text.item+text.size, 1, inc, f);
- text.size += read_len;
- text.item[text.size] = 0;
+ darray_realloc(*text, text->size+inc+1);
+ read_len = fread(text->item+text->size, 1, inc, f);
+ text->size += read_len;
+ text->item[text->size] = 0;
if (read_len < inc)
break;
goto end;
}
- tl = tokenize(text.item, text.size, mq);
+ tl = tokenize(text, text->item, text->size, mq);
tl->filename = file_name;
//printf("File '%s' has %zu tokens\n", file_name, token_list_count(tl));
}*/
end:
- array_free(text);
+ talloc_free(text);
if (f)
fclose(f);
}