From: Joey Adams
Date: Sat, 19 Feb 2011 10:53:04 +0000 (-0500)
Subject: ccan_tokenizer: update to be compatible with darray.
X-Git-Url: https://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=af052b1a93f91b6e0888d5ece29786243d57cf2e

ccan_tokenizer: update to be compatible with darray.
---

diff --git a/ccan/ccan_tokenizer/LICENSE b/ccan/ccan_tokenizer/LICENSE
new file mode 120000
index 00000000..2354d129
--- /dev/null
+++ b/ccan/ccan_tokenizer/LICENSE
@@ -0,0 +1 @@
+../../licenses/BSD-MIT
\ No newline at end of file
diff --git a/ccan/ccan_tokenizer/_info b/ccan/ccan_tokenizer/_info
index 754b3cf6..4801a13b 100644
--- a/ccan/ccan_tokenizer/_info
+++ b/ccan/ccan_tokenizer/_info
@@ -64,7 +64,7 @@
  *		err(1, "Could not read file %s", argv[1]);
  *
  *	//tokenize the contents
- *	tl = tokenize(file, len, &mq);
+ *	tl = tokenize(file, file, len, &mq);
  *
  *	//print warnings, errors, etc.
  *	while (queue_count(mq)) {
@@ -81,6 +81,8 @@
  *
  *	return 0;
  * }
+ *
+ * License: BSD
  */
 int main(int argc, char *argv[])
 {
@@ -89,7 +91,8 @@ int main(int argc, char *argv[])
 		return 1;
 
 	if (strcmp(argv[1], "depends") == 0) {
-		printf("ccan/array\n");
+		printf("ccan/darray\n");
+		printf("ccan/talloc\n");
 		return 0;
 	}
diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.c b/ccan/ccan_tokenizer/ccan_tokenizer.c
index cb1b2716..47282e93 100644
--- a/ccan/ccan_tokenizer/ccan_tokenizer.c
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.c
@@ -170,18 +170,41 @@ struct tokenizer *tokenizer_new(void *ctx) {
 #endif
 
+static int talloc_darray_destructor(void *ptr);
+
+/*
+ * darray(T) *talloc_darray(const void *context);
+ *
+ * Create a new darray anchored in a talloc buffer.
+ * When this pointer is freed, the darray will be freed as well.
+ */
+static void *talloc_darray(const void *context)
+{
+	void *ret = talloc(context, darray(void));
+	darray_init(*(darray(void)*)ret);
+	talloc_set_destructor(ret, talloc_darray_destructor);
+	return ret;
+}
+
+static int talloc_darray_destructor(void *ptr)
+{
+	darray(void) *arr = ptr;
+	free(arr->item);
+	return 0;
+}
+
 #define MESSAGE_PATH "tokenize/"
 
 static void unbreak_backslash_broken_lines(struct token_list *tl, tok_message_queue *mq) {
 	const char *s = tl->orig, *e = s+tl->orig_size;
-	array_char txt = array_new(tl);
-	array(const char*) olines = array_new(tl);
-	array(const char*) tlines = array_new(tl);
+	darray_char *txt = talloc_darray(tl);
+	darray(const char*) *olines = talloc_darray(tl);
+	darray(const char*) *tlines = talloc_darray(tl);
 
 	do {
 		const char *line_start = s, *line_end;
 		const char *lnw; //last non-white
-		size_t start_offset = txt.size;
+		size_t start_offset = txt->size;
 
 		//scan to the next line and find the last non-white character in the line
 		while (sline_start && lnw[-1]=='\\' && line_endsize+1);
+	txt->item[txt->size] = 0;
 
 	//convert the line start offsets to pointers
-	array_for_t(i, tlines, const char *, *i = txt.item + (size_t)*i);
-
-	tl->olines = olines.item;
-	tl->olines_size = olines.size;
-	tl->txt = txt.item;
-	tl->txt_size = txt.size;
-	tl->tlines = tlines.item;
-	tl->tlines_size = tlines.size;
+	{
+		const char **i;
+		darray_foreach(i, *tlines)
+			*i = txt->item + (size_t)(*i);
+	}
+
+	tl->olines = olines->item;
+	tl->olines_size = olines->size;
+	tl->txt = txt->item;
+	tl->txt_size = txt->size;
+	tl->tlines = tlines->item;
+	tl->tlines_size = tlines->size;
 }
 
 static void normal_keyword(struct token *tok) {
@@ -366,7 +393,7 @@ static void finalize(struct token_list *tl, struct token *start, struct token *e
 		struct token tok = {__VA_ARGS__}; \
 		tok.txt = orig; \
 		tok.txt_size = s-orig; \
-		array_append(array, tok); \
+		darray_append(*arr, tok); \
 	} while (0)
 
 #define cstray(c) (ccontrol(c) || cextended(c) || (c)=='@' || (c)=='`' || (c)=='\\')
@@ -379,12 +406,12 @@ static void free_tokenizer_dict(void) {
 	talloc_free(tokenizer_dict);
 }
 
-struct token_list *tokenize(const char *orig, size_t orig_size,
+struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size,
 			tok_message_queue *mq) {
-	struct token_list *tl = talloc(orig, struct token_list);
+	struct token_list *tl = talloc(tcontext, struct token_list);
 	const char *s, *e;
 	size_t stray_count=0, cr_count=0;
-	array(struct token) array = array_new(tl);
+	darray(struct token) *arr = talloc_darray(tl);
 	int only_pound_include = 0;
 
 	if (!tokenizer_dict) {
@@ -401,7 +428,7 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
 	s = tl->txt;
 	e = s + tl->txt_size;
 
-	array_appends_t(array, struct token, {
+	darray_appends_t(*arr, struct token, {
 		.type = TOK_STARTLINE,
 		.txt = s,
 		.txt_size = 0
@@ -453,7 +480,7 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
 			s = read_cnumber(&tok, s-1, e, mq);
 			tok.txt = orig;
 			tok.txt_size = s-orig;
-			array_append(array, tok);
+			darray_append(*arr, tok);
 		} else if (csymbol(c) || cident(c)) {
 			if (only_pound_include && (c=='"' || c=='<')) {
 				//include string
@@ -475,13 +502,13 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
 				{.include = include});
 		} else if (c=='\'' || c=='\"') {
 			//character or string literal
-			array_char string = array_new(tl);
-			s = read_cstring(&string, s, e, c, mq);
+			darray_char *string = talloc_darray(tl);
+			s = read_cstring(string, s, e, c, mq);
 			if (ssize==0) {
 				tok_msg_error(empty_char_constant, orig,
 					"Empty character constant");
 			}
@@ -520,8 +547,8 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
 				{.opkw = ent->id});
 			if (ent->id == INCLUDE) {
 				//hacky way to lex #include string properly
-				struct token *ts = array.item;
-				struct token *tp = ts+array.size-1;
+				struct token *ts = arr->item;
+				struct token *tp = ts+arr->size-1;
 				while (tp>ts && token_is_ignored(tp-1))
 					tp--;
 				if (tp>ts && token_is_op(tp-1, '#')) {
@@ -563,7 +590,7 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
 			"Text contains non-standard line terminators");
 	}
 
-	finalize(tl, array.item, array.item+array.size);
+	finalize(tl, arr->item, arr->item+arr->size);
 
 	return tl;
 }
@@ -646,9 +673,9 @@ int tok_point_lookup(struct tok_point *out, const char *ptr,
 	}
 }
 
-static char *escape_string(array_char *buf, const char *str, size_t size) {
+static char *escape_string(darray_char *buf, const char *str, size_t size) {
 	const char *s = str, *e = s+size;
-	array_from_lit(*buf, "");
+	darray_from_lit(*buf, "");
 	for (;sitem;
@@ -816,7 +843,7 @@ int token_list_sanity_check(const struct token_list *tl, FILE *err) {
 		//Make sure txt and orig match exactly except for backslash line breaks
 		if (!txt_orig_matches(i->txt, i->txt_size, i->orig, i->orig_size)) {
-			array_char buf = array_new(NULL);
+			darray_char buf = darray_new();
 
 			fprintf(err,
 				"txt and orig do not match:\n"
 				"\ttxt = \"%s\"\n",
@@ -824,7 +851,7 @@ int token_list_sanity_check(const struct token_list *tl, FILE *err) {
 			fprintf(err, "\torig = \"%s\"\n",
 				escape_string(&buf, i->orig, i->orig_size)
 			);
-			array_free(buf);
+			darray_free(buf);
 
 			return 0;
 		}
@@ -961,7 +988,7 @@ static char *sprint_token_flags(char buf[3], struct token_flags flags) {
 
 void token_list_dump(const struct token_list *tl, FILE *f) {
 	struct token *tok;
-	array_char buf = array_new(NULL);
+	darray_char buf = darray_new();
 	size_t i = 0;
 	char buf2[8];
 	const char *token_type_str[] = {
@@ -994,7 +1021,7 @@ void token_list_dump(const struct token_list *tl, FILE *f) {
 	#endif
 	}
 
-	array_free(buf);
+	darray_free(buf);
 }
 
 void tok_message_print(struct tok_message *m, struct token_list *tl) {
diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.h b/ccan/ccan_tokenizer/ccan_tokenizer.h
index eb541679..9c40ae20 100644
--- a/ccan/ccan_tokenizer/ccan_tokenizer.h
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.h
@@ -28,7 +28,7 @@
 #ifndef CCAN_TOKENIZER_H
 #define CCAN_TOKENIZER_H
 
-#include <ccan/array/array.h>
+#include <ccan/darray/darray.h>
 #include "charflag.h"
 #include "dict.h"
 #include "queue.h"
@@ -180,7 +180,7 @@ struct token {
 		struct tok_integer integer;
 		struct tok_floating floating;
 		int opkw; //operator or keyword ID (e.g. '+', INC_OP (++), ADD_ASSIGN (+=))
-		array_char string; //applies to TOK_CHAR and TOK_STRING
+		darray_char *string; //applies to TOK_CHAR and TOK_STRING
 		char *include; //applies to TOK_STRING_IQUOTE and TOK_STRING_IANGLE
 	};
 
@@ -245,8 +245,8 @@ extern struct dict *tokenizer_dict;
 
 typedef queue(struct tok_message) tok_message_queue;
 
-//the token_list is allocated as a child of orig
-struct token_list *tokenize(const char *orig, size_t orig_size, tok_message_queue *mq);
+//the token_list is allocated as a child of tcontext
+struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size, tok_message_queue *mq);
 
 size_t token_list_count(const struct token_list *tl);
 
@@ -300,7 +300,7 @@ void tok_message_queue_dump(const tok_message_queue *mq);
 
 /* Miscellaneous internal components */
 
-char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
+char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
 char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq);
diff --git a/ccan/ccan_tokenizer/read_cstring.c b/ccan/ccan_tokenizer/read_cstring.c
index 9a62d0cc..d1277ca1 100644
--- a/ccan/ccan_tokenizer/read_cstring.c
+++ b/ccan/ccan_tokenizer/read_cstring.c
@@ -11,16 +11,16 @@ static char *strdup_rng(const char *s, const char *e) {
 
 //Reads a C string starting at s until quoteChar is found or e is reached
 // Returns the pointer to the terminating quote character or e if none was found
-char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
+char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
 	const char * const tokstart = s;
 	const char *p;
 	int has_endquote=0, has_newlines=0;
 
 	//tok_msg_debug(called, s, "Called read_cstring on `%s`", s);
 
-	#define append(startptr,endptr) array_append_items(*out, startptr, (endptr)-(startptr))
-	#define append_char(theChar) array_append(*out, theChar)
-	#define append_zero() do {array_append(*out, 0); out->size--;} while(0)
+	#define append(startptr,endptr) darray_append_items(*out, startptr, (endptr)-(startptr))
+	#define append_char(theChar) darray_append(*out, theChar)
+	#define append_zero() do {darray_append(*out, 0); out->size--;} while(0)
 
 	p = s;
 	while (porig != txt || tl->orig_size != txt_size)
 		failed("tokenize() did not replicate orig/orig_size from arguments");
@@ -1271,10 +1271,10 @@ static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *m
 		case TOK_CHAR:
 		case TOK_STRING:
 			//anything using string
-			if (tok_gen->string.size != tok_correct->string.size ||
-				memcmp(tok_gen->string.item, tok_correct->string.item,
-					tok_gen->string.size) ||
-				tok_gen->string.item[tok_gen->string.size] != 0 )
+			if (tok_gen->string->size != tok_correct->string->size ||
+				memcmp(tok_gen->string->item, tok_correct->string->item,
+					tok_gen->string->size) ||
+				tok_gen->string->item[tok_gen->string->size] != 0 )
 				failed("Token \"%s\": String value incorrect", tok_correct->txt);
 			break;
 		case TOK_STRING_IQUOTE:
@@ -1309,7 +1309,7 @@ done:
 
 static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
 	FILE *f = fopen(file_name, "rb");
-	array_char text = array_new(NULL);
+	darray_char *text = talloc_darray(NULL);
 	const size_t inc = 1024;
 	struct token_list *tl;
 
@@ -1321,10 +1321,10 @@ static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
 	for (;;) {
 		size_t read_len;
 
-		array_realloc(text, text.size+inc+1);
-		read_len = fread(text.item+text.size, 1, inc, f);
-		text.size += read_len;
-		text.item[text.size] = 0;
+		darray_realloc(*text, text->size+inc+1);
+		read_len = fread(text->item+text->size, 1, inc, f);
+		text->size += read_len;
+		text->item[text->size] = 0;
 
 		if (read_len < inc)
 			break;
@@ -1335,7 +1335,7 @@ static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
 		goto end;
 	}
 
-	tl = tokenize(text.item, text.size, mq);
+	tl = tokenize(text, text->item, text->size, mq);
 	tl->filename = file_name;
 
 	//printf("File '%s' has %zu tokens\n", file_name, token_list_count(tl));
@@ -1354,7 +1354,7 @@ static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
 	}*/
 
 end:
-	array_free(text);
+	talloc_free(text);
 	if (f)
 		fclose(f);
 }
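
The pattern this patch leans on throughout is the new talloc_darray() helper: the darray header lives in a talloc buffer, and a talloc destructor frees the darray's item buffer, so freeing the talloc context (for example the token_list passed as the darray's parent, or the new tcontext argument to tokenize()) releases every darray hanging off it in one call. The sketch below is not part of the patch; it only shows that helper in isolation. talloc_darray() and its destructor are copied from the ccan_tokenizer.c hunk above, the demo main() is hypothetical, and the include paths are assumed to follow the usual CCAN layout (ccan/darray/darray.h, ccan/talloc/talloc.h).

    /* Illustration only -- not part of the patch. */
    #include <stdio.h>
    #include <string.h>
    #include <stdlib.h>
    #include <ccan/darray/darray.h>
    #include <ccan/talloc/talloc.h>

    static int talloc_darray_destructor(void *ptr)
    {
    	darray(void) *arr = ptr;
    	free(arr->item);	/* the item buffer is plain malloc memory */
    	return 0;
    }

    /* Allocate a darray header as a talloc child of context; freeing that
     * child (or any ancestor) runs the destructor, which frees the items. */
    static void *talloc_darray(const void *context)
    {
    	void *ret = talloc(context, darray(void));
    	darray_init(*(darray(void)*)ret);
    	talloc_set_destructor(ret, talloc_darray_destructor);
    	return ret;
    }

    int main(void)
    {
    	void *ctx = talloc_new(NULL);           /* parent talloc context */
    	darray_char *buf = talloc_darray(ctx);  /* darray anchored in ctx */
    	const char *word = "backslash";

    	darray_append_items(*buf, word, strlen(word));
    	darray_append(*buf, '\0');
    	printf("%s (%zu chars)\n", buf->item, (size_t)buf->size - 1);

    	talloc_free(ctx);  /* frees buf; the destructor frees buf->item */
    	return 0;
    }

This is also why tokenize() grows an explicit tcontext parameter: the token_list and the darrays inside it no longer borrow the lifetime of the orig buffer but hang off whatever talloc context the caller supplies, as the updated _info example (tokenize(file, file, len, &mq)) and test_tokenizer_file() show.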