ccan_tokenizer: update to be compatible with darray.

author Joey Adams <joeyadams3.14159@gmail.com>

Sat, 19 Feb 2011 10:53:04 +0000 (05:53 -0500)

committer Rusty Russell <rusty@rustcorp.com.au>

Tue, 22 Feb 2011 13:28:36 +0000 (23:58 +1030)
author Joey Adams <joeyadams3.14159@gmail.com>
Sat, 19 Feb 2011 10:53:04 +0000 (05:53 -0500)
committer Rusty Russell <rusty@rustcorp.com.au>
Tue, 22 Feb 2011 13:28:36 +0000 (23:58 +1030)
diff --git a/ccan/ccan_tokenizer/LICENSE b/ccan/ccan_tokenizer/LICENSE

new file mode 120000 (symlink)

index 0000000..2354d12
--- /dev/null
+++ b/ccan/ccan_tokenizer/LICENSE
@@ -0,0 +1 @@
+../../licenses/BSD-MIT
\ No newline at end of file
diff --git a/ccan/ccan_tokenizer/_info b/ccan/ccan_tokenizer/_info

index 754b3cf680f4cc412b3d1adf36112ba557cf8fbc..4801a13b52f87f10976cacaa5bbde02369969f4d 100644 (file)
--- a/ccan/ccan_tokenizer/_info
+++ b/ccan/ccan_tokenizer/_info
@@ -64,7 +64,7 @@
   *             err(1, "Could not read file %s", argv[1]);
   *
   *     //tokenize the contents
- *     tl = tokenize(file, len, &mq);
+ *     tl = tokenize(file, file, len, &mq);
   *
   *     //print warnings, errors, etc.
   *     while (queue_count(mq)) {
@@ -81,6 +81,8 @@
   *
   *     return 0;
   * }
+ *
+ * License: BSD
   */
  int main(int argc, char *argv[])
  {
@@ -89,7 +91,8 @@ int main(int argc, char *argv[])
                 return 1;
  
         if (strcmp(argv[1], "depends") == 0) {
-               printf("ccan/array\n");
+               printf("ccan/darray\n");
+               printf("ccan/talloc\n");
                 return 0;
         }
  
diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.c b/ccan/ccan_tokenizer/ccan_tokenizer.c

index cb1b2716a71e722f0578d1417e68609d4e9c3c51..47282e93edce84362fbf215314ca04d51200e527 100644 (file)
--- a/ccan/ccan_tokenizer/ccan_tokenizer.c
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.c
@@ -170,18 +170,41 @@ struct tokenizer *tokenizer_new(void *ctx) {
  
  #endif
  
+static int talloc_darray_destructor(void *ptr);
+
+/*
+ * darray(T) *talloc_darray(const void *context);
+ *
+ * Allocate an empty darray header with talloc, parented to context.
+ * When the returned pointer is freed, its item buffer is free()d as well.
+ */
+static void *talloc_darray(const void *context)
+{
+       void *ret = talloc(context, darray(void)); // header is always allocated as darray(void), whatever T the caller uses
+       darray_init(*(darray(void)*)ret);
+       talloc_set_destructor(ret, talloc_darray_destructor);
+       return ret;
+}
+
+static int talloc_darray_destructor(void *ptr) // destructor installed by talloc_darray()
+{
+       darray(void) *arr = ptr;
+       free(arr->item); // item buffer is released with free(), not through talloc
+       return 0; // NOTE(review): 0 should let talloc go ahead with freeing the header -- confirm
+}
+
  #define MESSAGE_PATH "tokenize/"
  
  static void unbreak_backslash_broken_lines(struct token_list *tl, tok_message_queue *mq) {
         const char *s = tl->orig, *e = s+tl->orig_size;
-       array_char txt = array_new(tl);
-       array(const char*) olines = array_new(tl);
-       array(const char*) tlines = array_new(tl);
+       darray_char         *txt    = talloc_darray(tl);
+       darray(const char*) *olines = talloc_darray(tl);
+       darray(const char*) *tlines = talloc_darray(tl);
         
         do {
                 const char *line_start = s, *line_end;
                 const char *lnw; //last non-white
-               size_t start_offset = txt.size;
+               size_t start_offset = txt->size;
                 
                 //scan to the next line and find the last non-white character in the line
                 while (s<e && !creturn(*s)) s++;
@@ -197,35 +220,39 @@ static void unbreak_backslash_broken_lines(struct token_list *tl, tok_message_qu
                 
                 //add the backslash-break-free version of the text
                 if (lnw>line_start && lnw[-1]=='\\' && line_end<e) {
-                       array_append_items(txt, line_start, lnw-1-line_start);
+                       darray_append_items(*txt, line_start, lnw-1-line_start);
                         if (lnw<e && cspace(*lnw)) {
                                 tok_msg_warn(spaces_after_backslash_break, lnw,
                                         "Trailing spaces after backslash-broken line");
                         }
                 } else
-                       array_append_items(txt, line_start, s-line_start);
+                       darray_append_items(*txt, line_start, s-line_start);
                 
                 //add the line starts for this line
-               array_append(olines, line_start);
-               array_append(tlines, (const char*)start_offset);
+               darray_append(*olines, line_start);
+               darray_append(*tlines, (const char*)start_offset);
                         //Since the txt buffer moves when expanded, we're storing offsets
                         //  for now.  Once we're done building txt, we can add the base
                         //  of it to all the offsets to make them pointers.
         } while (s<e);
         
         //stick a null terminator at the end of the text
-       array_realloc(txt, txt.size+1);
-       txt.item[txt.size] = 0;
+       darray_realloc(*txt, txt->size+1);
+       txt->item[txt->size] = 0;
         
         //convert the line start offsets to pointers
-       array_for_t(i, tlines, const char *, *i = txt.item + (size_t)*i);
-       
-       tl->olines = olines.item;
-       tl->olines_size = olines.size;
-       tl->txt = txt.item;
-       tl->txt_size = txt.size;
-       tl->tlines = tlines.item;
-       tl->tlines_size = tlines.size;
+       {
+               const char **i;
+               darray_foreach(i, *tlines)
+                       *i = txt->item + (size_t)(*i);
+       }
+       
+       tl->olines = olines->item;
+       tl->olines_size = olines->size;
+       tl->txt = txt->item;
+       tl->txt_size = txt->size;
+       tl->tlines = tlines->item;
+       tl->tlines_size = tlines->size;
  }
  
  static void normal_keyword(struct token *tok) {
@@ -366,7 +393,7 @@ static void finalize(struct token_list *tl, struct token *start, struct token *e
                 struct token tok = {__VA_ARGS__}; \
                 tok.txt = orig; \
                 tok.txt_size = s-orig; \
-               array_append(array, tok); \
+               darray_append(*arr, tok); \
         } while (0)
  
  #define cstray(c) (ccontrol(c) || cextended(c) || (c)=='@' || (c)=='`' || (c)=='\\')
@@ -379,12 +406,12 @@ static void free_tokenizer_dict(void) {
         talloc_free(tokenizer_dict);
  }
  
-struct token_list *tokenize(const char *orig, size_t orig_size,
+struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size,
                                 tok_message_queue *mq) {
-       struct token_list *tl = talloc(orig, struct token_list);
+       struct token_list *tl = talloc(tcontext, struct token_list);
         const char *s, *e;
         size_t stray_count=0, cr_count=0;
-       array(struct token) array = array_new(tl);
+       darray(struct token) *arr = talloc_darray(tl);
         int only_pound_include = 0;
         
         if (!tokenizer_dict) {
@@ -401,7 +428,7 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
         s = tl->txt;
         e = s + tl->txt_size;
         
-       array_appends_t(array, struct token, {
+       darray_appends_t(*arr, struct token, {
                 .type = TOK_STARTLINE,
                 .txt = s,
                 .txt_size = 0
@@ -453,7 +480,7 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
                         s = read_cnumber(&tok, s-1, e, mq);
                         tok.txt = orig;
                         tok.txt_size = s-orig;
-                       array_append(array, tok);
+                       darray_append(*arr, tok);
                         
                 } else if (csymbol(c) || cident(c)) {
                         if (only_pound_include && (c=='"' || c=='<')) { //include string
@@ -475,13 +502,13 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
                                         {.include = include});
                                 
                         } else if (c=='\'' || c=='\"') { //character or string literal
-                               array_char string = array_new(tl);
-                               s = read_cstring(&string, s, e, c, mq);
+                               darray_char *string = talloc_darray(tl);
+                               s = read_cstring(string, s, e, c, mq);
                                 if (s<e) s++; //advance past endquote (if available)
                                 add(.type = c=='\'' ? TOK_CHAR : TOK_STRING,
                                     {.string = string});
                                 
-                               if (c=='\'' && string.size==0) {
+                               if (c=='\'' && string->size==0) {
                                         tok_msg_error(empty_char_constant, orig,
                                                 "Empty character constant");
                                 }
@@ -520,8 +547,8 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
                                                         {.opkw = ent->id});
                                                 if (ent->id == INCLUDE) {
                                                         //hacky way to lex #include string properly
-                                                       struct token *ts = array.item;
-                                                       struct token *tp = ts+array.size-1;
+                                                       struct token *ts = arr->item;
+                                                       struct token *tp = ts+arr->size-1;
                                                         while (tp>ts && token_is_ignored(tp-1))
                                                                 tp--;
                                                         if (tp>ts && token_is_op(tp-1, '#')) {
@@ -563,7 +590,7 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
                         "Text contains non-standard line terminators");
         }
         
-       finalize(tl, array.item, array.item+array.size);
+       finalize(tl, arr->item, arr->item+arr->size);
         
         return tl;
  }
@@ -646,9 +673,9 @@ int tok_point_lookup(struct tok_point *out, const char *ptr,
         }
  }
  
-static char *escape_string(array_char *buf, const char *str, size_t size) {
+static char *escape_string(darray_char *buf, const char *str, size_t size) {
         const char *s = str, *e = s+size;
-       array_from_lit(*buf, "");
+       darray_from_lit(*buf, "");
         
         for (;s<e;s++) {
                 char buffer[8];
@@ -668,7 +695,7 @@ static char *escape_string(array_char *buf, const char *str, size_t size) {
                                 buffer[0] = c;
                                 buffer[1] = 0;
                 }
-               array_append_string(*buf, esc);
+               darray_append_string(*buf, esc);
         }
         
         return buf->item;
@@ -816,7 +843,7 @@ int token_list_sanity_check(const struct token_list *tl, FILE *err) {
                 
                 //Make sure txt and orig match exactly except for backslash line breaks
                 if (!txt_orig_matches(i->txt, i->txt_size, i->orig, i->orig_size)) {
-                       array_char buf = array_new(NULL);
+                       darray_char buf = darray_new();
                         fprintf(err,
                                 "txt and orig do not match:\n"
                                 "\ttxt  = \"%s\"\n",
@@ -824,7 +851,7 @@ int token_list_sanity_check(const struct token_list *tl, FILE *err) {
                         fprintf(err, "\torig = \"%s\"\n",
                                 escape_string(&buf, i->orig, i->orig_size) );
                         
-                       array_free(buf);
+                       darray_free(buf);
                         return 0;
                 }
                 
@@ -961,7 +988,7 @@ static char *sprint_token_flags(char buf[3], struct token_flags flags) {
  
  void token_list_dump(const struct token_list *tl, FILE *f) {
         struct token *tok;
-       array_char buf = array_new(NULL);
+       darray_char buf = darray_new();
         size_t i = 0;
         char buf2[8];
         const char *token_type_str[] = {
@@ -994,7 +1021,7 @@ void token_list_dump(const struct token_list *tl, FILE *f) {
                 #endif
         }
         
-       array_free(buf);
+       darray_free(buf);
  }
  
  void tok_message_print(struct tok_message *m, struct token_list *tl) {
diff --git a/ccan/ccan_tokenizer/ccan_tokenizer.h b/ccan/ccan_tokenizer/ccan_tokenizer.h

index eb541679da5bc832534f271345e31f75d2c5ff2b..9c40ae20973c026f162a8e8fecbadd76c6b12565 100644 (file)
--- a/ccan/ccan_tokenizer/ccan_tokenizer.h
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.h
@@ -28,7 +28,7 @@
  #ifndef CCAN_TOKENIZER_H
  #define CCAN_TOKENIZER_H
  
-#include <ccan/array/array.h>
+#include <ccan/darray/darray.h>
  #include "charflag.h"
  #include "dict.h"
  #include "queue.h"
@@ -180,7 +180,7 @@ struct token {
                 struct tok_integer integer;
                 struct tok_floating floating;
                 int opkw; //operator or keyword ID (e.g. '+', INC_OP (++), ADD_ASSIGN (+=))
-               array_char string; //applies to TOK_CHAR and TOK_STRING
+               darray_char *string; //applies to TOK_CHAR and TOK_STRING
                 char *include; //applies to TOK_STRING_IQUOTE and TOK_STRING_IANGLE
         };
         
@@ -245,8 +245,8 @@ extern struct dict *tokenizer_dict;
  
  typedef queue(struct tok_message) tok_message_queue;
  
-//the token_list is allocated as a child of orig
-struct token_list *tokenize(const char *orig, size_t orig_size, tok_message_queue *mq);
+//the token_list is allocated as a child of tcontext
+struct token_list *tokenize(const void *tcontext, const char *orig, size_t orig_size, tok_message_queue *mq);
  
  size_t token_list_count(const struct token_list *tl);
  
@@ -300,7 +300,7 @@ void tok_message_queue_dump(const tok_message_queue *mq);
  
  /* Miscellaneous internal components */
  
-char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
+char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
  char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq);
  
  
diff --git a/ccan/ccan_tokenizer/read_cstring.c b/ccan/ccan_tokenizer/read_cstring.c

index 9a62d0cc5b8c02b171e5b8cc1e56906197298355..d1277ca12315425111ad47c217a1b21fc46c87a3 100644 (file)
--- a/ccan/ccan_tokenizer/read_cstring.c
+++ b/ccan/ccan_tokenizer/read_cstring.c
@@ -11,16 +11,16 @@ static char *strdup_rng(const char *s, const char *e) {
  
  //Reads a C string starting at s until quoteChar is found or e is reached
  //  Returns the pointer to the terminating quote character or e if none was found
-char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
+char *read_cstring(darray_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
         const char * const tokstart = s;
         const char *p;
         int has_endquote=0, has_newlines=0;
         
         //tok_msg_debug(called, s, "Called read_cstring on `%s`", s);
         
-       #define append(startptr,endptr) array_append_items(*out, startptr, (endptr)-(startptr))
-       #define append_char(theChar) array_append(*out, theChar)
-       #define append_zero() do {array_append(*out, 0); out->size--;} while(0)
+       #define append(startptr,endptr) darray_append_items(*out, startptr, (endptr)-(startptr))
+       #define append_char(theChar) darray_append(*out, theChar)
+       #define append_zero() do {darray_append(*out, 0); out->size--;} while(0)
         
         p = s;
         while (p<e) {
diff --git a/ccan/ccan_tokenizer/test/run-simple-token.c b/ccan/ccan_tokenizer/test/run-simple-token.c

index efc394309c562c7d4524147b662482a65fd8e221..7957dba07d4c500e25b957be6b3fa4be01589e91 100644 (file)
--- a/ccan/ccan_tokenizer/test/run-simple-token.c
+++ b/ccan/ccan_tokenizer/test/run-simple-token.c
@@ -49,7 +49,7 @@ static struct token_list *test_tokens(const char *orig, unsigned int size)
         char *string = talloc_strdup(NULL, orig);
         unsigned int i;
  
-       toks = tokenize(string, strlen(string), MQ);
+       toks = tokenize(string, string, strlen(string), MQ);
         ok1(token_list_sanity_check(toks, stdout));
         
         ok1(token_list_count(toks) == strlen(string)/size + 1);
@@ -71,7 +71,7 @@ static struct token_list *test_tokens_spaced(const char *orig,
         char *string = spacify(orig, size);
         unsigned int i;
  
-       toks = tokenize(string, strlen(string), MQ);
+       toks = tokenize(string, string, strlen(string), MQ);
         ok1(token_list_sanity_check(toks, stdout));
         
         ok1(token_list_count(toks) == strlen(orig)/size*2 + 1);
@@ -98,7 +98,7 @@ static struct token_list *test_tokens_backslashed(const char *orig,
         const char *string = backslashify(orig);
         unsigned int i;
  
-       toks = tokenize(string, strlen(string), MQ);
+       toks = tokenize(string, string, strlen(string), MQ);
         ok1(token_list_sanity_check(toks, stdout));
         
         ok1(token_list_count(toks) == strlen(orig)/size + 1);
@@ -173,7 +173,7 @@ int main(void)
  
         /* char literal */
         str = talloc_strdup(NULL, char_token);
-       toks = tokenize(str, strlen(str), MQ);
+       toks = tokenize(str, str, strlen(str), MQ);
         ok1(token_list_sanity_check(toks, stdout));
         ok1(token_list_count(toks) == 2);
         ok1(item(0).type == TOK_STARTLINE);
@@ -187,7 +187,7 @@ int main(void)
  
         /* string literal */
         str = talloc_strdup(NULL, string_token);
-       toks = tokenize(str, strlen(str), MQ);
+       toks = tokenize(str, str, strlen(str), MQ);
         ok1(token_list_sanity_check(toks, stdout));
         ok1(token_list_count(toks) == 2);
         ok1(item(0).type == TOK_STARTLINE);
@@ -201,7 +201,7 @@ int main(void)
  
         /* Identifiers */
         str = talloc_strdup(NULL, ident_tokens);
-       toks = tokenize(str, strlen(str), MQ);
+       toks = tokenize(str, str, strlen(str), MQ);
         ok1(token_list_sanity_check(toks, stdout));
         token_list_dump(toks, stdout);
         ok1(token_list_count(toks) == 10);
@@ -248,7 +248,7 @@ int main(void)
  
         /* Identifiers */
         backslashed_idents = backslashify(ident_tokens);
-       toks = tokenize(backslashed_idents, strlen(backslashed_idents), MQ);
+       toks = tokenize(backslashed_idents, backslashed_idents, strlen(backslashed_idents), MQ);
         ok1(token_list_sanity_check(toks, stdout));
         ok1(token_list_count(toks) == 10);
         ok1(item(0).type == TOK_STARTLINE);
diff --git a/ccan/ccan_tokenizer/test/run.c b/ccan/ccan_tokenizer/test/run.c

index 7ff03da6da83900f000ddaee3536b10d05f4ddda..92fe43c68a67d818bd64133bf681f9af11f66407 100644 (file)
--- a/ccan/ccan_tokenizer/test/run.c
+++ b/ccan/ccan_tokenizer/test/run.c
@@ -41,7 +41,7 @@
  #define array_count_pair(type, ...) (const type []){__VA_ARGS__}, sizeof((const type []){__VA_ARGS__})/sizeof(type)
  
  static void test_read_cstring(void) {
-       #define next() do {array_free(str); array_init(str, NULL); csp++;} while(0)
+       #define next() do {darray_free(str); darray_init(str); csp++;} while(0)
         #define cs (*csp)
         #define verify_quotechar(correct, correct_continuation_offset, quotechar) do { \
                 const size_t s = sizeof(correct)-1; \
@@ -69,7 +69,7 @@ static void test_read_cstring(void) {
         };
         const char * const *csp = cstrings;
         const char *p;
-       array_char str = array_new(NULL);
+       darray_char str = darray_new();
         tok_message_queue mq;
         
         queue_init(mq, NULL);
@@ -107,7 +107,7 @@ static void test_read_cstring(void) {
         //Check a series of hex escapes
         verify("\x50\x35\x12\xEF\xFE\x12\x45", 32);
         
-       array_free(str);
+       darray_free(str);
         
         //tok_message_queue_dump(&mq);
         
@@ -896,7 +896,7 @@ struct tokenizer_test {
  };
  
  #define T(txt, ...) {txt, sizeof(txt)-1, array_count_pair(struct token, __VA_ARGS__)}
-#define string(txt) {.string={.item = (txt), .size = sizeof(txt)-1}}
+#define string(txt) {.string=(darray_char[1]){{.item = (txt), .size = sizeof(txt)-1}}}
  #define opkw(v) {.opkw = (v)}
  #define txt(t) .txt = (t), .txt_size = sizeof(t)-1
  #define integer(...) {.integer={__VA_ARGS__}}
@@ -1226,7 +1226,7 @@ static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *m
                 goto done; \
         } while(0)
         
-       tl = tokenize(txt, txt_size, mq);
+       tl = tokenize(txt, txt, txt_size, mq);
         
         if (tl->orig != txt || tl->orig_size != txt_size)
                 failed("tokenize() did not replicate orig/orig_size from arguments");
@@ -1271,10 +1271,10 @@ static void test_tokenizer_single(struct tokenizer_test *t, tok_message_queue *m
                         case TOK_CHAR:
                         case TOK_STRING:
                                 //anything using string
-                               if (tok_gen->string.size != tok_correct->string.size ||
-                                       memcmp(tok_gen->string.item, tok_correct->string.item,
-                                       tok_gen->string.size) ||
-                                       tok_gen->string.item[tok_gen->string.size] != 0 )
+                               if (tok_gen->string->size != tok_correct->string->size ||
+                                       memcmp(tok_gen->string->item, tok_correct->string->item,
+                                       tok_gen->string->size) ||
+                                       tok_gen->string->item[tok_gen->string->size] != 0 )
                                         failed("Token \"%s\": String value incorrect", tok_correct->txt);
                                 break;
                         case TOK_STRING_IQUOTE:
@@ -1309,7 +1309,7 @@ done:
  
  static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
         FILE *f = fopen(file_name, "rb");
-       array_char text = array_new(NULL);
+       darray_char *text = talloc_darray(NULL);
         const size_t inc = 1024;
         struct token_list *tl;
         
@@ -1321,10 +1321,10 @@ static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
         for (;;) {
                 size_t read_len;
                 
-               array_realloc(text, text.size+inc+1);
-               read_len = fread(text.item+text.size, 1, inc, f);
-               text.size += read_len;
-               text.item[text.size] = 0;
+               darray_realloc(*text, text->size+inc+1);
+               read_len = fread(text->item+text->size, 1, inc, f);
+               text->size += read_len;
+               text->item[text->size] = 0;
                 
                 if (read_len < inc)
                         break;
@@ -1335,7 +1335,7 @@ static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
                 goto end;
         }
         
-       tl = tokenize(text.item, text.size, mq);
+       tl = tokenize(text, text->item, text->size, mq);
         tl->filename = file_name;
         
         //printf("File '%s' has %zu tokens\n", file_name, token_list_count(tl));
@@ -1354,7 +1354,7 @@ static void test_tokenizer_file(const char *file_name, tok_message_queue *mq) {
         }*/
         
  end:
-       array_free(text);
+       talloc_free(text);
         if (f)
                 fclose(f);
  }
author	Joey Adams <joeyadams3.14159@gmail.com>
	Sat, 19 Feb 2011 10:53:04 +0000 (05:53 -0500)
committer	Rusty Russell <rusty@rustcorp.com.au>
	Tue, 22 Feb 2011 13:28:36 +0000 (23:58 +1030)
ccan/ccan_tokenizer/LICENSE	[new symlink]	patch \| blob
ccan/ccan_tokenizer/_info		patch \| blob \| history
ccan/ccan_tokenizer/ccan_tokenizer.c		patch \| blob \| history
ccan/ccan_tokenizer/ccan_tokenizer.h		patch \| blob \| history
ccan/ccan_tokenizer/read_cstring.c		patch \| blob \| history
ccan/ccan_tokenizer/test/run-simple-token.c		patch \| blob \| history
ccan/ccan_tokenizer/test/run.c		patch \| blob \| history