git.ozlabs.org Git - ccan/blobdiff - ccan/cdump/cdump.c
cdump: new module.
[ccan] / ccan / cdump / cdump.c
diff --git a/ccan/cdump/cdump.c b/ccan/cdump/cdump.c
new file mode 100644 (file)
index 0000000..20cdb4e
--- /dev/null
@@ -0,0 +1,543 @@
+/* MIT (BSD) license - see LICENSE file for details */
+#include "cdump.h"
+#include <ccan/tal/str/str.h>
+#include <assert.h>
+
+/* A lexical token: a span of the source text (not NUL-terminated). */
+struct token {
+       /* Start of the token within the source; NULL marks the array end */
+       const char *p;
+       /* Length of the token in bytes */
+       size_t len;
+};
+
+/* Grows the tal array *@toks by one entry and fills it with a token
+ * covering @len bytes starting at @p. */
+static void add_token(struct token **toks, const char *p, size_t len)
+{
+       const size_t last = tal_count(*toks);
+       struct token *slot;
+
+       tal_resize(toks, last + 1);
+       slot = &(*toks)[last];
+       slot->p = p;
+       slot->len = len;
+}
+
+/* Ends the identifier in progress, if any: emits code[*tok_start .. end)
+ * as a token and marks that no identifier is pending. */
+static void flush_ident(struct token **toks, const char *code,
+                       unsigned int *tok_start, unsigned int end)
+{
+       if (*tok_start == -1U)
+               return;
+       add_token(toks, code + *tok_start, end - *tok_start);
+       *tok_start = -1U;
+}
+
+/* Simplified tokenizer: comments and preprocessor directives are removed,
+ * each run of alphanumerics and '_' is one token, and every other
+ * non-whitespace character is a single-character token.
+ *
+ * Tokens point into @code; nothing is copied.  Returns a tal array
+ * allocated off @ctx whose final entry is zeroed (p == NULL). */
+static struct token *tokenize(const void *ctx, const char *code)
+{
+       /* tok_start: offset of the identifier in progress, or -1U if none */
+       unsigned int i, len, tok_start = -1U;
+       bool start_of_line = true;
+       struct token *toks = tal_arr(ctx, struct token, 0);
+
+       for (i = 0; code[i]; i += len) {
+               if (code[i] == '#' && start_of_line) {
+                       /* Preprocessor line: skip up to (not past) newline.
+                        * No identifier can be pending at start of line. */
+                       len = strcspn(code+i, "\n");
+               } else if (code[i] == '/' && code[i+1] == '/') {
+                       /* One line comment. */
+                       flush_ident(&toks, code, &tok_start, i);
+                       len = strcspn(code+i, "\n");
+               } else if (code[i] == '/' && code[i+1] == '*') {
+                       /* Multi-line comment. */
+                       const char *end = strstr(code+i+2, "*/");
+
+                       flush_ident(&toks, code, &tok_start, i);
+                       /* Check for NULL before doing pointer arithmetic:
+                        * an unterminated comment eats the rest of input. */
+                       if (end)
+                               len = (end + 2) - (code + i);
+                       else
+                               len = strlen(code + i);
+               } else if (cisalnum(code[i]) || code[i] == '_') {
+                       /* Identifier or part thereof */
+                       if (tok_start == -1U)
+                               tok_start = i;
+                       len = 1;
+               } else {
+                       /* Punctuation or whitespace ends any identifier. */
+                       flush_ident(&toks, code, &tok_start, i);
+                       /* Punctuation: treat as single char token. */
+                       if (!cisspace(code[i]))
+                               add_token(&toks, code+i, 1);
+                       len = 1;
+               }
+               /* Other whitespace leaves start_of_line untouched, so a
+                * '#' after leading blanks still starts a directive. */
+               if (code[i] == '\n')
+                       start_of_line = true;
+               else if (!cisspace(code[i]))
+                       start_of_line = false;
+       }
+
+       /* Input may end in the middle of an identifier; don't lose it. */
+       flush_ident(&toks, code, &tok_start, i);
+
+       /* Add terminating NULL. */
+       tal_resizez(&toks, tal_count(toks) + 1);
+       return toks;
+}
+
+/* State threaded through the parsing functions. */
+struct parse_state {
+       /* The source text being parsed; complain() scans it for line numbers */
+       const char *code;
+       /* Current position in the token array; advanced as tokens are taken */
+       const struct token *toks;
+       /* Definitions being built up */
+       struct cdump_definitions *defs;
+       /* Complaint messages accumulated so far, one per line */
+       char *complaints;
+};
+
+/* True if the current token exists (not the terminator) and its text is
+ * exactly @target. */
+static bool tok_is(const struct token **toks, const char *target)
+{
+       const struct token *cur = *toks;
+
+       if (!cur->p)
+               return false;
+       if (cur->len != strlen(target))
+               return false;
+       return memcmp(cur->p, target, cur->len) == 0;
+}
+
+/* Returns the current token without consuming it, or NULL at the
+ * terminating entry. */
+static const struct token *tok_peek(const struct token **toks)
+{
+       const struct token *cur = *toks;
+
+       return cur->p ? cur : NULL;
+}
+
+/* Consumes and returns the current token; returns NULL (consuming
+ * nothing) at the terminating entry. */
+static const struct token *tok_take(const struct token **toks)
+{
+       const struct token *cur = *toks;
+
+       if (cur->p == NULL)
+               return NULL;
+
+       *toks = cur + 1;
+       return cur;
+}
+
+/* Consumes and returns the current token only if its text is @target;
+ * otherwise leaves the position alone and returns NULL. */
+static const struct token *tok_take_if(const struct token **toks,
+                                      const char *target)
+{
+       return tok_is(toks, target) ? tok_take(toks) : NULL;
+}
+
+/* If the current token consists solely of identifier characters, consumes
+ * it and returns a copy allocated off @ctx; otherwise returns NULL without
+ * consuming anything. */
+static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)
+{
+       static const char ident_chars[] = "_0123456789"
+                                         "abcdefghijklmnopqrstuvwxyz"
+                                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+       const struct token *cur = tok_peek(toks);
+
+       /* The token text is not NUL-terminated, so compare the span of
+        * identifier characters against the token length. */
+       if (!cur || strspn(cur->p, ident_chars) < cur->len)
+               return NULL;
+
+       tok_take(toks);
+       return tal_strndup(ctx, cur->p, cur->len);
+}
+
+/* Returns a copy (allocated off @ctx) of the source text running from the
+ * start of token @first to the end of the token just before @until,
+ * including whatever lies between those tokens in the source.
+ * An empty range (@first == @until) yields an empty string. */
+static char *string_of_toks(const tal_t *ctx,
+                           const struct token *first,
+                           const struct token *until)
+{
+       const struct token *last;
+
+       /* With no tokens in range there is no "last" token: until - 1
+        * would be the token before @first, or before the array itself. */
+       if (first == until)
+               return tal_strdup(ctx, "");
+
+       last = until - 1;
+       return tal_strndup(ctx, first->p, last->p - first->p + last->len);
+}
+
+/* Consumes tokens up to, but not including, the first one that contains a
+ * character from @delims.  Returns the consumed source text as a string
+ * allocated off @ctx, or NULL if the tokens run out first (in which case
+ * everything has been consumed). */
+static char *tok_take_until(const tal_t *ctx,
+                           const struct token **toks,
+                           const char *delims)
+{
+       const struct token *start = tok_peek(toks);
+       const struct token *cur;
+
+       for (cur = start; cur != NULL; cur = tok_peek(toks)) {
+               /* Delimiter found: copy everything before this token. */
+               if (strcspn(cur->p, delims) < cur->len)
+                       return string_of_toks(ctx, start, cur);
+               tok_take(toks);
+       }
+
+       /* EOF without finding delimiter */
+       return NULL;
+}
+
+/* True once a struct/union has a members array or an enum has a values
+ * array.  Aborts for any other kind: we never try to define those. */
+static bool type_defined(const struct cdump_type *t)
+{
+       if (t->kind == CDUMP_STRUCT || t->kind == CDUMP_UNION)
+               return t->u.members != NULL;
+
+       if (t->kind == CDUMP_ENUM)
+               return t->u.enum_vals != NULL;
+
+       /* CDUMP_UNKNOWN, CDUMP_ARRAY, CDUMP_POINTER: shouldn't happen. */
+       abort();
+}
+
+/* Looks up the struct/union/enum called @name, allocating a new, undefined
+ * type off @defs if there is none yet.  Other kinds are never looked up:
+ * they always get a fresh type.  A newly allocated type steals @name. */
+static struct cdump_type *get_type(struct cdump_definitions *defs,
+                                  enum cdump_type_kind kind,
+                                  const char *name)
+{
+       struct cdump_map *map = NULL;
+       struct cdump_type *type;
+
+       /* Only these three kinds live in a name map. */
+       if (kind == CDUMP_STRUCT)
+               map = &defs->structs;
+       else if (kind == CDUMP_UNION)
+               map = &defs->unions;
+       else if (kind == CDUMP_ENUM)
+               map = &defs->enums;
+
+       /* Do we already have it? */
+       if (map) {
+               type = strmap_get(map, name);
+               if (type)
+                       return type;
+       }
+
+       type = tal(defs, struct cdump_type);
+       type->kind = kind;
+       type->name = name ? tal_steal(type, name) : NULL;
+       /* These are actually the same, but be thorough */
+       type->u.members = NULL;
+       type->u.enum_vals = NULL;
+       if (map)
+               strmap_add(map, type->name, type);
+
+       return type;
+}
+
+/* Appends "Line N: 'token': complaint" to ps->complaints, where the token
+ * is the current one and N is its 1-based line number in ps->code.
+ * At end of input the token text is empty and N is the last line. */
+static void complain(struct parse_state *ps, const char *complaint)
+{
+       const struct token *tok = &ps->toks[0];
+       /* The terminating token has no position: point at end of input. */
+       const char *upto = tok->p ? tok->p : ps->code + strlen(ps->code);
+       const char *p;
+       unsigned int linenum = 1;
+
+       /* Line number is one more than the newlines preceding the token. */
+       for (p = ps->code; p < upto; p++) {
+               if (*p == '\n')
+                       linenum++;
+       }
+
+       /* Never hand a NULL pointer to %.*s, even with zero precision. */
+       tal_append_fmt(&ps->complaints,
+                      "Line %u: '%.*s': %s\n",
+                      linenum, (int)tok->len,
+                      tok->p ? tok->p : "", complaint);
+}
+
+/* Records a complaint about the current token, then discards tokens up to
+ * and including the next semicolon (or to end of input if there is none). */
+static void tok_take_unknown_statement(struct parse_state *ps)
+{
+       char *skipped;
+
+       complain(ps, "Ignoring unknown statement until next semicolon");
+
+       /* We only want the side effect of consuming; drop the text. */
+       skipped = tok_take_until(NULL, &ps->toks, ";");
+       tal_free(skipped);
+
+       tok_take_if(&ps->toks, ";");
+}
+
+/* [ ...
+ * Called after the '[' has been consumed.  Wraps *@type in a new array
+ * type whose size is the source text up to the closing ']', and consumes
+ * that ']'.  Complains and returns false if no ']' is found. */
+static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)
+{
+       struct cdump_type *array = get_type(ps->defs, CDUMP_ARRAY, NULL);
+
+       /* The size can be any expression, so it is kept as a string. */
+       array->u.arr.size = tok_take_until(array, &ps->toks, "]");
+       if (array->u.arr.size) {
+               array->u.arr.type = *type;
+               *type = array;
+
+               /* Swallow ] */
+               tok_take(&ps->toks);
+               return true;
+       }
+
+       complain(ps, "Could not find closing array size ]");
+       return false;
+}
+
+/* Returns a newly allocated pointer type whose target is @ptr_to. */
+static struct cdump_type *ptr_of(struct parse_state *ps,
+                                const struct cdump_type *ptr_to)
+{
+       struct cdump_type *pointer;
+
+       pointer = get_type(ps->defs, CDUMP_POINTER, NULL);
+       pointer->u.ptr = ptr_to;
+       return pointer;
+}
+
+/* Consumes a type name and stores the corresponding type in *@type.
+ *
+ * A run of builtin keywords (eg. "unsigned long") becomes a CDUMP_UNKNOWN
+ * type named after the source text of the run.  Otherwise an optional
+ * struct/union/enum keyword followed by an identifier is expected.
+ * Returns false, after complaining, if no type name could be read. */
+static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)
+{
+       const char *name;
+       const struct token *types;
+       enum cdump_type_kind kind;
+
+       /* Ignoring weird typedefs, only these can be combined. */
+       types = ps->toks;
+       while (tok_take_if(&ps->toks, "int")
+              || tok_take_if(&ps->toks, "long")
+              || tok_take_if(&ps->toks, "short")
+              || tok_take_if(&ps->toks, "double")
+              || tok_take_if(&ps->toks, "float")
+              || tok_take_if(&ps->toks, "char")
+              || tok_take_if(&ps->toks, "signed")
+              || tok_take_if(&ps->toks, "unsigned"));
+
+       /* Did we get some? */
+       if (ps->toks != types) {
+               /* Use the raw position, not tok_peek(): that returns NULL
+                * when the builtin keywords were the last tokens of input. */
+               name = string_of_toks(NULL, types, ps->toks);
+               kind = CDUMP_UNKNOWN;
+       } else {
+               /* Try normal types (or simple typedefs, etc). */
+               if (tok_take_if(&ps->toks, "struct")) {
+                       kind = CDUMP_STRUCT;
+               } else if (tok_take_if(&ps->toks, "union")) {
+                       kind = CDUMP_UNION;
+               } else if (tok_take_if(&ps->toks, "enum")) {
+                       kind = CDUMP_ENUM;
+               } else
+                       kind = CDUMP_UNKNOWN;
+
+               name = tok_take_ident(ps->defs, &ps->toks);
+               if (!name) {
+                       complain(ps, "Invalid typename");
+                       return false;
+               }
+       }
+
+       *type = get_type(ps->defs, kind, name);
+       return true;
+}
+
+/* struct|union ...
+ * Called after the struct/union keyword has been consumed.  Parses
+ * "name { members... };" and records the members on the named type.
+ * Returns false, after complaining, on anything it cannot parse or if
+ * the type was already defined. */
+static bool tok_take_conglom(struct parse_state *ps,
+                            enum cdump_type_kind conglom_kind)
+{
+       struct cdump_type *e;
+       const char *name;
+       size_t n;
+
+       assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);
+
+       name = tok_take_ident(ps->defs, &ps->toks);
+       if (!name) {
+               complain(ps, "Invalid struct/union name");
+               return false;
+       }
+
+       /* May return a type created earlier by a reference to this name. */
+       e = get_type(ps->defs, conglom_kind, name);
+       if (type_defined(e)) {
+               complain(ps, "Type already defined");
+               return false;
+       }
+
+       if (!tok_take_if(&ps->toks, "{")) {
+               complain(ps, "Expected { for struct/union");
+               return false;
+       }
+
+       /* A non-NULL (even empty) members array marks the type defined. */
+       e->u.members = tal_arr(e, struct cdump_member, n = 0);
+       while (!tok_is(&ps->toks, "}")) {
+               struct cdump_type *basetype;
+               const struct token *quals;
+               unsigned int num_quals = 0;
+
+               /* Anything can have these prepended. */
+               quals = ps->toks;
+               while (tok_take_if(&ps->toks, "const")
+                      || tok_take_if(&ps->toks, "volatile"))
+                       num_quals++;
+
+               /* eg. "struct foo" or "varint_t" */
+               if (!tok_take_type(ps, &basetype)) {
+                       complain(ps, "Expected typename inside struct/union");
+                       return false;
+               }
+
+               /* One iteration per comma-separated declarator; they all
+                * share the base type and qualifiers. */
+               do {
+                       struct cdump_member *m;
+
+                       tal_resize(&e->u.members, n+1);
+                       m = &e->u.members[n++];
+                       m->type = basetype;
+                       if (num_quals) {
+                               m->qualifiers
+                                       = string_of_toks(e, quals,
+                                                        quals + num_quals);
+                       } else
+                               m->qualifiers = NULL;
+
+                       /* May have multiple asterisks. */
+                       while (tok_take_if(&ps->toks, "*"))
+                               m->type = ptr_of(ps, m->type);
+
+                       m->name = tok_take_ident(e, &ps->toks);
+                       if (!m->name) {
+                               complain(ps, "Expected name for member");
+                               return false;
+                       }
+
+                       /* May be an array. */
+                       while (tok_take_if(&ps->toks, "[")) {
+                               if (!tok_take_array(ps, &m->type))
+                                       return false;
+                       }
+               } while (tok_take_if(&ps->toks, ","));
+
+               if (!tok_take_if(&ps->toks, ";")) {
+                       complain(ps, "Expected ; at end of member");
+                       return false;
+               }
+       }
+
+       if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
+               return true;
+       complain(ps, "Expected }; at end of struct/union");
+       return false;
+}
+
+/* enum ...
+ * Called after the enum keyword has been consumed.  Parses
+ * "name { A, B = expr, ... };" and records the values on the named enum.
+ * A comma before the closing brace is accepted.  Each value's expression,
+ * if any, is kept as source text.  Returns false, after complaining, on
+ * anything it cannot parse or if the enum was already defined. */
+static bool tok_take_enum(struct parse_state *ps)
+{
+       size_t n = 0;
+       struct cdump_type *e;
+       const char *name;
+
+       name = tok_take_ident(ps->defs, &ps->toks);
+       if (!name) {
+               complain(ps, "Expected enum name");
+               return false;
+       }
+
+       e = get_type(ps->defs, CDUMP_ENUM, name);
+
+       /* Duplicate name? */
+       if (type_defined(e)) {
+               complain(ps, "enum already defined");
+               return false;
+       }
+
+       if (!tok_take_if(&ps->toks, "{")) {
+               complain(ps, "Expected { after enum name");
+               return false;
+       }
+
+       /* A non-NULL (even empty) values array marks the enum defined. */
+       e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);
+       do {
+               struct cdump_enum_val *v;
+
+               /* Trailing comma: "B, }" ends the list without a value. */
+               if (tok_is(&ps->toks, "}"))
+                       break;
+
+               tal_resize(&e->u.enum_vals, n+1);
+               v = &e->u.enum_vals[n++];
+
+               v->name = tok_take_ident(e, &ps->toks);
+               if (!v->name) {
+                       complain(ps, "Expected enum value name");
+                       return false;
+               }
+               if (tok_take_if(&ps->toks, "=")) {
+                       /* Arbitrary expression: keep it as a string. */
+                       v->value = tok_take_until(e, &ps->toks, ",}");
+                       if (!v->value) {
+                               complain(ps, "Expected , or } to end value");
+                               return false;
+                       }
+               } else
+                       v->value = NULL;
+       } while (tok_take_if(&ps->toks, ","));
+
+       if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
+               return true;
+
+       complain(ps, "Expected }; at end of enum");
+       return false;
+}
+
+/* strmap_iterate callback: records @t in @undefs if it was referenced but
+ * never defined.  Always returns true. */
+static bool gather_undefines(const char *name,
+                            struct cdump_type *t,
+                            struct cdump_map *undefs)
+{
+       if (type_defined(t))
+               return true;
+
+       strmap_add(undefs, name, t);
+       return true;
+}
+
+/* strmap_iterate callback: deletes the entry called @name from @map.
+ * @t is not used.  Always returns true. */
+static bool remove_from_map(const char *name,
+                           struct cdump_type *t,
+                           struct cdump_map *map)
+{
+       /* We don't need the old value back, hence the NULL. */
+       strmap_del(map, name, NULL);
+
+       return true;
+}
+
+/* Deletes from @map every type that was never defined. */
+static void remove_undefined(struct cdump_map *map)
+{
+       struct cdump_map doomed;
+
+       strmap_init(&doomed);
+
+       /* Two passes, since we can't delete inside the iterator: first
+        * collect the undefined entries, then delete each one from @map. */
+       strmap_iterate(map, gather_undefines, &doomed);
+       strmap_iterate(&doomed, remove_from_map, map);
+
+       strmap_clear(&doomed);
+}
+
+/* tal destructor for the definitions: empties the three name maps. */
+static void destroy_definitions(struct cdump_definitions *defs)
+{
+       struct cdump_map *enums = &defs->enums;
+       struct cdump_map *structs = &defs->structs;
+       struct cdump_map *unions = &defs->unions;
+
+       strmap_clear(enums);
+       strmap_clear(structs);
+       strmap_clear(unions);
+}
+
+/* Simple LL(1) parser, inspired by Tridge's genstruct.pl.
+ *
+ * Extracts the struct, union and enum definitions found in @code.
+ * Returns the definitions allocated off @ctx, or NULL if a definition
+ * could not be parsed.  If @complaints is non-NULL it receives the
+ * complaint text (allocated off @ctx), or NULL if there was none. */
+struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,
+                                       char **complaints)
+{
+       struct parse_state ps;
+       const struct token *toks;
+       bool ok = true;
+
+       ps.defs = tal(ctx, struct cdump_definitions);
+       ps.complaints = tal_strdup(ctx, "");
+       ps.code = code;
+
+       strmap_init(&ps.defs->enums);
+       strmap_init(&ps.defs->structs);
+       strmap_init(&ps.defs->unions);
+       tal_add_destructor(ps.defs, destroy_definitions);
+
+       /* Tokens hang off the definitions, so they go away with them. */
+       toks = ps.toks = tokenize(ps.defs, code);
+
+       /* Stop at the first definition which fails to parse. */
+       while (ok && tok_peek(&ps.toks)) {
+               if (tok_take_if(&ps.toks, "struct"))
+                       ok = tok_take_conglom(&ps, CDUMP_STRUCT);
+               else if (tok_take_if(&ps.toks, "union"))
+                       ok = tok_take_conglom(&ps, CDUMP_UNION);
+               else if (tok_take_if(&ps.toks, "enum"))
+                       ok = tok_take_enum(&ps);
+               else
+                       tok_take_unknown_statement(&ps);
+       }
+
+       if (ok) {
+               /* Now, remove any undefined types! */
+               remove_undefined(&ps.defs->enums);
+               remove_undefined(&ps.defs->structs);
+               remove_undefined(&ps.defs->unions);
+               tal_free(toks);
+       } else {
+               ps.defs = tal_free(ps.defs);
+       }
+
+       /* No complaints are reported as NULL rather than "". */
+       if (streq(ps.complaints, ""))
+               ps.complaints = tal_free(ps.complaints);
+
+       if (complaints)
+               *complaints = ps.complaints;
+       else
+               tal_free(ps.complaints);
+       return ps.defs;
+}