1 /* MIT (BSD) license - see LICENSE file for details */
3 #include <ccan/tal/str/str.h>
/* Grow the tal array *toks by one element, for a token of len bytes at p.
 * NOTE(review): the stores into the new element are not shown here;
 * presumably it records p and len -- confirm. */
11 static void add_token(struct token **toks, const char *p, size_t len)
/* n is the current element count, i.e. the index of the slot being added. */
13 size_t n = tal_count(*toks);
14 tal_resize(toks, n+1);
/* Measure the logical line starting at p: the distance to the first newline
 * (or to the terminating NUL), extended across backslash-newline
 * continuations. Presumably returns that length -- the return statement is
 * not shown. */
19 static size_t to_eol(const char *p)
21 size_t len = strcspn(p, "\n");
23 /* And any \ continuations. */
/* Reads p[len-1], so this relies on len >= 1. The visible callers invoke it
 * with p pointing at a '#' or '/' character, which guarantees that. */
24 while (p[len] && p[len-1] == '\\')
/* Skip the newline itself (+1) and add the length of the following line. */
25 len += strcspn(p+len+1, "\n") + 1;
/* Split code into a tal array of tokens allocated off ctx.
 * tok_start is the offset at which the identifier currently being scanned
 * began, or -1U when no identifier is in progress. len is the number of
 * bytes each loop iteration consumes. One extra zeroed element is appended
 * at the end as the terminator. */
29 /* Simplified tokenizer: comments and preproc directives removed,
30 identifiers are a token, others are single char tokens. */
31 static struct token *tokenize(const void *ctx, const char *code)
33 unsigned int i, len, tok_start = -1;
/* '#' only introduces a directive when nothing but whitespace precedes it
 * on the line. */
34 bool start_of_line = true;
35 struct token *toks = tal_arr(ctx, struct token, 0);
37 for (i = 0; code[i]; i += len) {
38 if (code[i] == '#' && start_of_line) {
39 /* Preprocessor line. */
40 len = to_eol(code + i);
41 } else if (code[i] == '/' && code[i+1] == '/') {
42 /* One line comment. */
43 len = to_eol(code + i);
/* A comment terminates any identifier that was being accumulated. */
44 if (tok_start != -1U) {
45 add_token(&toks, code+tok_start, i - tok_start);
48 } else if (code[i] == '/' && code[i+1] == '*') {
49 /* Multi-line comment. */
50 const char *end = strstr(code+i+2, "*/");
/* NOTE(review): strstr returns NULL when the comment is unterminated, and
 * this subtraction is evaluated on that NULL before any fallback is
 * applied; consider testing end first. */
51 len = (end + 2) - (code + i);
/* Fallback: consume the rest of the input. Presumably guarded by a check
 * that end is NULL -- the condition is not shown; confirm. */
53 len = strlen(code + i);
54 if (tok_start != -1U) {
55 add_token(&toks, code+tok_start, i - tok_start);
58 } else if (cisalnum(code[i]) || code[i] == '_') {
59 /* Identifier or part thereof */
/* NOTE(review): presumably records tok_start on the first such character
 * and advances by one byte -- the body is not shown; confirm. */
63 } else if (!cisspace(code[i])) {
64 /* Punctuation: treat as single char token. */
/* Flush a pending identifier first so tokens stay in source order. */
65 if (tok_start != -1U) {
66 add_token(&toks, code+tok_start, i - tok_start);
69 add_token(&toks, code+i, 1);
/* Remaining case (presumably whitespace): also ends a pending identifier. */
73 if (tok_start != -1U) {
74 add_token(&toks, code+tok_start, i - tok_start);
/* Any non-whitespace character means a later '#' on this line is not a
 * preprocessor directive. */
81 else if (!cisspace(code[i]))
82 start_of_line = false;
85 /* Add terminating NULL. */
/* tal_resizez is the zero-filling resize, so the added element is all-zero. */
86 tal_resizez(&toks, tal_count(toks) + 1);
/* NOTE(review): these look like members of struct parse_state (accessed
 * elsewhere as ps->toks and ps->defs); the struct header is not shown. */
/* Current position in the token array; advanced by the tok_* helpers. */
92 const struct token *toks;
/* Definitions being collected; handed to get_type() by the parser. */
93 struct cdump_definitions *defs;
/* Look at the next token without consuming it. Callers treat a NULL result
 * as end of input (see the "t &&" test in tok_is and the loop in
 * tok_take_until). */
97 static const struct token *tok_peek(const struct token **toks)
99 /* Ignore removed tokens (eg. comments) */
/* Zero-length entries are skipped. NOTE(review): the zeroed terminator
 * added by tokenize() also has len == 0, so the loop body (not shown)
 * presumably detects it and returns NULL -- confirm. */
100 while (toks[0]->len == 0) {
/* True when a next token exists and its text is exactly target: same length
 * and same bytes. The token is not consumed. */
108 static bool tok_is(const struct token **toks, const char *target)
110 const struct token *t = tok_peek(toks);
/* The length test comes first so memcmp never reads past target. */
111 return (t && t->len == strlen(target)
112 && memcmp(t->p, target, t->len) == 0);
/* Fetch the next token via tok_peek(). NOTE(review): presumably also
 * advances *toks past it and returns it (NULL at end of input) -- the rest
 * of the body is not shown; confirm. */
115 static const struct token *tok_take(const struct token **toks)
117 const struct token *t = tok_peek(toks);
/* Consume and return the next token only if its text equals target.
 * Callers use the result as a boolean, so the non-matching path (not
 * shown) presumably returns NULL -- confirm. */
124 static const struct token *tok_take_if(const struct token **toks,
127 if (tok_is(toks, target))
128 return tok_take(toks);
/* Take the next token as an identifier and return a copy of its text
 * allocated off ctx. */
132 static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)
134 const struct token *t = tok_peek(toks);
/* strspn counts leading identifier characters starting at t->p; a count
 * below t->len means the token contains a non-identifier character.
 * NOTE(review): the rejection action is not shown; presumably returns NULL. */
139 if (strspn(t->p, "_0123456789"
140 "abcdefghijklmnopqrstuvwxyz"
141 "ABCDEFGHIJKLMNOPQRSTUVWXYZ") < t->len)
/* Copy exactly t->len bytes; the result is NUL-terminated by tal_strndup. */
145 return tal_strndup(ctx, t->p, t->len);
/* Build a string, allocated off ctx, from the tokens in [first, until).
 * until itself is not copied, but until->p is read to size the buffer, so
 * until must point at a real token. */
148 static char *string_of_toks(const tal_t *ctx,
149 const struct token *first,
150 const struct token *until)
154 /* Careful to skip erased tokens (eg. comments) */
/* Sized by the source span between the two tokens, plus one byte. */
155 str = p = tal_arr(ctx, char, until->p - first->p + 1);
156 while (first != until) {
157 const struct token *next = first + 1;
160 memcpy(p, first->p, first->len);
162 /* Insert space if they weren't adjacent, unless last */
/* Tokens are adjacent when one ends exactly where the next begins. */
164 if (first->p + first->len != next->p)
/* Collect tokens up to, but not including, the first token that contains
 * one of the characters in delims, and return their text as a string
 * allocated off ctx. */
175 static char *tok_take_until(const tal_t *ctx,
176 const struct token **toks,
179 const struct token *t, *start;
181 start = tok_peek(toks);
182 while ((t = tok_peek(toks)) != NULL) {
183 /* If this contains a delimiter, copy up to prev token. */
/* strcspn gives the offset of the first delimiter character; an offset
 * inside the token's len bytes means the token holds a delimiter. */
184 if (strcspn(t->p, delims) < t->len)
185 return string_of_toks(ctx, start, t);
189 /* EOF without finding delimiter */
/* Report whether t has had its body filled in: a non-NULL member array or
 * a non-NULL enum value array. NOTE(review): which kinds select which test
 * is not shown -- presumably a switch on the type's kind; confirm. */
193 static bool type_defined(const struct cdump_type *t)
198 return (t->u.members != NULL);
200 return (t->u.enum_vals != NULL);
202 /* These shouldn't happen; we don't try to define them. */
211 /* May allocate a new type if not already found (steals @name) */
/* m is the strmap searched and updated for this type. NOTE(review): how m
 * is chosen is not shown; presumably by kind from defs->enums, structs or
 * unions. Callers pass name == NULL for CDUMP_ARRAY and CDUMP_POINTER, so
 * those presumably bypass the map -- confirm. */
212 static struct cdump_type *get_type(struct cdump_definitions *defs,
213 enum cdump_type_kind kind,
217 struct cdump_type *t;
235 /* Do we already have it? */
237 t = strmap_get(m, name);
/* Not found: allocate a fresh type off defs. */
242 t = tal(defs, struct cdump_type);
/* Reparent name under t so it is freed together with the type. */
244 t->name = name ? tal_steal(t, name) : NULL;
245 /* These are actually the same, but be thorough */
247 t->u.enum_vals = NULL;
249 strmap_add(m, t->name, t);
/* Append a "Line N: 'token': complaint" message to ps->complaints, where
 * token is the current one (ps->toks[0]). */
254 static void complain(struct parse_state *ps, const char *complaint)
256 unsigned int linenum;
257 const char *p = ps->code;
/* Derive the line number by stepping from newline to newline from the
 * start of the source until reaching the current token's position. */
259 for (linenum = 1; p < ps->toks[0].p; linenum++) {
260 p = strchr(p+1, '\n');
265 tal_append_fmt(&ps->complaints,
266 "Line %u: '%.*s': %s\n",
/* %.*s takes an int precision, hence the cast of the token length. */
267 linenum, (int)ps->toks[0].len,
268 ps->toks[0].p, complaint);
/* Record a complaint, then discard tokens through the next semicolon. */
271 static void tok_take_unknown_statement(struct parse_state *ps)
273 complain(ps, "Ignoring unknown statement until next semicolon");
/* Only the consumption matters; the collected string is freed at once. */
274 tal_free(tok_take_until(NULL, &ps->toks, ";"));
/* tok_take_until stops before the delimiter, so swallow the ';' here. */
275 tok_take_if(&ps->toks, ";");
/* Consume tokens until term is the next token, then consume term too.
 * Parenthesised and bracketed groups are consumed recursively, so a term
 * character nested inside one does not end the expression. */
278 static bool tok_take_expr(struct parse_state *ps, const char *term)
280 while (!tok_is(&ps->toks, term)) {
281 if (tok_take_if(&ps->toks, "(")) {
282 if (!tok_take_expr(ps, ")"))
284 } else if (tok_take_if(&ps->toks, "[")) {
285 if (!tok_take_expr(ps, "]"))
/* Any other token is consumed; a failed take means input ran out. */
287 } else if (!tok_take(&ps->toks))
/* Consume the terminator; the returned pointer converts to the bool result. */
290 return tok_take(&ps->toks);
/* Like tok_take_expr(), but also return the text of the consumed
 * expression as a string allocated off ctx. */
293 static char *tok_take_expr_str(const tal_t *ctx,
294 struct parse_state *ps,
/* Remember where the expression begins before consuming anything. */
297 const struct token *start = tok_peek(&ps->toks);
299 if (!tok_take_expr(ps, term))
/* ps->toks - 1 is the entry just before the current position, so the
 * range passed to string_of_toks stops short of it. */
302 return string_of_toks(ctx, start, ps->toks - 1);
/* Parse an array size expression ending in "]" (the caller has already
 * consumed the "[") and build an array type whose element type is *type.
 * NOTE(review): *type is presumably updated to the new array type on a
 * line not shown -- confirm. */
306 static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)
308 /* This will be some arbitrary expression! */
309 struct cdump_type *arr = get_type(ps->defs, CDUMP_ARRAY, NULL);
/* The size is kept as source text, allocated off the array type. */
311 arr->u.arr.size = tok_take_expr_str(arr, ps, "]");
312 if (!arr->u.arr.size) {
313 complain(ps, "Could not find closing array size ]");
317 arr->u.arr.type = *type;
/* Obtain an anonymous CDUMP_POINTER type from get_type(). NOTE(review):
 * presumably it is then pointed at ptr_to and returned -- those lines are
 * not shown; confirm. */
323 static struct cdump_type *ptr_of(struct parse_state *ps,
324 const struct cdump_type *ptr_to)
326 struct cdump_type *ptr = get_type(ps->defs, CDUMP_POINTER, NULL);
/* Parse a type name and store the matching type in *type. It is either a
 * run of primitive keywords ("unsigned long int", ...) or an optional
 * struct/union/enum keyword followed by an identifier. */
331 static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)
/* NOTE(review): types is presumably set to ps->toks before the loop below
 * (assignment not shown), since it is compared against ps->toks afterwards. */
334 const struct token *types;
335 enum cdump_type_kind kind;
337 /* Ignoring weird typedefs, only these can be combined. */
/* Empty-bodied loop: swallow any sequence of primitive type keywords. */
339 while (tok_take_if(&ps->toks, "int")
340 || tok_take_if(&ps->toks, "long")
341 || tok_take_if(&ps->toks, "short")
342 || tok_take_if(&ps->toks, "double")
343 || tok_take_if(&ps->toks, "float")
344 || tok_take_if(&ps->toks, "char")
345 || tok_take_if(&ps->toks, "signed")
346 || tok_take_if(&ps->toks, "unsigned"));
348 /* Did we get some? */
349 if (ps->toks != types) {
/* NOTE(review): tok_peek() can yield NULL at end of input, and
 * string_of_toks() dereferences its until argument -- confirm that a
 * primitive type as the very last tokens cannot reach this line. */
350 name = string_of_toks(NULL, types, tok_peek(&ps->toks));
351 kind = CDUMP_UNKNOWN;
353 /* Try normal types (or simple typedefs, etc). */
354 if (tok_take_if(&ps->toks, "struct")) {
356 } else if (tok_take_if(&ps->toks, "union")) {
358 } else if (tok_take_if(&ps->toks, "enum")) {
/* No keyword prefix: a bare name such as a typedef, kind unknown. */
361 kind = CDUMP_UNKNOWN;
363 name = tok_take_ident(ps->defs, &ps->toks);
365 complain(ps, "Invalid typename");
/* Look the type up, creating it if needed; get_type steals name. */
370 *type = get_type(ps->defs, kind, name);
/* Parse an optional CDUMP(...) annotation. When present, the text between
 * the parentheses is stored in *note, allocated off ctx. NOTE(review): the
 * return values and *note when no annotation is present are not shown. */
375 static bool tok_maybe_take_cdump_note(const tal_t *ctx,
376 struct parse_state *ps, const char **note)
379 if (tok_take_if(&ps->toks, "CDUMP")) {
380 if (!tok_take_if(&ps->toks, "(")) {
381 complain(ps, "Expected ( after CDUMP");
/* Consumes through the matching ")" and yields the text before it. */
384 *note = tok_take_expr_str(ctx, ps, ")");
386 complain(ps, "Expected ) after CDUMP(");
/* Skip an optional GCC attribute specifier, complaining when one starts
 * but is malformed. */
393 /* __attribute__((...)) */
394 static bool tok_ignore_attribute(struct parse_state *ps)
/* NOTE(review): callers test the result with "!", so the no-attribute
 * path presumably returns true -- the return is not shown; confirm. */
396 if (!tok_take_if(&ps->toks, "__attribute__"))
399 if (!tok_take_if(&ps->toks, "(") || !tok_take_if(&ps->toks, "(")) {
400 complain(ps, "Expected (( after __attribute__");
/* Consumes the attribute contents and the inner closing parenthesis. */
404 if (!tok_take_expr(ps, ")")) {
405 complain(ps, "Expected expression after __attribute__((");
/* The outer closing parenthesis. */
408 if (!tok_take_if(&ps->toks, ")")) {
409 complain(ps, "Expected )) __attribute__((");
/* Parse a struct or union definition; the caller has already consumed the
 * "struct" or "union" keyword. Members are appended to e->u.members, each
 * with its type, name and optional qualifiers, and the definition must end
 * with a closing brace and a semicolon. */
415 /* struct|union ... */
416 static bool tok_take_conglom(struct parse_state *ps,
417 enum cdump_type_kind conglom_kind)
419 struct cdump_type *e;
423 assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);
425 name = tok_take_ident(ps->defs, &ps->toks);
427 complain(ps, "Invalid struct/union name");
/* get_type may return an entry created by an earlier reference to this
 * name; only a second full definition is an error. */
431 e = get_type(ps->defs, conglom_kind, name);
432 if (type_defined(e)) {
433 complain(ps, "Type already defined");
437 if (!tok_maybe_take_cdump_note(e, ps, &e->note))
440 if (!tok_ignore_attribute(ps))
443 if (!tok_take_if(&ps->toks, "{")) {
444 complain(ps, "Expected { for struct/union");
/* An empty, non-NULL array: a non-NULL members pointer is what
 * type_defined() tests. n counts members and starts at zero here. */
448 e->u.members = tal_arr(e, struct cdump_member, n = 0);
/* One iteration per member declaration, up to the closing brace. */
449 while (!tok_is(&ps->toks, "}")) {
450 struct cdump_type *basetype;
451 const struct token *quals;
452 unsigned int num_quals = 0;
454 if (!tok_ignore_attribute(ps))
457 /* Anything can have these prepended. */
459 while (tok_take_if(&ps->toks, "const")
460 || tok_take_if(&ps->toks, "volatile"))
463 /* eg. "struct foo" or "varint_t" */
464 if (!tok_take_type(ps, &basetype)) {
465 complain(ps, "Expected typename inside struct/union");
/* One pass of the do-while that ends below per comma-separated declarator
 * sharing the base type. */
470 struct cdump_member *m;
/* tal_resize may move the array, so m is taken after resizing. */
472 tal_resize(&e->u.members, n+1);
473 m = &e->u.members[n++];
477 = string_of_toks(e, quals,
480 m->qualifiers = NULL;
482 /* May have multiple asterisks. */
/* Each '*' wraps the type so far in one more pointer level. */
483 while (tok_take_if(&ps->toks, "*"))
484 m->type = ptr_of(ps, m->type);
486 if (!tok_ignore_attribute(ps))
489 m->name = tok_take_ident(e, &ps->toks);
491 complain(ps, "Expected name for member");
495 /* May be an array. */
/* Loop so that multi-dimensional arrays are handled. */
496 while (tok_take_if(&ps->toks, "[")) {
497 if (!tok_take_array(ps, &m->type))
502 if (!tok_maybe_take_cdump_note(e->u.members,
506 if (!tok_ignore_attribute(ps))
508 } while (tok_take_if(&ps->toks, ","));
510 if (!tok_take_if(&ps->toks, ";")) {
511 complain(ps, "Expected ; at end of member");
516 if (!tok_take_if(&ps->toks, "}")) {
517 complain(ps, "Expected } at end of struct/union");
521 if (!tok_ignore_attribute(ps))
524 if (!tok_take_if(&ps->toks, ";")) {
525 complain(ps, "Expected ; at end of struct/union");
/* Parse an enum definition; the caller has already consumed the "enum"
 * keyword. Values are appended to e->u.enum_vals, each with its name,
 * optional CDUMP note and optional "= expression" text. */
532 static bool tok_take_enum(struct parse_state *ps)
535 struct cdump_type *e;
538 name = tok_take_ident(ps->defs, &ps->toks);
540 complain(ps, "Expected enum name");
544 e = get_type(ps->defs, CDUMP_ENUM, name);
546 /* Duplicate name? */
547 if (type_defined(e)) {
548 complain(ps, "enum already defined");
553 if (!tok_maybe_take_cdump_note(e, ps, &e->note))
556 if (!tok_ignore_attribute(ps))
559 if (!tok_take_if(&ps->toks, "{")) {
560 complain(ps, "Expected { after enum name");
/* NOTE(review): n is declared on a line not shown; it must be 0 here for
 * the array to start empty -- confirm its initialiser. */
564 e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);
566 struct cdump_enum_val *v;
568 /* GCC extension: comma and end of enum */
569 if (tok_is(&ps->toks, "}"))
/* tal_resize may move the array, so v is taken after resizing. */
572 tal_resize(&e->u.enum_vals, n+1);
573 v = &e->u.enum_vals[n++];
575 v->name = tok_take_ident(e, &ps->toks);
577 complain(ps, "Expected enum value name");
582 if (!tok_maybe_take_cdump_note(e->u.enum_vals, ps, &v->note))
585 if (tok_take_if(&ps->toks, "=")) {
/* The value is kept as source text, up to the next ',' or '}'. */
586 v->value = tok_take_until(e, &ps->toks, ",}");
588 complain(ps, "Expected , or } to end value");
593 } while (tok_take_if(&ps->toks, ","));
595 if (!tok_take_if(&ps->toks, "}")) {
596 complain(ps, "Expected } at end of enum");
600 if (!tok_ignore_attribute(ps))
603 if (!tok_take_if(&ps->toks, ";")) {
604 complain(ps, "Expected ; at end of enum");
/* strmap_iterate callback (see remove_undefined): copy each entry whose
 * type was never defined into undefs. NOTE(review): the return value,
 * which presumably tells the iterator to continue, is not shown. */
610 static bool gather_undefines(const char *name,
611 struct cdump_type *t,
612 struct cdump_map *undefs)
614 if (!type_defined(t))
615 strmap_add(undefs, name, t);
/* strmap_iterate callback (see remove_undefined): delete the entry called
 * name from map. t is not used on the visible line. NOTE(review): the
 * return value is not shown. */
619 static bool remove_from_map(const char *name,
620 struct cdump_type *t,
621 struct cdump_map *map)
/* NULL: the caller does not ask for the removed value back. */
623 strmap_del(map, name, NULL);
/* Drop from map every type that was referenced but never defined. */
627 static void remove_undefined(struct cdump_map *map)
629 struct cdump_map undefs;
631 /* We can't delete inside iterator, so gather all the undefs
632 * then remove them. */
633 strmap_init(&undefs);
/* Pass 1: collect the undefined entries into the temporary map. */
635 strmap_iterate(map, gather_undefines, &undefs);
/* Pass 2: iterate the temporary map, deleting each name from map. */
636 strmap_iterate(&undefs, remove_from_map, map);
/* Release the temporary map itself. */
637 strmap_clear(&undefs);
/* Clear the three strmaps held in defs. Registered as the tal destructor
 * for the definitions object in cdump_extract(). */
640 static void destroy_definitions(struct cdump_definitions *defs)
642 strmap_clear(&defs->enums);
643 strmap_clear(&defs->structs);
644 strmap_clear(&defs->unions);
/* Tokenize code and collect its struct, union and enum definitions into a
 * cdump_definitions object allocated off ctx. Statements that are not one
 * of those are skipped with a complaint. Types that were referenced but
 * never defined are removed before returning. */
647 /* Simple LL(1) parser, inspired by Tridge's genstruct.pl. */
648 struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,
651 struct parse_state ps;
652 const struct token *toks;
654 ps.defs = tal(ctx, struct cdump_definitions);
/* Starts empty; complain() appends one line per problem found. */
655 ps.complaints = tal_strdup(ctx, "");
658 strmap_init(&ps.defs->enums);
659 strmap_init(&ps.defs->structs);
660 strmap_init(&ps.defs->unions);
/* Ensures the strmaps are cleared when the definitions are freed. */
661 tal_add_destructor(ps.defs, destroy_definitions);
/* Tokens are allocated off ps.defs. toks keeps the array start while
 * ps.toks advances during parsing. */
663 toks = ps.toks = tokenize(ps.defs, code);
664 while (tok_peek(&ps.toks)) {
665 if (!tok_ignore_attribute(&ps))
667 if (tok_take_if(&ps.toks, "struct")) {
668 if (!tok_take_conglom(&ps, CDUMP_STRUCT))
670 } else if (tok_take_if(&ps.toks, "union")) {
671 if (!tok_take_conglom(&ps, CDUMP_UNION))
673 } else if (tok_take_if(&ps.toks, "enum")) {
674 if (!tok_take_enum(&ps))
/* Anything else is skipped through its terminating semicolon. */
677 tok_take_unknown_statement(&ps);
680 /* Now, remove any undefined types! */
681 remove_undefined(&ps.defs->enums);
682 remove_undefined(&ps.defs->structs);
683 remove_undefined(&ps.defs->unions);
/* No complaints: free the empty string and use the value tal_free returns. */
687 if (streq(ps.complaints, ""))
688 ps.complaints = tal_free(ps.complaints);
/* NOTE(review): presumably the complaints are handed over only when the
 * caller supplied a pointer and freed otherwise -- the conditions are not
 * shown; confirm. */
691 *complaints = ps.complaints;
693 tal_free(ps.complaints);
/* NOTE(review): presumably the failure path; frees everything hanging off
 * ps.defs (tokens included) and leaves ps.defs as whatever tal_free returns. */
697 ps.defs = tal_free(ps.defs);