1 /* MIT (BSD) license - see LICENSE file for details */
3 #include <ccan/tal/str/str.h>
/* Make room for one more entry at the end of the tal-allocated token
 * array *toks: n is the current element count and the array is grown to
 * n+1.  p and len are the token's start pointer and length as supplied by
 * the tokenizer. */
11 static void add_token(struct token **toks, const char *p, size_t len)
13 size_t n = tal_count(*toks);
14 tal_resize(toks, n+1);
19 /* Simplified tokenizer: comments and preproc directives removed,
20 identifiers are a token, others are single char tokens. */
/* The token array is allocated off ctx.  Tokens are handed to add_token()
 * as pointer/length pairs into code; no copy of the text is made here. */
21 static struct token *tokenize(const void *ctx, const char *code)
/* tok_start is the index where a pending identifier began; -1 (tested as
 * -1U below) means no identifier is pending. */
23 unsigned int i, len, tok_start = -1;
/* Gates '#' handling; cleared below once a non-space character is seen. */
24 bool start_of_line = true;
25 struct token *toks = tal_arr(ctx, struct token, 0);
/* Each iteration decides how many characters (len) to step over. */
27 for (i = 0; code[i]; i += len) {
28 if (code[i] == '#' && start_of_line) {
29 /* Preprocessor line. */
/* strcspn() stops at the newline, so the newline itself is not skipped. */
30 len = strcspn(code+i, "\n");
31 } else if (code[i] == '/' && code[i+1] == '/') {
32 /* One line comment. */
33 len = strcspn(code+i, "\n");
/* A comment ends any identifier that was being accumulated. */
34 if (tok_start != -1U) {
35 add_token(&toks, code+tok_start, i - tok_start);
38 } else if (code[i] == '/' && code[i+1] == '*') {
39 /* Multi-line comment. */
40 const char *end = strstr(code+i+2, "*/");
/* Span of the whole comment, including its two-character terminator. */
41 len = (end + 2) - (code + i);
/* NOTE(review): this overrides len with "rest of the input"; presumably
 * it is guarded by a check for end == NULL (unterminated comment) --
 * confirm against the full file. */
43 len = strlen(code + i);
44 if (tok_start != -1U) {
45 add_token(&toks, code+tok_start, i - tok_start);
48 } else if (cisalnum(code[i]) || code[i] == '_') {
49 /* Identifier or part thereof */
53 } else if (!cisspace(code[i])) {
54 /* Punctuation: treat as single char token. */
/* Flush the pending identifier first so tokens keep source order. */
55 if (tok_start != -1U) {
56 add_token(&toks, code+tok_start, i - tok_start);
59 add_token(&toks, code+i, 1);
63 if (tok_start != -1U) {
64 add_token(&toks, code+tok_start, i - tok_start);
/* Any non-space character means we are no longer at the start of a line. */
71 else if (!cisspace(code[i]))
72 start_of_line = false;
75 /* Add terminating NULL. */
/* tal_resizez() zero-fills the added element, so the last entry has
 * len == 0. */
76 tal_resizez(&toks, tal_count(toks) + 1);
/* NOTE(review): these fields are reached below as ps->toks and ps->defs
 * through a struct parse_state pointer. */
/* Current position in the token array; handed by address to the tok_*
 * helpers. */
82 const struct token *toks;
/* Definitions being collected; get_type() allocates new types off it. */
83 struct cdump_definitions *defs;
/* Look at the next meaningful token in *toks.  Callers compare the result
 * against NULL, which tok_take_until() treats as end of input. */
87 static const struct token *tok_peek(const struct token **toks)
89 /* Ignore removed tokens (eg. comments) */
/* toks[0] is *toks, i.e. the current token; the loop condition singles out
 * zero-length entries. */
90 while (toks[0]->len == 0) {
/* True when there is a next token and its text equals target exactly:
 * same length and same bytes.  Tokens are pointer/length pairs rather than
 * NUL-terminated strings, hence the length test followed by memcmp(). */
98 static bool tok_is(const struct token **toks, const char *target)
100 const struct token *t = tok_peek(toks);
101 return (t && t->len == strlen(target)
102 && memcmp(t->p, target, t->len) == 0);
/* Obtain the next token via tok_peek().  Callers use the result as a
 * "was a token available" test (see tok_take_expr()). */
105 static const struct token *tok_take(const struct token **toks)
107 const struct token *t = tok_peek(toks);
/* Take the next token only if it matches target (as judged by tok_is());
 * in that case the taken token is returned. */
114 static const struct token *tok_take_if(const struct token **toks,
117 if (tok_is(toks, target))
118 return tok_take(toks);
/* Handle the next token as an identifier; the visible return hands back a
 * copy of its text allocated off ctx. */
122 static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)
124 const struct token *t = tok_peek(toks);
/* strspn() measures the leading run of identifier characters starting at
 * the token; a run shorter than t->len means the token contains some
 * other character. */
129 if (strspn(t->p, "_0123456789"
130 "abcdefghijklmnopqrstuvwxyz"
131 "ABCDEFGHIJKLMNOPQRSTUVWXYZ") < t->len)
/* tal_strndup() copies exactly t->len bytes and NUL-terminates the copy. */
135 return tal_strndup(ctx, t->p, t->len);
/* Build a string, allocated off ctx, from the text of the tokens starting
 * at first and stopping before until (until itself is not copied). */
138 static char *string_of_toks(const tal_t *ctx,
139 const struct token *first,
140 const struct token *until)
144 /* Careful to skip erased tokens (eg. comments) */
/* The buffer is sized by the distance between the two tokens in the
 * original text, plus one byte (presumably for the terminating NUL). */
145 str = p = tal_arr(ctx, char, until->p - first->p + 1);
146 while (first != until) {
147 const struct token *next = first + 1;
150 memcpy(p, first->p, first->len);
152 /* Insert space if they weren't adjacent, unless last */
/* Adjacent means the next token starts exactly where this one ends. */
154 if (first->p + first->len != next->p)
/* Collect the text of the tokens from the current position up to, but not
 * including, the first token that contains one of the characters in
 * delims.  The result is allocated off ctx. */
165 static char *tok_take_until(const tal_t *ctx,
166 const struct token **toks,
169 const struct token *t, *start;
171 start = tok_peek(toks);
172 while ((t = tok_peek(toks)) != NULL) {
173 /* If this contains a delimiter, copy up to prev token. */
/* strcspn() returns the offset of the first delimiter character; an
 * offset below t->len means it lies inside this token. */
174 if (strcspn(t->p, delims) < t->len)
175 return string_of_toks(ctx, start, t);
179 /* EOF without finding delimiter */
/* Report whether t has a body recorded: the visible cases test the
 * members and enum_vals pointers against NULL (get_type() starts a new
 * type with enum_vals == NULL). */
183 static bool type_defined(const struct cdump_type *t)
188 return (t->u.members != NULL);
190 return (t->u.enum_vals != NULL);
192 /* These shouldn't happen; we don't try to define them. */
201 /* May allocate a new type if not already found (steals @name) */
202 static struct cdump_type *get_type(struct cdump_definitions *defs,
203 enum cdump_type_kind kind,
207 struct cdump_type *t;
225 /* Do we already have it? */
/* m is the name -> type map searched for an existing entry. */
227 t = strmap_get(m, name);
/* A new type is allocated as a tal child of defs. */
232 t = tal(defs, struct cdump_type);
/* name may be NULL (array and pointer types are requested without one);
 * otherwise tal_steal() reparents the string onto the new type. */
234 t->name = name ? tal_steal(t, name) : NULL;
235 /* These are actually the same, but be thorough */
237 t->u.enum_vals = NULL;
239 strmap_add(m, t->name, t);
/* Append a diagnostic about the current token to ps->complaints, in the
 * form "Line N: 'token': complaint". */
244 static void complain(struct parse_state *ps, const char *complaint)
246 unsigned int linenum;
247 const char *p = ps->code;
/* Count lines by hopping from newline to newline until p reaches the
 * current token's position in the source text. */
249 for (linenum = 1; p < ps->toks[0].p; linenum++) {
250 p = strchr(p+1, '\n');
/* %.*s prints exactly len bytes of the token, which is a pointer/length
 * pair rather than a NUL-terminated string. */
255 tal_append_fmt(&ps->complaints,
256 "Line %u: '%.*s': %s\n",
257 linenum, (int)ps->toks[0].len,
258 ps->toks[0].p, complaint);
/* Skip a statement the parser does not handle: record a complaint, then
 * discard everything up to and including the next semicolon. */
261 static void tok_take_unknown_statement(struct parse_state *ps)
263 complain(ps, "Ignoring unknown statement until next semicolon");
/* Only the side effect on ps->toks is wanted; the collected text is freed
 * straight away. */
264 tal_free(tok_take_until(NULL, &ps->toks, ";"));
265 tok_take_if(&ps->toks, ";");
/* Consume tokens up to and including the terminator token term.  Round and
 * square brackets are matched recursively, so a term that appears inside a
 * nested bracket pair does not end the expression. */
268 static bool tok_take_expr(struct parse_state *ps, const char *term)
270 while (!tok_is(&ps->toks, term)) {
271 if (tok_take_if(&ps->toks, "(")) {
272 if (!tok_take_expr(ps, ")"))
274 } else if (tok_take_if(&ps->toks, "[")) {
275 if (!tok_take_expr(ps, "]"))
/* Any other token is simply consumed; none left means the terminator was
 * never found. */
277 } else if (!tok_take(&ps->toks))
/* Take the terminator itself; the pointer result converts to bool. */
280 return tok_take(&ps->toks);
/* Parse an array size expression ending in "]" and describe the result as
 * a new CDUMP_ARRAY type whose element type is the incoming *type. */
284 static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)
286 /* This will be some arbitrary expression! */
/* Array types are requested without a name. */
287 struct cdump_type *arr = get_type(ps->defs, CDUMP_ARRAY, NULL);
/* Remember where the size expression begins. */
288 const struct token *start = tok_peek(&ps->toks);
290 if (!tok_take_expr(ps, "]")) {
291 complain(ps, "Could not find closing array size ]");
/* ps->toks - 1 is the token just before the current position;
 * string_of_toks() stops before it, so it is left out of the size
 * string.  The string is allocated off arr. */
295 arr->u.arr.size = string_of_toks(arr, start, ps->toks - 1);
296 arr->u.arr.type = *type;
/* Obtain a CDUMP_POINTER type via get_type(); like arrays, it is
 * requested without a name. */
302 static struct cdump_type *ptr_of(struct parse_state *ps,
303 const struct cdump_type *ptr_to)
305 struct cdump_type *ptr = get_type(ps->defs, CDUMP_POINTER, NULL);
/* Parse a type name at the current position and store the resulting type
 * (looked up or created through get_type()) in *type. */
310 static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)
313 const struct token *types;
314 enum cdump_type_kind kind;
316 /* Ignoring weird typedefs, only these can be combined. */
/* The loop body is empty: each successful tok_take_if() consumes one
 * keyword, and the loop stops at the first token that is none of them. */
318 while (tok_take_if(&ps->toks, "int")
319 || tok_take_if(&ps->toks, "long")
320 || tok_take_if(&ps->toks, "short")
321 || tok_take_if(&ps->toks, "double")
322 || tok_take_if(&ps->toks, "float")
323 || tok_take_if(&ps->toks, "char")
324 || tok_take_if(&ps->toks, "signed")
325 || tok_take_if(&ps->toks, "unsigned"));
327 /* Did we get some? */
/* NOTE(review): types is compared with the current position here;
 * presumably it was set to ps->toks before the loop -- confirm. */
328 if (ps->toks != types) {
/* The name is the text of the consumed keywords (e.g. "unsigned long");
 * such combinations are given kind CDUMP_UNKNOWN. */
329 name = string_of_toks(NULL, types, tok_peek(&ps->toks));
330 kind = CDUMP_UNKNOWN;
332 /* Try normal types (or simple typedefs, etc). */
333 if (tok_take_if(&ps->toks, "struct")) {
335 } else if (tok_take_if(&ps->toks, "union")) {
337 } else if (tok_take_if(&ps->toks, "enum")) {
340 kind = CDUMP_UNKNOWN;
/* The identifier is allocated off ps->defs. */
342 name = tok_take_ident(ps->defs, &ps->toks);
344 complain(ps, "Invalid typename");
349 *type = get_type(ps->defs, kind, name);
353 /* struct|union ... */
/* Parse the definition of a struct or union (selected by conglom_kind)
 * and record its members on the type obtained from get_type().  Problems
 * are reported through complain(). */
354 static bool tok_take_conglom(struct parse_state *ps,
355 enum cdump_type_kind conglom_kind)
357 struct cdump_type *e;
361 assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);
363 name = tok_take_ident(ps->defs, &ps->toks);
365 complain(ps, "Invalid struct/union name");
369 e = get_type(ps->defs, conglom_kind, name);
/* A type that already has a body must not be defined a second time. */
370 if (type_defined(e)) {
371 complain(ps, "Type already defined");
375 if (!tok_take_if(&ps->toks, "{")) {
376 complain(ps, "Expected { for struct/union");
/* Start with an empty member array owned by the type; n counts the
 * members added so far. */
380 e->u.members = tal_arr(e, struct cdump_member, n = 0);
/* One iteration per member declaration, until the closing brace. */
381 while (!tok_is(&ps->toks, "}")) {
382 struct cdump_type *basetype;
383 const struct token *quals;
384 unsigned int num_quals = 0;
386 /* Anything can have these prepended. */
388 while (tok_take_if(&ps->toks, "const")
389 || tok_take_if(&ps->toks, "volatile"))
392 /* eg. "struct foo" or "varint_t" */
393 if (!tok_take_type(ps, &basetype)) {
394 complain(ps, "Expected typename inside struct/union");
399 struct cdump_member *m;
/* Grow the member array by one and work on the new last element. */
401 tal_resize(&e->u.members, n+1);
402 m = &e->u.members[n++];
406 = string_of_toks(e, quals,
409 m->qualifiers = NULL;
411 /* May have multiple asterisks. */
/* Each '*' wraps the member's type in one more pointer type. */
412 while (tok_take_if(&ps->toks, "*"))
413 m->type = ptr_of(ps, m->type);
/* The member name is allocated off the enclosing type e. */
415 m->name = tok_take_ident(e, &ps->toks);
417 complain(ps, "Expected name for member");
421 /* May be an array. */
/* Repeated "[" handles multi-dimensional arrays; tok_take_array() is
 * given &m->type so it can wrap the member's type. */
422 while (tok_take_if(&ps->toks, "[")) {
423 if (!tok_take_array(ps, &m->type))
/* A comma introduces another declarator in the same declaration. */
426 } while (tok_take_if(&ps->toks, ","));
428 if (!tok_take_if(&ps->toks, ";")) {
429 complain(ps, "Expected ; at end of member");
/* The definition must be closed by "}" immediately followed by ";". */
434 if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
436 complain(ps, "Expected }; at end of struct/union");
/* Parse an enum definition and record its values on the CDUMP_ENUM type
 * obtained from get_type().  Problems are reported through complain(). */
441 static bool tok_take_enum(struct parse_state *ps)
444 struct cdump_type *e;
447 name = tok_take_ident(ps->defs, &ps->toks);
449 complain(ps, "Expected enum name");
453 e = get_type(ps->defs, CDUMP_ENUM, name);
455 /* Duplicate name? */
456 if (type_defined(e)) {
457 complain(ps, "enum already defined");
461 if (!tok_take_if(&ps->toks, "{")) {
462 complain(ps, "Expected { after enum name");
/* The value array is owned by the enum type; n is its element count. */
466 e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);
468 struct cdump_enum_val *v;
470 /* GCC extension: comma and end of enum */
471 if (tok_is(&ps->toks, "}"))
/* Grow the value array by one and work on the new last element. */
474 tal_resize(&e->u.enum_vals, n+1);
475 v = &e->u.enum_vals[n++];
477 v->name = tok_take_ident(e, &ps->toks);
479 complain(ps, "Expected enum value name");
/* An explicit value is kept as text: everything up to the next ',' or
 * '}' token. */
482 if (tok_take_if(&ps->toks, "=")) {
483 v->value = tok_take_until(e, &ps->toks, ",}");
485 complain(ps, "Expected , or } to end value");
490 } while (tok_take_if(&ps->toks, ","));
/* The definition must be closed by "}" immediately followed by ";". */
492 if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
495 complain(ps, "Expected }; at end of enum");
/* strmap_iterate() callback used by remove_undefined(): adds every type
 * that type_defined() rejects to the undefs map, under the same name. */
499 static bool gather_undefines(const char *name,
500 struct cdump_type *t,
501 struct cdump_map *undefs)
503 if (!type_defined(t))
504 strmap_add(undefs, name, t);
/* strmap_iterate() callback used by remove_undefined(): deletes the entry
 * called name from map.  t is not used by the visible code, and the NULL
 * third argument means the removed value is not asked for. */
508 static bool remove_from_map(const char *name,
509 struct cdump_type *t,
510 struct cdump_map *map)
512 strmap_del(map, name, NULL);
/* Drop from map every type that was referenced but never given a body
 * (as judged by type_defined()). */
516 static void remove_undefined(struct cdump_map *map)
518 struct cdump_map undefs;
520 /* We can't delete inside iterator, so gather all the undefs
521 * then remove them. */
522 strmap_init(&undefs);
/* Pass 1 collects the undefined entries; pass 2 walks that collection and
 * deletes each name from the real map. */
524 strmap_iterate(map, gather_undefines, &undefs);
525 strmap_iterate(&undefs, remove_from_map, map);
/* The temporary map is emptied before it goes out of scope. */
526 strmap_clear(&undefs);
/* Clears the three name maps held in defs.  cdump_extract() registers this
 * as the tal destructor of the definitions object. */
529 static void destroy_definitions(struct cdump_definitions *defs)
531 strmap_clear(&defs->enums);
532 strmap_clear(&defs->structs);
533 strmap_clear(&defs->unions);
536 /* Simple LL(1) parser, inspired by Tridge's genstruct.pl. */
/* Tokenize code and collect the struct, union and enum definitions found
 * in it into a cdump_definitions object allocated off ctx.  Diagnostics
 * accumulate in a string that is passed back through *complaints. */
537 struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,
540 struct parse_state ps;
541 const struct token *toks;
543 ps.defs = tal(ctx, struct cdump_definitions);
/* Start with an empty complaint string; complain() appends to it. */
544 ps.complaints = tal_strdup(ctx, "");
547 strmap_init(&ps.defs->enums);
548 strmap_init(&ps.defs->structs);
549 strmap_init(&ps.defs->unions);
/* Make sure the maps are cleared whenever the definitions are freed. */
550 tal_add_destructor(ps.defs, destroy_definitions);
/* The token array is allocated off ps.defs; toks keeps the array start
 * while ps.toks moves forward during parsing. */
552 toks = ps.toks = tokenize(ps.defs, code);
/* Dispatch on the leading keyword of each top-level construct. */
553 while (tok_peek(&ps.toks)) {
554 if (tok_take_if(&ps.toks, "struct")) {
555 if (!tok_take_conglom(&ps, CDUMP_STRUCT))
557 } else if (tok_take_if(&ps.toks, "union")) {
558 if (!tok_take_conglom(&ps, CDUMP_UNION))
560 } else if (tok_take_if(&ps.toks, "enum")) {
561 if (!tok_take_enum(&ps))
/* Anything else is skipped up to the next semicolon, with a complaint. */
564 tok_take_unknown_statement(&ps);
567 /* Now, remove any undefined types! */
568 remove_undefined(&ps.defs->enums);
569 remove_undefined(&ps.defs->structs);
570 remove_undefined(&ps.defs->unions);
/* No complaints were recorded: free the empty string; the assignment
 * stores tal_free()'s return value back into ps.complaints. */
574 if (streq(ps.complaints, ""))
575 ps.complaints = tal_free(ps.complaints);
/* Either hand the complaints to the caller or free them. */
578 *complaints = ps.complaints;
580 tal_free(ps.complaints);
/* NOTE(review): presumably the failure path -- the definitions are freed
 * and ps.defs is overwritten with tal_free()'s return value; confirm. */
584 ps.defs = tal_free(ps.defs);