/*
 * git.ozlabs.org Git - ccan/blob - ccan/cdump/cdump.c
 * cdump: add CDUMP() support.
 * [ccan] / ccan / cdump / cdump.c
 */
1 /* MIT (BSD) license - see LICENSE file for details */
2 #include "cdump.h"
3 #include <ccan/tal/str/str.h>
4 #include <assert.h>
5
/* One lexical token: a slice of the code being parsed (not NUL-terminated).
 * tok_peek() treats an entry with len == 0 and p == NULL as the end of the
 * token array. */
struct token {
        /* First character of the token inside the source text. */
        const char *p;
        /* Number of characters belonging to the token. */
        size_t len;
};
10
11 static void add_token(struct token **toks, const char *p, size_t len)
12 {
13         size_t n = tal_count(*toks);
14         tal_resize(toks, n+1);
15         (*toks)[n].p = p;
16         (*toks)[n].len = len;
17 }
18
19 /* Simplified tokenizer: comments and preproc directives removed,
20    identifiers are a token, others are single char tokens. */
21 static struct token *tokenize(const void *ctx, const char *code)
22 {
23         unsigned int i, len, tok_start = -1;
24         bool start_of_line = true;
25         struct token *toks = tal_arr(ctx, struct token, 0);
26
27         for (i = 0; code[i]; i += len) {
28                 if (code[i] == '#' && start_of_line) {
29                         /* Preprocessor line. */
30                         len = strcspn(code+i, "\n");
31                 } else if (code[i] == '/' && code[i+1] == '/') {
32                         /* One line comment. */
33                         len = strcspn(code+i, "\n");
34                         if (tok_start != -1U) {
35                                 add_token(&toks, code+tok_start, i - tok_start);
36                                 tok_start = -1U;
37                         }
38                 } else if (code[i] == '/' && code[i+1] == '*') {
39                         /* Multi-line comment. */
40                         const char *end = strstr(code+i+2, "*/");
41                         len = (end + 2) - (code + i);
42                         if (!end)
43                                 len = strlen(code + i);
44                         if (tok_start != -1U) {
45                                 add_token(&toks, code+tok_start, i - tok_start);
46                                 tok_start = -1U;
47                         }
48                 } else if (cisalnum(code[i]) || code[i] == '_') {
49                         /* Identifier or part thereof */
50                         if (tok_start == -1U)
51                                 tok_start = i;
52                         len = 1;
53                 } else if (!cisspace(code[i])) {
54                         /* Punctuation: treat as single char token. */
55                         if (tok_start != -1U) {
56                                 add_token(&toks, code+tok_start, i - tok_start);
57                                 tok_start = -1U;
58                         }
59                         add_token(&toks, code+i, 1);
60                         len = 1;
61                 } else {
62                         /* Whitespace. */
63                         if (tok_start != -1U) {
64                                 add_token(&toks, code+tok_start, i - tok_start);
65                                 tok_start = -1U;
66                         }
67                         len = 1;
68                 }
69                 if (code[i] == '\n')
70                         start_of_line = true;
71                 else if (!cisspace(code[i]))
72                         start_of_line = false;
73         }
74
75         /* Add terminating NULL. */
76         tal_resizez(&toks, tal_count(toks) + 1);
77         return toks;
78 }
79
/* State threaded through every parsing helper. */
struct parse_state {
        /* The source text being parsed; complain() scans it to work out
         * a line number. */
        const char *code;
        /* Current position in the token array. */
        const struct token *toks;
        /* Definitions collected so far; new types are allocated off it. */
        struct cdump_definitions *defs;
        /* Complaint text accumulated by complain(). */
        char *complaints;
};
86
87 static const struct token *tok_peek(const struct token **toks)
88 {
89         /* Ignore removed tokens (eg. comments) */
90         while (toks[0]->len == 0) {
91                 if (!toks[0]->p)
92                         return NULL;
93                 (*toks)++;
94         }
95         return toks[0];
96 }
97
98 static bool tok_is(const struct token **toks, const char *target)
99 {
100         const struct token *t = tok_peek(toks);
101         return (t && t->len == strlen(target)
102                 && memcmp(t->p, target, t->len) == 0);
103 }
104
105 static const struct token *tok_take(const struct token **toks)
106 {
107         const struct token *t = tok_peek(toks);
108         if (t)
109                 (*toks)++;
110
111         return t;
112 }
113
/* Consume the next token only if its text equals @target.  Returns the
 * token taken, or NULL if it did not match (or input ended). */
static const struct token *tok_take_if(const struct token **toks,
                                       const char *target)
{
        return tok_is(toks, target) ? tok_take(toks) : NULL;
}
121
122 static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)
123 {
124         const struct token *t = tok_peek(toks);
125
126         if (!t)
127                 return NULL;
128
129         if (strspn(t->p, "_0123456789"
130                    "abcdefghijklmnopqrstuvwxyz"
131                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ") < t->len)
132                 return NULL;
133
134         t = tok_take(toks);
135         return tal_strndup(ctx, t->p, t->len);
136 }
137
138 static char *string_of_toks(const tal_t *ctx,
139                             const struct token *first,
140                             const struct token *until)
141 {
142         char *str, *p;
143
144         /* Careful to skip erased tokens (eg. comments) */
145         str = p = tal_arr(ctx, char, until->p - first->p + 1);
146         while (first != until) {
147                 const struct token *next = first + 1;
148
149                 if (first->len) {
150                         memcpy(p, first->p, first->len);
151                         p += first->len;
152                         /* Insert space if they weren't adjacent, unless last */
153                         if (next != until) {
154                                 if (first->p + first->len != next->p)
155                                         *(p++) = ' ';
156                         }
157                 }
158                 first = next;
159         }
160         *p = '\0';
161
162         return str;
163 }
164
165 static char *tok_take_until(const tal_t *ctx,
166                             const struct token **toks,
167                             const char *delims)
168 {
169         const struct token *t, *start;
170
171         start = tok_peek(toks);
172         while ((t = tok_peek(toks)) != NULL) {
173                 /* If this contains a delimiter, copy up to prev token. */
174                 if (strcspn(t->p, delims) < t->len)
175                         return string_of_toks(ctx, start, t);
176                 tok_take(toks);
177         };
178
179         /* EOF without finding delimiter */
180         return NULL;
181 }
182
183 static bool type_defined(const struct cdump_type *t)
184 {
185         switch (t->kind) {
186         case CDUMP_STRUCT:
187         case CDUMP_UNION:
188                 return (t->u.members != NULL);
189         case CDUMP_ENUM:
190                 return (t->u.enum_vals != NULL);
191
192         /* These shouldn't happen; we don't try to define them. */
193         case CDUMP_UNKNOWN:
194         case CDUMP_ARRAY:
195         case CDUMP_POINTER:
196                 break;
197         }
198         abort();
199 }
200
201 /* May allocate a new type if not already found (steals @name) */
202 static struct cdump_type *get_type(struct cdump_definitions *defs,
203                                    enum cdump_type_kind kind,
204                                    const char *name)
205 {
206         struct cdump_map *m;
207         struct cdump_type *t;
208
209         switch (kind) {
210         case CDUMP_STRUCT:
211                 m = &defs->structs;
212                 break;
213         case CDUMP_UNION:
214                 m = &defs->unions;
215                 break;
216         case CDUMP_ENUM:
217                 m = &defs->enums;
218                 break;
219         case CDUMP_UNKNOWN:
220         case CDUMP_ARRAY:
221         case CDUMP_POINTER:
222                 m = NULL;
223         }
224
225         /* Do we already have it? */
226         if (m) {
227                 t = strmap_get(m, name);
228                 if (t)
229                         return t;
230         }
231
232         t = tal(defs, struct cdump_type);
233         t->kind = kind;
234         t->name = name ? tal_steal(t, name) : NULL;
235         /* These are actually the same, but be thorough */
236         t->u.members = NULL;
237         t->u.enum_vals = NULL;
238         if (m)
239                 strmap_add(m, t->name, t);
240
241         return t;
242 }
243
244 static void complain(struct parse_state *ps, const char *complaint)
245 {
246         unsigned int linenum;
247         const char *p = ps->code;
248
249         for (linenum = 1; p < ps->toks[0].p; linenum++) {
250                 p = strchr(p+1, '\n');
251                 if (!p)
252                         break;
253         }
254
255         tal_append_fmt(&ps->complaints,
256                        "Line %u: '%.*s': %s\n",
257                        linenum, (int)ps->toks[0].len,
258                        ps->toks[0].p, complaint);
259 }
260
261 static void tok_take_unknown_statement(struct parse_state *ps)
262 {
263         complain(ps, "Ignoring unknown statement until next semicolon");
264         tal_free(tok_take_until(NULL, &ps->toks, ";"));
265         tok_take_if(&ps->toks, ";");
266 }
267
268 static bool tok_take_expr(struct parse_state *ps, const char *term)
269 {
270         while (!tok_is(&ps->toks, term)) {
271                 if (tok_take_if(&ps->toks, "(")) {
272                         if (!tok_take_expr(ps, ")"))
273                                 return false;
274                 } else if (tok_take_if(&ps->toks, "[")) {
275                         if (!tok_take_expr(ps, "]"))
276                                 return false;
277                 } else if (!tok_take(&ps->toks))
278                         return false;
279         }
280         return tok_take(&ps->toks);
281 }
282
283 static char *tok_take_expr_str(const tal_t *ctx,
284                                struct parse_state *ps,
285                                const char *term)
286 {
287         const struct token *start = tok_peek(&ps->toks);
288
289         if (!tok_take_expr(ps, term))
290                 return NULL;
291
292         return string_of_toks(ctx, start, ps->toks - 1);
293 }
294
295 /* [ ... */
296 static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)
297 {
298         /* This will be some arbitrary expression! */
299         struct cdump_type *arr = get_type(ps->defs, CDUMP_ARRAY, NULL);
300
301         arr->u.arr.size = tok_take_expr_str(arr, ps, "]");
302         if (!arr->u.arr.size) {
303                 complain(ps, "Could not find closing array size ]");
304                 return false;
305         }
306
307         arr->u.arr.type = *type;
308         *type = arr;
309
310         return true;
311 }
312
313 static struct cdump_type *ptr_of(struct parse_state *ps,
314                                  const struct cdump_type *ptr_to)
315 {
316         struct cdump_type *ptr = get_type(ps->defs, CDUMP_POINTER, NULL);
317         ptr->u.ptr = ptr_to;
318         return ptr;
319 }
320
321 static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)
322 {
323         const char *name;
324         const struct token *types;
325         enum cdump_type_kind kind;
326
327         /* Ignoring weird typedefs, only these can be combined. */
328         types = ps->toks;
329         while (tok_take_if(&ps->toks, "int")
330                || tok_take_if(&ps->toks, "long")
331                || tok_take_if(&ps->toks, "short")
332                || tok_take_if(&ps->toks, "double")
333                || tok_take_if(&ps->toks, "float")
334                || tok_take_if(&ps->toks, "char")
335                || tok_take_if(&ps->toks, "signed")
336                || tok_take_if(&ps->toks, "unsigned"));
337
338         /* Did we get some? */
339         if (ps->toks != types) {
340                 name = string_of_toks(NULL, types, tok_peek(&ps->toks));
341                 kind = CDUMP_UNKNOWN;
342         } else {
343                 /* Try normal types (or simple typedefs, etc). */
344                 if (tok_take_if(&ps->toks, "struct")) {
345                         kind = CDUMP_STRUCT;
346                 } else if (tok_take_if(&ps->toks, "union")) {
347                         kind = CDUMP_UNION;
348                 } else if (tok_take_if(&ps->toks, "enum")) {
349                         kind = CDUMP_ENUM;
350                 } else
351                         kind = CDUMP_UNKNOWN;
352
353                 name = tok_take_ident(ps->defs, &ps->toks);
354                 if (!name) {
355                         complain(ps, "Invalid typename");
356                         return false;
357                 }
358         }
359
360         *type = get_type(ps->defs, kind, name);
361         return true;
362 }
363
364 /* CDUMP */
365 static bool tok_maybe_take_cdump_note(const tal_t *ctx,
366                                       struct parse_state *ps, const char **note)
367 {
368         *note = NULL;
369         if (tok_take_if(&ps->toks, "CDUMP")) {
370                 if (!tok_take_if(&ps->toks, "(")) {
371                         complain(ps, "Expected ( after CDUMP");
372                         return false;
373                 }
374                 *note = tok_take_expr_str(ctx, ps, ")");
375                 if (!*note) {
376                         complain(ps, "Expected ) after CDUMP(");
377                         return false;
378                 }
379         }
380         return true;
381 }
382
383 /* struct|union ... */
384 static bool tok_take_conglom(struct parse_state *ps,
385                              enum cdump_type_kind conglom_kind)
386 {
387         struct cdump_type *e;
388         const char *name;
389         size_t n;
390
391         assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);
392
393         name = tok_take_ident(ps->defs, &ps->toks);
394         if (!name) {
395                 complain(ps, "Invalid struct/union name");
396                 return false;
397         }
398
399         e = get_type(ps->defs, conglom_kind, name);
400         if (type_defined(e)) {
401                 complain(ps, "Type already defined");
402                 return false;
403         }
404
405         if (!tok_maybe_take_cdump_note(e, ps, &e->note))
406                 return false;
407
408         if (!tok_take_if(&ps->toks, "{")) {
409                 complain(ps, "Expected { for struct/union");
410                 return false;
411         }
412
413         e->u.members = tal_arr(e, struct cdump_member, n = 0);
414         while (!tok_is(&ps->toks, "}")) {
415                 struct cdump_type *basetype;
416                 const struct token *quals;
417                 unsigned int num_quals = 0;
418
419                 /* Anything can have these prepended. */
420                 quals = ps->toks;
421                 while (tok_take_if(&ps->toks, "const")
422                        || tok_take_if(&ps->toks, "volatile"))
423                         num_quals++;
424
425                 /* eg. "struct foo" or "varint_t" */
426                 if (!tok_take_type(ps, &basetype)) {
427                         complain(ps, "Expected typename inside struct/union");
428                         return false;
429                 }
430
431                 do {
432                         struct cdump_member *m;
433
434                         tal_resize(&e->u.members, n+1);
435                         m = &e->u.members[n++];
436                         m->type = basetype;
437                         if (num_quals) {
438                                 m->qualifiers
439                                         = string_of_toks(e, quals,
440                                                          quals + num_quals);
441                         } else
442                                 m->qualifiers = NULL;
443
444                         /* May have multiple asterisks. */
445                         while (tok_take_if(&ps->toks, "*"))
446                                 m->type = ptr_of(ps, m->type);
447
448                         m->name = tok_take_ident(e, &ps->toks);
449                         if (!m->name) {
450                                 complain(ps, "Expected name for member");
451                                 return false;
452                         }
453
454                         /* May be an array. */
455                         while (tok_take_if(&ps->toks, "[")) {
456                                 if (!tok_take_array(ps, &m->type))
457                                         return false;
458                         }
459
460                         /* CDUMP() */
461                         if (!tok_maybe_take_cdump_note(e->u.members,
462                                                        ps, &m->note))
463                                 return false;
464                 } while (tok_take_if(&ps->toks, ","));
465
466                 if (!tok_take_if(&ps->toks, ";")) {
467                         complain(ps, "Expected ; at end of member");
468                         return false;
469                 }
470         }
471
472         if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
473                 return true;
474         complain(ps, "Expected }; at end of struct/union");
475         return false;
476 }
477
478 /* enum ... */
479 static bool tok_take_enum(struct parse_state *ps)
480 {
481         size_t n = 0;
482         struct cdump_type *e;
483         const char *name;
484
485         name = tok_take_ident(ps->defs, &ps->toks);
486         if (!name) {
487                 complain(ps, "Expected enum name");
488                 return false;
489         }
490
491         e = get_type(ps->defs, CDUMP_ENUM, name);
492
493         /* Duplicate name? */
494         if (type_defined(e)) {
495                 complain(ps, "enum already defined");
496                 return false;
497         }
498
499         /* CDUMP() */
500         if (!tok_maybe_take_cdump_note(e, ps, &e->note))
501                 return false;
502
503         if (!tok_take_if(&ps->toks, "{")) {
504                 complain(ps, "Expected { after enum name");
505                 return false;
506         }
507
508         e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);
509         do {
510                 struct cdump_enum_val *v;
511
512                 /* GCC extension: comma and end of enum */
513                 if (tok_is(&ps->toks, "}"))
514                         break;
515
516                 tal_resize(&e->u.enum_vals, n+1);
517                 v = &e->u.enum_vals[n++];
518
519                 v->name = tok_take_ident(e, &ps->toks);
520                 if (!v->name) {
521                         complain(ps, "Expected enum value name");
522                         return false;
523                 }
524
525                 /* CDUMP() */
526                 if (!tok_maybe_take_cdump_note(e->u.enum_vals, ps, &v->note))
527                         return false;
528
529                 if (tok_take_if(&ps->toks, "=")) {
530                         v->value = tok_take_until(e, &ps->toks, ",}");
531                         if (!v->value) {
532                                 complain(ps, "Expected , or } to end value");
533                                 return false;
534                         }
535                 } else
536                         v->value = NULL;
537         } while (tok_take_if(&ps->toks, ","));
538
539         if (tok_take_if(&ps->toks, "}") && tok_take_if(&ps->toks, ";"))
540                 return true;
541
542         complain(ps, "Expected }; at end of enum");
543         return false;
544 }
545
/* strmap_iterate() callback: add @t to @undefs under @name if it has no
 * body.  Always returns true. */
static bool gather_undefines(const char *name,
                             struct cdump_type *t,
                             struct cdump_map *undefs)
{
        if (type_defined(t))
                return true;

        strmap_add(undefs, name, t);
        return true;
}
554
/* strmap_iterate() callback: delete the entry called @name from @map.
 * The type itself is not needed here.  Always returns true. */
static bool remove_from_map(const char *name,
                            struct cdump_type *t,
                            struct cdump_map *map)
{
        (void)t;

        strmap_del(map, name, NULL);
        return true;
}
562
563 static void remove_undefined(struct cdump_map *map)
564 {
565         struct cdump_map undefs;
566
567         /* We can't delete inside iterator, so gather all the undefs
568          * then remove them. */
569         strmap_init(&undefs);
570
571         strmap_iterate(map, gather_undefines, &undefs);
572         strmap_iterate(&undefs, remove_from_map, map);
573         strmap_clear(&undefs);
574 }
575
576 static void destroy_definitions(struct cdump_definitions *defs)
577 {
578         strmap_clear(&defs->enums);
579         strmap_clear(&defs->structs);
580         strmap_clear(&defs->unions);
581 }
582
583 /* Simple LL(1) parser, inspired by Tridge's genstruct.pl. */
584 struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,
585                                         char **complaints)
586 {
587         struct parse_state ps;
588         const struct token *toks;
589
590         ps.defs = tal(ctx, struct cdump_definitions);
591         ps.complaints = tal_strdup(ctx, "");
592         ps.code = code;
593
594         strmap_init(&ps.defs->enums);
595         strmap_init(&ps.defs->structs);
596         strmap_init(&ps.defs->unions);
597         tal_add_destructor(ps.defs, destroy_definitions);
598
599         toks = ps.toks = tokenize(ps.defs, code);
600         while (tok_peek(&ps.toks)) {
601                 if (tok_take_if(&ps.toks, "struct")) {
602                         if (!tok_take_conglom(&ps, CDUMP_STRUCT))
603                                 goto fail;
604                 } else if (tok_take_if(&ps.toks, "union")) {
605                         if (!tok_take_conglom(&ps, CDUMP_UNION))
606                                 goto fail;
607                 } else if (tok_take_if(&ps.toks, "enum")) {
608                         if (!tok_take_enum(&ps))
609                                 goto fail;
610                 } else
611                         tok_take_unknown_statement(&ps);
612         }
613
614         /* Now, remove any undefined types! */
615         remove_undefined(&ps.defs->enums);
616         remove_undefined(&ps.defs->structs);
617         remove_undefined(&ps.defs->unions);
618         tal_free(toks);
619
620 out:
621         if (streq(ps.complaints, ""))
622                 ps.complaints = tal_free(ps.complaints);
623
624         if (complaints)
625                 *complaints = ps.complaints;
626         else
627                 tal_free(ps.complaints);
628         return ps.defs;
629
630 fail:
631         ps.defs = tal_free(ps.defs);
632         goto out;
633 }