cdump: ignore __attribute__ (gcc extension).
[ccan] / ccan / cdump / cdump.c
1 /* MIT (BSD) license - see LICENSE file for details */
2 #include "cdump.h"
3 #include <ccan/tal/str/str.h>
4 #include <assert.h>
5
/* One lexical token: a slice of the source text (not NUL-terminated). */
struct token {
        /* First character of the token inside the original code string. */
        const char *p;
        /* Number of characters in the token. */
        size_t len;
};
10
11 static void add_token(struct token **toks, const char *p, size_t len)
12 {
13         size_t n = tal_count(*toks);
14         tal_resize(toks, n+1);
15         (*toks)[n].p = p;
16         (*toks)[n].len = len;
17 }
18
/* Return the number of characters from @p up to (not including) the
 * newline which ends the logical line starting at @p.  A backslash
 * immediately before a newline continues the line onto the next one.
 * If there is no terminating newline, the length of the rest of the
 * string is returned. */
static size_t to_eol(const char *p)
{
        size_t len = strcspn(p, "\n");

        /* And any \ continuations.  The len > 0 check keeps us from
         * looking at p[-1] when @p itself starts at a newline. */
        while (p[len] && len > 0 && p[len-1] == '\\')
                len += strcspn(p+len+1, "\n") + 1;
        return len;
}
28
29 /* Simplified tokenizer: comments and preproc directives removed,
30    identifiers are a token, others are single char tokens. */
31 static struct token *tokenize(const void *ctx, const char *code)
32 {
33         unsigned int i, len, tok_start = -1;
34         bool start_of_line = true;
35         struct token *toks = tal_arr(ctx, struct token, 0);
36
37         for (i = 0; code[i]; i += len) {
38                 if (code[i] == '#' && start_of_line) {
39                         /* Preprocessor line. */
40                         len = to_eol(code + i);
41                 } else if (code[i] == '/' && code[i+1] == '/') {
42                         /* One line comment. */
43                         len = to_eol(code + i);
44                         if (tok_start != -1U) {
45                                 add_token(&toks, code+tok_start, i - tok_start);
46                                 tok_start = -1U;
47                         }
48                 } else if (code[i] == '/' && code[i+1] == '*') {
49                         /* Multi-line comment. */
50                         const char *end = strstr(code+i+2, "*/");
51                         len = (end + 2) - (code + i);
52                         if (!end)
53                                 len = strlen(code + i);
54                         if (tok_start != -1U) {
55                                 add_token(&toks, code+tok_start, i - tok_start);
56                                 tok_start = -1U;
57                         }
58                 } else if (cisalnum(code[i]) || code[i] == '_') {
59                         /* Identifier or part thereof */
60                         if (tok_start == -1U)
61                                 tok_start = i;
62                         len = 1;
63                 } else if (!cisspace(code[i])) {
64                         /* Punctuation: treat as single char token. */
65                         if (tok_start != -1U) {
66                                 add_token(&toks, code+tok_start, i - tok_start);
67                                 tok_start = -1U;
68                         }
69                         add_token(&toks, code+i, 1);
70                         len = 1;
71                 } else {
72                         /* Whitespace. */
73                         if (tok_start != -1U) {
74                                 add_token(&toks, code+tok_start, i - tok_start);
75                                 tok_start = -1U;
76                         }
77                         len = 1;
78                 }
79                 if (code[i] == '\n')
80                         start_of_line = true;
81                 else if (!cisspace(code[i]))
82                         start_of_line = false;
83         }
84
85         /* Add terminating NULL. */
86         tal_resizez(&toks, tal_count(toks) + 1);
87         return toks;
88 }
89
/* Everything the parser threads through its helper functions. */
struct parse_state {
        /* Start of the source text being parsed (used for line numbers). */
        const char *code;
        /* Current position in the token array. */
        const struct token *toks;
        /* Definitions collected so far. */
        struct cdump_definitions *defs;
        /* Accumulated complaint messages, one per line. */
        char *complaints;
};
96
97 static const struct token *tok_peek(const struct token **toks)
98 {
99         /* Ignore removed tokens (eg. comments) */
100         while (toks[0]->len == 0) {
101                 if (!toks[0]->p)
102                         return NULL;
103                 (*toks)++;
104         }
105         return toks[0];
106 }
107
108 static bool tok_is(const struct token **toks, const char *target)
109 {
110         const struct token *t = tok_peek(toks);
111         return (t && t->len == strlen(target)
112                 && memcmp(t->p, target, t->len) == 0);
113 }
114
115 static const struct token *tok_take(const struct token **toks)
116 {
117         const struct token *t = tok_peek(toks);
118         if (t)
119                 (*toks)++;
120
121         return t;
122 }
123
/* Consume and return the current token only if it equals @target;
 * otherwise leave it in place and return NULL. */
static const struct token *tok_take_if(const struct token **toks,
                                       const char *target)
{
        return tok_is(toks, target) ? tok_take(toks) : NULL;
}
131
132 static const char *tok_take_ident(const tal_t *ctx, const struct token **toks)
133 {
134         const struct token *t = tok_peek(toks);
135
136         if (!t)
137                 return NULL;
138
139         if (strspn(t->p, "_0123456789"
140                    "abcdefghijklmnopqrstuvwxyz"
141                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ") < t->len)
142                 return NULL;
143
144         t = tok_take(toks);
145         return tal_strndup(ctx, t->p, t->len);
146 }
147
148 static char *string_of_toks(const tal_t *ctx,
149                             const struct token *first,
150                             const struct token *until)
151 {
152         char *str, *p;
153
154         /* Careful to skip erased tokens (eg. comments) */
155         str = p = tal_arr(ctx, char, until->p - first->p + 1);
156         while (first != until) {
157                 const struct token *next = first + 1;
158
159                 if (first->len) {
160                         memcpy(p, first->p, first->len);
161                         p += first->len;
162                         /* Insert space if they weren't adjacent, unless last */
163                         if (next != until) {
164                                 if (first->p + first->len != next->p)
165                                         *(p++) = ' ';
166                         }
167                 }
168                 first = next;
169         }
170         *p = '\0';
171
172         return str;
173 }
174
175 static char *tok_take_until(const tal_t *ctx,
176                             const struct token **toks,
177                             const char *delims)
178 {
179         const struct token *t, *start;
180
181         start = tok_peek(toks);
182         while ((t = tok_peek(toks)) != NULL) {
183                 /* If this contains a delimiter, copy up to prev token. */
184                 if (strcspn(t->p, delims) < t->len)
185                         return string_of_toks(ctx, start, t);
186                 tok_take(toks);
187         };
188
189         /* EOF without finding delimiter */
190         return NULL;
191 }
192
193 static bool type_defined(const struct cdump_type *t)
194 {
195         switch (t->kind) {
196         case CDUMP_STRUCT:
197         case CDUMP_UNION:
198                 return (t->u.members != NULL);
199         case CDUMP_ENUM:
200                 return (t->u.enum_vals != NULL);
201
202         /* These shouldn't happen; we don't try to define them. */
203         case CDUMP_UNKNOWN:
204         case CDUMP_ARRAY:
205         case CDUMP_POINTER:
206                 break;
207         }
208         abort();
209 }
210
211 /* May allocate a new type if not already found (steals @name) */
212 static struct cdump_type *get_type(struct cdump_definitions *defs,
213                                    enum cdump_type_kind kind,
214                                    const char *name)
215 {
216         struct cdump_map *m;
217         struct cdump_type *t;
218
219         switch (kind) {
220         case CDUMP_STRUCT:
221                 m = &defs->structs;
222                 break;
223         case CDUMP_UNION:
224                 m = &defs->unions;
225                 break;
226         case CDUMP_ENUM:
227                 m = &defs->enums;
228                 break;
229         case CDUMP_UNKNOWN:
230         case CDUMP_ARRAY:
231         case CDUMP_POINTER:
232                 m = NULL;
233         }
234
235         /* Do we already have it? */
236         if (m) {
237                 t = strmap_get(m, name);
238                 if (t)
239                         return t;
240         }
241
242         t = tal(defs, struct cdump_type);
243         t->kind = kind;
244         t->name = name ? tal_steal(t, name) : NULL;
245         /* These are actually the same, but be thorough */
246         t->u.members = NULL;
247         t->u.enum_vals = NULL;
248         if (m)
249                 strmap_add(m, t->name, t);
250
251         return t;
252 }
253
254 static void complain(struct parse_state *ps, const char *complaint)
255 {
256         unsigned int linenum;
257         const char *p = ps->code;
258
259         for (linenum = 1; p < ps->toks[0].p; linenum++) {
260                 p = strchr(p+1, '\n');
261                 if (!p)
262                         break;
263         }
264
265         tal_append_fmt(&ps->complaints,
266                        "Line %u: '%.*s': %s\n",
267                        linenum, (int)ps->toks[0].len,
268                        ps->toks[0].p, complaint);
269 }
270
271 static void tok_take_unknown_statement(struct parse_state *ps)
272 {
273         complain(ps, "Ignoring unknown statement until next semicolon");
274         tal_free(tok_take_until(NULL, &ps->toks, ";"));
275         tok_take_if(&ps->toks, ";");
276 }
277
278 static bool tok_take_expr(struct parse_state *ps, const char *term)
279 {
280         while (!tok_is(&ps->toks, term)) {
281                 if (tok_take_if(&ps->toks, "(")) {
282                         if (!tok_take_expr(ps, ")"))
283                                 return false;
284                 } else if (tok_take_if(&ps->toks, "[")) {
285                         if (!tok_take_expr(ps, "]"))
286                                 return false;
287                 } else if (!tok_take(&ps->toks))
288                         return false;
289         }
290         return tok_take(&ps->toks);
291 }
292
293 static char *tok_take_expr_str(const tal_t *ctx,
294                                struct parse_state *ps,
295                                const char *term)
296 {
297         const struct token *start = tok_peek(&ps->toks);
298
299         if (!tok_take_expr(ps, term))
300                 return NULL;
301
302         return string_of_toks(ctx, start, ps->toks - 1);
303 }
304
305 /* [ ... */
306 static bool tok_take_array(struct parse_state *ps, struct cdump_type **type)
307 {
308         /* This will be some arbitrary expression! */
309         struct cdump_type *arr = get_type(ps->defs, CDUMP_ARRAY, NULL);
310
311         arr->u.arr.size = tok_take_expr_str(arr, ps, "]");
312         if (!arr->u.arr.size) {
313                 complain(ps, "Could not find closing array size ]");
314                 return false;
315         }
316
317         arr->u.arr.type = *type;
318         *type = arr;
319
320         return true;
321 }
322
323 static struct cdump_type *ptr_of(struct parse_state *ps,
324                                  const struct cdump_type *ptr_to)
325 {
326         struct cdump_type *ptr = get_type(ps->defs, CDUMP_POINTER, NULL);
327         ptr->u.ptr = ptr_to;
328         return ptr;
329 }
330
331 static bool tok_take_type(struct parse_state *ps, struct cdump_type **type)
332 {
333         const char *name;
334         const struct token *types;
335         enum cdump_type_kind kind;
336
337         /* Ignoring weird typedefs, only these can be combined. */
338         types = ps->toks;
339         while (tok_take_if(&ps->toks, "int")
340                || tok_take_if(&ps->toks, "long")
341                || tok_take_if(&ps->toks, "short")
342                || tok_take_if(&ps->toks, "double")
343                || tok_take_if(&ps->toks, "float")
344                || tok_take_if(&ps->toks, "char")
345                || tok_take_if(&ps->toks, "signed")
346                || tok_take_if(&ps->toks, "unsigned"));
347
348         /* Did we get some? */
349         if (ps->toks != types) {
350                 name = string_of_toks(NULL, types, tok_peek(&ps->toks));
351                 kind = CDUMP_UNKNOWN;
352         } else {
353                 /* Try normal types (or simple typedefs, etc). */
354                 if (tok_take_if(&ps->toks, "struct")) {
355                         kind = CDUMP_STRUCT;
356                 } else if (tok_take_if(&ps->toks, "union")) {
357                         kind = CDUMP_UNION;
358                 } else if (tok_take_if(&ps->toks, "enum")) {
359                         kind = CDUMP_ENUM;
360                 } else
361                         kind = CDUMP_UNKNOWN;
362
363                 name = tok_take_ident(ps->defs, &ps->toks);
364                 if (!name) {
365                         complain(ps, "Invalid typename");
366                         return false;
367                 }
368         }
369
370         *type = get_type(ps->defs, kind, name);
371         return true;
372 }
373
374 /* CDUMP */
375 static bool tok_maybe_take_cdump_note(const tal_t *ctx,
376                                       struct parse_state *ps, const char **note)
377 {
378         *note = NULL;
379         if (tok_take_if(&ps->toks, "CDUMP")) {
380                 if (!tok_take_if(&ps->toks, "(")) {
381                         complain(ps, "Expected ( after CDUMP");
382                         return false;
383                 }
384                 *note = tok_take_expr_str(ctx, ps, ")");
385                 if (!*note) {
386                         complain(ps, "Expected ) after CDUMP(");
387                         return false;
388                 }
389         }
390         return true;
391 }
392
393 /* __attribute__((...)) */
394 static bool tok_ignore_attribute(struct parse_state *ps)
395 {
396         if (!tok_take_if(&ps->toks, "__attribute__"))
397                 return true;
398
399         if (!tok_take_if(&ps->toks, "(") || !tok_take_if(&ps->toks, "(")) {
400                 complain(ps, "Expected (( after __attribute__");
401                 return false;
402         }
403
404         if (!tok_take_expr(ps, ")")) {
405                 complain(ps, "Expected expression after __attribute__((");
406                 return false;
407         }
408         if (!tok_take_if(&ps->toks, ")")) {
409                 complain(ps, "Expected )) __attribute__((");
410                 return false;
411         }
412         return true;
413 }
414
415 /* struct|union ... */
416 static bool tok_take_conglom(struct parse_state *ps,
417                              enum cdump_type_kind conglom_kind)
418 {
419         struct cdump_type *e;
420         const char *name;
421         size_t n;
422
423         assert(conglom_kind == CDUMP_STRUCT || conglom_kind == CDUMP_UNION);
424
425         name = tok_take_ident(ps->defs, &ps->toks);
426         if (!name) {
427                 complain(ps, "Invalid struct/union name");
428                 return false;
429         }
430
431         e = get_type(ps->defs, conglom_kind, name);
432         if (type_defined(e)) {
433                 complain(ps, "Type already defined");
434                 return false;
435         }
436
437         if (!tok_maybe_take_cdump_note(e, ps, &e->note))
438                 return false;
439
440         if (!tok_ignore_attribute(ps))
441                 return false;
442
443         if (!tok_take_if(&ps->toks, "{")) {
444                 complain(ps, "Expected { for struct/union");
445                 return false;
446         }
447
448         e->u.members = tal_arr(e, struct cdump_member, n = 0);
449         while (!tok_is(&ps->toks, "}")) {
450                 struct cdump_type *basetype;
451                 const struct token *quals;
452                 unsigned int num_quals = 0;
453
454                 if (!tok_ignore_attribute(ps))
455                         return false;
456
457                 /* Anything can have these prepended. */
458                 quals = ps->toks;
459                 while (tok_take_if(&ps->toks, "const")
460                        || tok_take_if(&ps->toks, "volatile"))
461                         num_quals++;
462
463                 /* eg. "struct foo" or "varint_t" */
464                 if (!tok_take_type(ps, &basetype)) {
465                         complain(ps, "Expected typename inside struct/union");
466                         return false;
467                 }
468
469                 do {
470                         struct cdump_member *m;
471
472                         tal_resize(&e->u.members, n+1);
473                         m = &e->u.members[n++];
474                         m->type = basetype;
475                         if (num_quals) {
476                                 m->qualifiers
477                                         = string_of_toks(e, quals,
478                                                          quals + num_quals);
479                         } else
480                                 m->qualifiers = NULL;
481
482                         /* May have multiple asterisks. */
483                         while (tok_take_if(&ps->toks, "*"))
484                                 m->type = ptr_of(ps, m->type);
485
486                         if (!tok_ignore_attribute(ps))
487                                 return false;
488
489                         m->name = tok_take_ident(e, &ps->toks);
490                         if (!m->name) {
491                                 complain(ps, "Expected name for member");
492                                 return false;
493                         }
494
495                         /* May be an array. */
496                         while (tok_take_if(&ps->toks, "[")) {
497                                 if (!tok_take_array(ps, &m->type))
498                                         return false;
499                         }
500
501                         /* CDUMP() */
502                         if (!tok_maybe_take_cdump_note(e->u.members,
503                                                        ps, &m->note))
504                                 return false;
505
506                         if (!tok_ignore_attribute(ps))
507                                 return false;
508                 } while (tok_take_if(&ps->toks, ","));
509
510                 if (!tok_take_if(&ps->toks, ";")) {
511                         complain(ps, "Expected ; at end of member");
512                         return false;
513                 }
514         }
515
516         if (!tok_take_if(&ps->toks, "}")) {
517                 complain(ps, "Expected } at end of struct/union");
518                 return false;
519         }
520
521         if (!tok_ignore_attribute(ps))
522                 return false;
523
524         if (!tok_take_if(&ps->toks, ";")) {
525                 complain(ps, "Expected ; at end of struct/union");
526                 return false;
527         }
528         return true;
529 }
530
531 /* enum ... */
532 static bool tok_take_enum(struct parse_state *ps)
533 {
534         size_t n = 0;
535         struct cdump_type *e;
536         const char *name;
537
538         name = tok_take_ident(ps->defs, &ps->toks);
539         if (!name) {
540                 complain(ps, "Expected enum name");
541                 return false;
542         }
543
544         e = get_type(ps->defs, CDUMP_ENUM, name);
545
546         /* Duplicate name? */
547         if (type_defined(e)) {
548                 complain(ps, "enum already defined");
549                 return false;
550         }
551
552         /* CDUMP() */
553         if (!tok_maybe_take_cdump_note(e, ps, &e->note))
554                 return false;
555
556         if (!tok_ignore_attribute(ps))
557                 return false;
558
559         if (!tok_take_if(&ps->toks, "{")) {
560                 complain(ps, "Expected { after enum name");
561                 return false;
562         }
563
564         e->u.enum_vals = tal_arr(e, struct cdump_enum_val, n);
565         do {
566                 struct cdump_enum_val *v;
567
568                 /* GCC extension: comma and end of enum */
569                 if (tok_is(&ps->toks, "}"))
570                         break;
571
572                 tal_resize(&e->u.enum_vals, n+1);
573                 v = &e->u.enum_vals[n++];
574
575                 v->name = tok_take_ident(e, &ps->toks);
576                 if (!v->name) {
577                         complain(ps, "Expected enum value name");
578                         return false;
579                 }
580
581                 /* CDUMP() */
582                 if (!tok_maybe_take_cdump_note(e->u.enum_vals, ps, &v->note))
583                         return false;
584
585                 if (tok_take_if(&ps->toks, "=")) {
586                         v->value = tok_take_until(e, &ps->toks, ",}");
587                         if (!v->value) {
588                                 complain(ps, "Expected , or } to end value");
589                                 return false;
590                         }
591                 } else
592                         v->value = NULL;
593         } while (tok_take_if(&ps->toks, ","));
594
595         if (!tok_take_if(&ps->toks, "}")) {
596                 complain(ps, "Expected } at end of enum");
597                 return false;
598         }
599
600         if (!tok_ignore_attribute(ps))
601                 return false;
602
603         if (!tok_take_if(&ps->toks, ";")) {
604                 complain(ps, "Expected ; at end of enum");
605                 return false;
606         }
607         return true;
608 }
609
/* strmap_iterate callback: record @t in @undefs if it was only ever
 * referred to, never defined.  Always returns true. */
static bool gather_undefines(const char *name,
                             struct cdump_type *t,
                             struct cdump_map *undefs)
{
        if (type_defined(t))
                return true;

        strmap_add(undefs, name, t);
        return true;
}
618
/* strmap_iterate callback: delete the entry called @name from @map.
 * The type @t itself is not used.  Always returns true. */
static bool remove_from_map(const char *name,
                            struct cdump_type *t,
                            struct cdump_map *map)
{
        strmap_del(map, name, NULL);

        return true;
}
626
627 static void remove_undefined(struct cdump_map *map)
628 {
629         struct cdump_map undefs;
630
631         /* We can't delete inside iterator, so gather all the undefs
632          * then remove them. */
633         strmap_init(&undefs);
634
635         strmap_iterate(map, gather_undefines, &undefs);
636         strmap_iterate(&undefs, remove_from_map, map);
637         strmap_clear(&undefs);
638 }
639
640 static void destroy_definitions(struct cdump_definitions *defs)
641 {
642         strmap_clear(&defs->enums);
643         strmap_clear(&defs->structs);
644         strmap_clear(&defs->unions);
645 }
646
647 /* Simple LL(1) parser, inspired by Tridge's genstruct.pl. */
648 struct cdump_definitions *cdump_extract(const tal_t *ctx, const char *code,
649                                         char **complaints)
650 {
651         struct parse_state ps;
652         const struct token *toks;
653
654         ps.defs = tal(ctx, struct cdump_definitions);
655         ps.complaints = tal_strdup(ctx, "");
656         ps.code = code;
657
658         strmap_init(&ps.defs->enums);
659         strmap_init(&ps.defs->structs);
660         strmap_init(&ps.defs->unions);
661         tal_add_destructor(ps.defs, destroy_definitions);
662
663         toks = ps.toks = tokenize(ps.defs, code);
664         while (tok_peek(&ps.toks)) {
665                 if (!tok_ignore_attribute(&ps))
666                         goto fail;
667                 if (tok_take_if(&ps.toks, "struct")) {
668                         if (!tok_take_conglom(&ps, CDUMP_STRUCT))
669                                 goto fail;
670                 } else if (tok_take_if(&ps.toks, "union")) {
671                         if (!tok_take_conglom(&ps, CDUMP_UNION))
672                                 goto fail;
673                 } else if (tok_take_if(&ps.toks, "enum")) {
674                         if (!tok_take_enum(&ps))
675                                 goto fail;
676                 } else
677                         tok_take_unknown_statement(&ps);
678         }
679
680         /* Now, remove any undefined types! */
681         remove_undefined(&ps.defs->enums);
682         remove_undefined(&ps.defs->structs);
683         remove_undefined(&ps.defs->unions);
684         tal_free(toks);
685
686 out:
687         if (streq(ps.complaints, ""))
688                 ps.complaints = tal_free(ps.complaints);
689
690         if (complaints)
691                 *complaints = ps.complaints;
692         else
693                 tal_free(ps.complaints);
694         return ps.defs;
695
696 fail:
697         ps.defs = tal_free(ps.defs);
698         goto out;
699 }