ccanlint: test for C++ reserved words in headers.
[ccan] / tools / ccanlint / file_analysis.c
1 #include "config.h"
2 #include "ccanlint.h"
3 #include <ccan/talloc/talloc.h>
4 #include <ccan/str/str.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/talloc_link/talloc_link.h>
7 #include <ccan/hash/hash.h>
8 #include <ccan/htable/htable_type.h>
9 #include <ccan/grab_file/grab_file.h>
10 #include <ccan/noerr/noerr.h>
11 #include <ccan/foreach/foreach.h>
12 #include <ccan/asort/asort.h>
13 #include "../tools.h"
14 #include <unistd.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <fcntl.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <dirent.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <assert.h>
24
/* Root of the ccan tree; filled in lazily by get_manifest(). */
const char *ccan_dir = NULL;
26
/* Hash callback for the manifest table: hashes the directory name string. */
static size_t dir_hash(const char *name)
{
	const size_t len = strlen(name);

	return hash(name, len, 0);
}
31
32 static const char *manifest_name(const struct manifest *m)
33 {
34         return m->dir;
35 }
36
37 static bool dir_cmp(const struct manifest *m, const char *dir)
38 {
39         return strcmp(m->dir, dir) == 0;
40 }
41
42 HTABLE_DEFINE_TYPE(struct manifest, manifest_name, dir_hash, dir_cmp, manifest);
43 static struct htable_manifest *manifests;
44
45 const char *get_ccan_file_contents(struct ccan_file *f)
46 {
47         if (!f->contents) {
48                 f->contents = grab_file(f, f->fullname, &f->contents_size);
49                 if (!f->contents)
50                         err(1, "Reading file %s", f->fullname);
51         }
52         return f->contents;
53 }
54
55 char **get_ccan_file_lines(struct ccan_file *f)
56 {
57         if (!f->lines)
58                 f->lines = strsplit(f, get_ccan_file_contents(f), "\n");
59
60         /* FIXME: is f->num_lines necessary? */
61         f->num_lines = talloc_array_length(f->lines) - 1;
62         return f->lines;
63 }
64
65 struct list_head *get_ccan_file_docs(struct ccan_file *f)
66 {
67         if (!f->doc_sections) {
68                 get_ccan_file_lines(f);
69                 f->doc_sections = extract_doc_sections(f->lines);
70         }
71         return f->doc_sections;
72 }
73
74 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
75 {
76         struct ccan_file *f;
77
78         assert(dir[0] == '/');
79
80         f = talloc(ctx, struct ccan_file);
81         f->lines = NULL;
82         f->line_info = NULL;
83         f->doc_sections = NULL;
84         f->compiled = NULL;
85         f->name = talloc_steal(f, name);
86         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
87         f->contents = NULL;
88         f->cov_compiled = NULL;
89         return f;
90 }
91
92 static void add_files(struct manifest *m, const char *dir)
93 {
94         DIR *d;
95         struct dirent *ent;
96         char **subs = NULL;
97
98         if (dir[0])
99                 d = opendir(dir);
100         else
101                 d = opendir(".");
102         if (!d)
103                 err(1, "Opening directory %s", dir[0] ? dir : ".");
104
105         while ((ent = readdir(d)) != NULL) {
106                 struct stat st;
107                 struct ccan_file *f;
108                 struct list_head *dest;
109                 bool is_c_src;
110
111                 if (ent->d_name[0] == '.')
112                         continue;
113
114                 f = new_ccan_file(m, m->dir,
115                                   talloc_asprintf(m, "%s%s",
116                                                   dir, ent->d_name));
117                 if (lstat(f->name, &st) != 0)
118                         err(1, "lstat %s", f->name);
119
120                 if (S_ISDIR(st.st_mode)) {
121                         size_t len = talloc_array_length(subs);
122                         subs = talloc_realloc(m, subs, char *, len+1);
123                         subs[len] = talloc_append_string(f->name, "/");
124                         continue;
125                 }
126                 if (!S_ISREG(st.st_mode)) {
127                         talloc_free(f);
128                         continue;
129                 }
130
131                 if (streq(f->name, "_info")) {
132                         m->info_file = f;
133                         continue;
134                 }
135
136                 is_c_src = strends(f->name, ".c");
137                 if (!is_c_src && !strends(f->name, ".h")) {
138                         dest = &m->other_files;
139                 } else if (!strchr(f->name, '/')) {
140                         if (is_c_src)
141                                 dest = &m->c_files;
142                         else
143                                 dest = &m->h_files;
144                 } else if (strstarts(f->name, "test/")) {
145                         if (is_c_src) {
146                                 if (strstarts(f->name, "test/api"))
147                                         dest = &m->api_tests;
148                                 else if (strstarts(f->name, "test/run"))
149                                         dest = &m->run_tests;
150                                 else if (strstarts(f->name, "test/compile_ok"))
151                                         dest = &m->compile_ok_tests;
152                                 else if (strstarts(f->name, "test/compile_fail"))
153                                         dest = &m->compile_fail_tests;
154                                 else
155                                         dest = &m->other_test_c_files;
156                         } else
157                                 dest = &m->other_test_files;
158                 } else
159                         dest = &m->other_files;
160
161                 list_add(dest, &f->list);
162         }
163         closedir(d);
164
165         /* Before we recurse, sanity check this is a ccan module. */ 
166         if (!dir[0]) {
167                 size_t i;
168
169                 if (!m->info_file
170                     && list_empty(&m->c_files)
171                     && list_empty(&m->h_files))
172                         errx(1, "No _info, C or H files found here!");
173
174                 for (i = 0; i < talloc_array_length(subs); i++)
175                         add_files(m, subs[i]);
176         }
177         talloc_free(subs);
178 }
179
180 static int cmp_names(struct ccan_file *const *a, struct ccan_file *const *b,
181                      void *unused)
182 {
183         return strcmp((*a)->name, (*b)->name);
184 }
185
186 static void sort_files(struct list_head *list)
187 {
188         struct ccan_file **files = NULL, *f;
189         unsigned int i, num;
190
191         num = 0;
192         while ((f = list_top(list, struct ccan_file, list)) != NULL) {
193                 files = talloc_realloc(NULL, files, struct ccan_file *, num+1);
194                 files[num++] = f;
195                 list_del(&f->list);
196         }
197         asort(files, num, cmp_names, NULL);
198
199         for (i = 0; i < num; i++)
200                 list_add_tail(list, &files[i]->list);
201         talloc_free(files);
202 }
203
204 struct manifest *get_manifest(const void *ctx, const char *dir)
205 {
206         struct manifest *m;
207         char *olddir, *canon_dir;
208         unsigned int len;
209         struct list_head *list;
210
211         if (!manifests)
212                 manifests = htable_manifest_new();
213
214         olddir = talloc_getcwd(NULL);
215         if (!olddir)
216                 err(1, "Getting current directory");
217
218         if (chdir(dir) != 0)
219                 err(1, "Failed to chdir to %s", dir);
220
221         canon_dir = talloc_getcwd(olddir);
222         if (!canon_dir)
223                 err(1, "Getting current directory");
224
225         m = htable_manifest_get(manifests, canon_dir);
226         if (m)
227                 goto done;
228
229         m = talloc_linked(ctx, talloc(NULL, struct manifest));
230         m->info_file = NULL;
231         m->compiled = NULL;
232         m->dir = talloc_steal(m, canon_dir);
233         list_head_init(&m->c_files);
234         list_head_init(&m->h_files);
235         list_head_init(&m->api_tests);
236         list_head_init(&m->run_tests);
237         list_head_init(&m->compile_ok_tests);
238         list_head_init(&m->compile_fail_tests);
239         list_head_init(&m->other_test_c_files);
240         list_head_init(&m->other_test_files);
241         list_head_init(&m->other_files);
242         list_head_init(&m->examples);
243         list_head_init(&m->mangled_examples);
244         list_head_init(&m->deps);
245
246         len = strlen(m->dir);
247         while (len && m->dir[len-1] == '/')
248                 m->dir[--len] = '\0';
249
250         m->basename = strrchr(m->dir, '/');
251         if (!m->basename)
252                 errx(1, "I don't expect to be run from the root directory");
253         m->basename++;
254
255         /* We expect the ccan dir to be two levels above module dir. */
256         if (!ccan_dir) {
257                 char *p, *dir;
258                 dir = talloc_strdup(NULL, m->dir);
259                 p = strrchr(dir, '/');
260                 if (!p)
261                         errx(1, "I expect the ccan root directory in ../..");
262                 *p = '\0';
263                 p = strrchr(dir, '/');
264                 if (!p)
265                         errx(1, "I expect the ccan root directory in ../..");
266                 *p = '\0';
267                 ccan_dir = dir;
268         }
269
270         add_files(m, "");
271
272         /* Nicer to run tests in a predictable order. */
273         foreach_ptr(list, &m->api_tests, &m->run_tests, &m->compile_ok_tests,
274                     &m->compile_fail_tests)
275                 sort_files(list);
276
277         htable_manifest_add(manifests, m);
278
279 done:
280         if (chdir(olddir) != 0)
281                 err(1, "Returning to original directory '%s'", olddir);
282         talloc_free(olddir);
283
284         return m;
285 }
286
287
288 /**
289  * remove_comments - strip comments from a line, return copy.
290  * @line: line to copy
291  * @in_comment: are we already within a comment (from prev line).
292  * @unterminated: are we still in a comment for next line.
293  */
294 static char *remove_comments(const char *line, bool in_comment,
295                              bool *unterminated)
296 {
297         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
298
299         p = ret;
300         for (;;) {
301                 if (!in_comment) {
302                         /* Find first comment. */
303                         const char *old_comment = strstr(line, "/*");
304                         const char *new_comment = strstr(line, "//");
305                         const char *comment;
306
307                         if (new_comment && old_comment)
308                                 comment = new_comment < old_comment
309                                         ? new_comment : old_comment;
310                         else if (old_comment)
311                                 comment = old_comment;
312                         else if (new_comment)
313                                 comment = new_comment;
314                         else {
315                                 /* Nothing more. */
316                                 strcpy(p, line);
317                                 *unterminated = false;
318                                 break;
319                         }
320
321                         /* Copy up to comment. */
322                         memcpy(p, line, comment - line);
323                         p += comment - line;
324                         line += comment - line + 2;
325
326                         if (comment == new_comment) {
327                                 /* We're done: goes to EOL. */
328                                 p[0] = '\0';
329                                 *unterminated = false;
330                                 break;
331                         }
332                         in_comment = true;
333                 }
334
335                 if (in_comment) {
336                         const char *end = strstr(line, "*/");
337                         if (!end) {
338                                 *unterminated = true;
339                                 p[0] = '\0';
340                                 break;
341                         }
342                         line = end+2;
343                         in_comment = false;
344                 }
345         }
346         return ret;
347 }
348
/* True if @line consists only of spaces and tabs (or is empty). */
static bool is_empty(const char *line)
{
	/* Skip leading blanks; the line is empty iff we land on the NUL. */
	return line[strspn(line, " \t")] == '\0';
}
353
/* True if @line ends in a backslash, i.e. it continues onto the next line. */
static bool continues(const char *line)
{
	static const char backslash[] = "\\";

	/* Technically, any odd number of these.  But who cares? */
	return strends(line, backslash);
}
359
360 /* Get token if it's equal to token. */
361 bool get_token(const char **line, const char *token)
362 {
363         unsigned int toklen;
364
365         *line += strspn(*line, " \t");
366         if (cisalnum(token[0]) || token[0] == '_')
367                 toklen = strspn(*line, IDENT_CHARS);
368         else {
369                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
370                 toklen = strlen(token);
371         }
372
373         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
374                 *line += toklen;
375                 return true;
376         }
377         return false;
378 }
379
380 char *get_symbol_token(void *ctx, const char **line)
381 {
382         unsigned int toklen;
383         char *ret;
384
385         *line += strspn(*line, " \t");
386         toklen = strspn(*line, IDENT_CHARS);
387         if (!toklen)
388                 return NULL;
389         ret = talloc_strndup(ctx, *line, toklen);
390         *line += toklen;
391         return ret;
392 }
393
394 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
395 {
396         bool brackets, defined;
397
398         cond->inverse = get_token(line, "!");
399         defined = get_token(line, "defined");
400         brackets = get_token(line, "(");
401         cond->symbol = get_symbol_token(cond, line);
402         if (!cond->symbol)
403                 return false;
404         if (brackets && !get_token(line, ")"))
405                 return false;
406         if (!defined)
407                 cond->type = PP_COND_IF;
408
409         /* FIXME: We just chain them, ignoring operators. */
410         if (get_token(line, "||") || get_token(line, "&&")) {
411                 struct pp_conditions *sub = talloc(cond, struct pp_conditions);
412
413                 sub->parent = cond->parent;
414                 sub->type = PP_COND_IFDEF;
415                 if (parse_hash_if(sub, line))
416                         cond->parent = sub;
417         }
418
419         return true;
420 }
421
422 /* FIXME: Get serious! */
423 static struct pp_conditions *analyze_directive(struct ccan_file *f,
424                                                const char *line,
425                                                struct pp_conditions *parent)
426 {
427         struct pp_conditions *cond = talloc(f, struct pp_conditions);
428         bool unused;
429
430         line = remove_comments(line, false, &unused);
431
432         cond->parent = parent;
433         cond->type = PP_COND_IFDEF;
434
435         if (!get_token(&line, "#"))
436                 abort();
437
438         if (get_token(&line, "if")) {
439                 if (!parse_hash_if(cond, &line))
440                         goto unknown;
441         } else if (get_token(&line, "elif")) {
442                 /* Malformed? */
443                 if (!parent)
444                         return NULL;
445                 cond->parent = parent->parent;
446                 /* FIXME: Not quite true.  This implies !parent, but we don't
447                  * do multiple conditionals yet. */
448                 if (!parse_hash_if(cond, &line))
449                         goto unknown;
450         } else if (get_token(&line, "ifdef")) {
451                 bool brackets;
452                 cond->inverse = false;
453                 brackets = get_token(&line, "(");
454                 cond->symbol = get_symbol_token(cond, &line);
455                 if (!cond->symbol)
456                         goto unknown;
457                 if (brackets && !get_token(&line, ")"))
458                         goto unknown;
459         } else if (get_token(&line, "ifndef")) {
460                 bool brackets;
461                 cond->inverse = true;
462                 brackets = get_token(&line, "(");
463                 cond->symbol = get_symbol_token(cond, &line);
464                 if (!cond->symbol)
465                         goto unknown;
466                 if (brackets && !get_token(&line, ")"))
467                         goto unknown;
468         } else if (get_token(&line, "else")) {
469                 /* Malformed? */
470                 if (!parent)
471                         return NULL;
472
473                 *cond = *parent;
474                 cond->inverse = !cond->inverse;
475                 return cond;
476         } else if (get_token(&line, "endif")) {
477                 talloc_free(cond);
478                 /* Malformed? */
479                 if (!parent)
480                         return NULL;
481                 /* Back up one! */
482                 return parent->parent;
483         } else {
484                 /* Not a conditional. */
485                 talloc_free(cond);
486                 return parent;
487         }
488
489         if (!is_empty(line))
490                 goto unknown;
491         return cond;
492
493 unknown:
494         cond->type = PP_COND_UNKNOWN;
495         return cond;
496 }
497
498 /* This parser is rough, but OK if code is reasonably neat. */
499 struct line_info *get_ccan_line_info(struct ccan_file *f)
500 {
501         bool continued = false, in_comment = false;
502         struct pp_conditions *cond = NULL;
503         unsigned int i;
504
505         if (f->line_info)
506                 return f->line_info;
507
508         get_ccan_file_lines(f);
509         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
510
511         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
512                 char *p;
513                 bool still_doc_line;
514
515                 /* Current conditions apply to this line. */
516                 f->line_info[i].cond = cond;
517                 f->line_info[i].continued = continued;
518
519                 if (continued) {
520                         /* Same as last line. */
521                         f->line_info[i].type = f->line_info[i-1].type;
522                         /* Update in_comment. */
523                         remove_comments(f->lines[i], in_comment, &in_comment);
524                         continue;
525                 }
526
527                 /* Preprocessor directive? */
528                 if (!in_comment
529                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
530                         f->line_info[i].type = PREPROC_LINE;
531                         cond = analyze_directive(f, f->lines[i], cond);
532                         continue;
533                 }
534
535                 still_doc_line = (in_comment
536                                   && f->line_info[i-1].type == DOC_LINE);
537
538                 p = remove_comments(f->lines[i], in_comment, &in_comment);
539                 if (is_empty(p)) {
540                         if (strstarts(f->lines[i], "/**") || still_doc_line)
541                                 f->line_info[i].type = DOC_LINE;
542                         else
543                                 f->line_info[i].type = COMMENT_LINE;
544                 } else
545                         f->line_info[i].type = CODE_LINE;
546                 talloc_free(p);
547         }
548         return f->line_info;
549 }
550
551 struct symbol {
552         struct list_node list;
553         const char *name;
554         const unsigned int *value;
555 };
556
557 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
558 {
559         struct symbol *i;
560
561         list_for_each(syms, i, list)
562                 if (streq(sym, i->name))
563                         return i;
564         return NULL;
565 }
566
567 static enum line_compiled get_pp(struct pp_conditions *cond,
568                                  struct list_head *syms)
569 {
570         struct symbol *sym;
571         unsigned int val;
572         enum line_compiled parent, ret;
573
574         /* No conditions?  Easy. */
575         if (!cond)
576                 return COMPILED;
577
578         /* Check we get here at all. */
579         parent = get_pp(cond->parent, syms);
580         if (parent == NOT_COMPILED)
581                 return NOT_COMPILED;
582
583         if (cond->type == PP_COND_UNKNOWN)
584                 return MAYBE_COMPILED;
585
586         sym = find_symbol(syms, cond->symbol);
587         if (!sym)
588                 return MAYBE_COMPILED;
589
590         switch (cond->type) {
591         case PP_COND_IF:
592                 /* Undefined is 0. */
593                 val = sym->value ? *sym->value : 0;
594                 if (!val == cond->inverse)
595                         ret = COMPILED;
596                 else
597                         ret = NOT_COMPILED;
598                 break;
599
600         case PP_COND_IFDEF:
601                 if (cond->inverse == !sym->value)
602                         ret = COMPILED;
603                 else
604                         ret = NOT_COMPILED;
605                 break;
606
607         default:
608                 abort();
609         }
610
611         /* If parent didn't know, NO == NO, but YES == MAYBE. */
612         if (parent == MAYBE_COMPILED && ret == COMPILED)
613                 ret = MAYBE_COMPILED;
614         return ret;
615 }
616
617 static void add_symbol(struct list_head *head,
618                        const char *symbol, const unsigned int *value)
619 {
620         struct symbol *sym = talloc(head, struct symbol);
621         sym->name = symbol;
622         sym->value = value;
623         list_add(head, &sym->list);
624 }
625         
626 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
627                                     const char *symbol,
628                                     const unsigned int *value,
629                                     ...)
630 {
631         enum line_compiled ret;
632         struct list_head *head;
633         va_list ap;
634
635         head = talloc(NULL, struct list_head);
636         list_head_init(head);
637
638         va_start(ap, value);
639         add_symbol(head, symbol, value);
640
641         while ((symbol = va_arg(ap, const char *)) != NULL) {
642                 value = va_arg(ap, const unsigned int *);
643                 add_symbol(head, symbol, value);
644         }
645         ret = get_pp(cond, head);
646         talloc_free(head);
647         return ret;
648 }
649
650 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
651                       const char *errorfmt, ...)
652 {
653         va_list ap;
654
655         struct file_error *fe = talloc(score, struct file_error);
656         fe->file = f;
657         fe->line = line;
658         list_add_tail(&score->per_file_errors, &fe->list);
659
660         if (!score->error)
661                 score->error = talloc_strdup(score, "");
662         
663         if (verbose < 2 && strcount(score->error, "\n") > 5)
664                 return;
665
666         if (line)
667                 score->error = talloc_asprintf_append(score->error,
668                                                       "%s:%u:",
669                                                       f->fullname, line);
670         else
671                 score->error = talloc_asprintf_append(score->error,
672                                                       "%s:", f->fullname);
673
674         va_start(ap, errorfmt);
675         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
676         va_end(ap);
677         score->error = talloc_append_string(score->error, "\n");
678
679         if (verbose < 2 && strcount(score->error, "\n") > 5)
680                 score->error = talloc_append_string(score->error,
681                                     "... more (use -vv to see them all)\n");
682 }