b941f94e1c31e84c765b522f36da38338b7bca0b
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/talloc_link/talloc_link.h>
6 #include <ccan/hash/hash.h>
7 #include <ccan/htable/htable_type.h>
8 #include <ccan/grab_file/grab_file.h>
9 #include <ccan/noerr/noerr.h>
10 #include <ccan/foreach/foreach.h>
11 #include <ccan/asort/asort.h>
12 #include "../tools.h"
13 #include <unistd.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include <fcntl.h>
17 #include <err.h>
18 #include <errno.h>
19 #include <dirent.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <assert.h>
23
/* Top-level ccan directory.  Set by the first get_manifest() call to that
 * module's canonical directory with its last two path components removed. */
const char *ccan_dir;
25
/* Hash callback for the manifest table: hashes the bytes of the directory
 * string (excluding the NUL terminator) with a base of 0. */
static size_t dir_hash(const char *name)
{
        const size_t len = strlen(name);

        return hash(name, len, 0);
}
30
31 static const char *manifest_name(const struct manifest *m)
32 {
33         return m->dir;
34 }
35
36 static bool dir_cmp(const struct manifest *m, const char *dir)
37 {
38         return strcmp(m->dir, dir) == 0;
39 }
40
/* Instantiate the typed hash table (htable_manifest_*) for struct manifest,
 * using manifest_name, dir_hash and dir_cmp as its callbacks. */
HTABLE_DEFINE_TYPE(struct manifest, manifest_name, dir_hash, dir_cmp, manifest);
/* Table of manifests already built; created on first use in get_manifest(). */
static struct htable_manifest *manifests;
43
44 const char *get_ccan_file_contents(struct ccan_file *f)
45 {
46         if (!f->contents) {
47                 f->contents = grab_file(f, f->fullname, &f->contents_size);
48                 if (!f->contents)
49                         err(1, "Reading file %s", f->fullname);
50         }
51         return f->contents;
52 }
53
54 char **get_ccan_file_lines(struct ccan_file *f)
55 {
56         if (!f->lines)
57                 f->lines = strsplit(f, get_ccan_file_contents(f),
58                                     "\n", &f->num_lines);
59
60         return f->lines;
61 }
62
63 struct list_head *get_ccan_file_docs(struct ccan_file *f)
64 {
65         if (!f->doc_sections) {
66                 get_ccan_file_lines(f);
67                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
68         }
69         return f->doc_sections;
70 }
71
72 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
73 {
74         struct ccan_file *f;
75
76         assert(dir[0] == '/');
77
78         f = talloc(ctx, struct ccan_file);
79         f->lines = NULL;
80         f->line_info = NULL;
81         f->doc_sections = NULL;
82         f->compiled = NULL;
83         f->name = talloc_steal(f, name);
84         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
85         f->contents = NULL;
86         f->cov_compiled = NULL;
87         return f;
88 }
89
90 static void add_files(struct manifest *m, const char *dir)
91 {
92         DIR *d;
93         struct dirent *ent;
94
95         if (dir[0])
96                 d = opendir(dir);
97         else
98                 d = opendir(".");
99         if (!d)
100                 err(1, "Opening directory %s", dir[0] ? dir : ".");
101
102         while ((ent = readdir(d)) != NULL) {
103                 struct stat st;
104                 struct ccan_file *f;
105                 struct list_head *dest;
106                 bool is_c_src;
107
108                 if (ent->d_name[0] == '.')
109                         continue;
110
111                 f = new_ccan_file(m, m->dir,
112                                   talloc_asprintf(m, "%s%s",
113                                                   dir, ent->d_name));
114                 if (lstat(f->name, &st) != 0)
115                         err(1, "lstat %s", f->name);
116
117                 if (S_ISDIR(st.st_mode)) {
118                         f->name = talloc_append_string(f->name, "/");
119                         add_files(m, f->name);
120                         continue;
121                 }
122                 if (!S_ISREG(st.st_mode)) {
123                         talloc_free(f);
124                         continue;
125                 }
126
127                 if (streq(f->name, "_info")) {
128                         m->info_file = f;
129                         continue;
130                 }
131
132                 is_c_src = strends(f->name, ".c");
133                 if (!is_c_src && !strends(f->name, ".h")) {
134                         dest = &m->other_files;
135                         continue;
136                 }
137
138                 if (!strchr(f->name, '/')) {
139                         if (is_c_src)
140                                 dest = &m->c_files;
141                         else
142                                 dest = &m->h_files;
143                 } else if (strstarts(f->name, "test/")) {
144                         if (is_c_src) {
145                                 if (strstarts(f->name, "test/api"))
146                                         dest = &m->api_tests;
147                                 else if (strstarts(f->name, "test/run"))
148                                         dest = &m->run_tests;
149                                 else if (strstarts(f->name, "test/compile_ok"))
150                                         dest = &m->compile_ok_tests;
151                                 else if (strstarts(f->name, "test/compile_fail"))
152                                         dest = &m->compile_fail_tests;
153                                 else
154                                         dest = &m->other_test_c_files;
155                         } else
156                                 dest = &m->other_test_files;
157                 } else
158                         dest = &m->other_files;
159
160                 list_add(dest, &f->list);
161         }
162         closedir(d);
163 }
164
165 static int cmp_names(struct ccan_file *const *a, struct ccan_file *const *b,
166                      void *unused)
167 {
168         return strcmp((*a)->name, (*b)->name);
169 }
170
171 static void sort_files(struct list_head *list)
172 {
173         struct ccan_file **files = NULL, *f;
174         unsigned int i, num;
175
176         num = 0;
177         while ((f = list_top(list, struct ccan_file, list)) != NULL) {
178                 files = talloc_realloc(NULL, files, struct ccan_file *, num+1);
179                 files[num++] = f;
180                 list_del(&f->list);
181         }
182         asort(files, num, cmp_names, NULL);
183
184         for (i = 0; i < num; i++)
185                 list_add_tail(list, &files[i]->list);
186         talloc_free(files);
187 }
188
189 struct manifest *get_manifest(const void *ctx, const char *dir)
190 {
191         struct manifest *m;
192         char *olddir, *canon_dir;
193         unsigned int len;
194         struct list_head *list;
195
196         if (!manifests)
197                 manifests = htable_manifest_new();
198
199         olddir = talloc_getcwd(NULL);
200         if (!olddir)
201                 err(1, "Getting current directory");
202
203         if (chdir(dir) != 0)
204                 err(1, "Failed to chdir to %s", dir);
205
206         canon_dir = talloc_getcwd(olddir);
207         if (!canon_dir)
208                 err(1, "Getting current directory");
209
210         m = htable_manifest_get(manifests, canon_dir);
211         if (m)
212                 goto done;
213
214         m = talloc_linked(ctx, talloc(NULL, struct manifest));
215         m->info_file = NULL;
216         m->compiled = NULL;
217         m->dir = talloc_steal(m, canon_dir);
218         list_head_init(&m->c_files);
219         list_head_init(&m->h_files);
220         list_head_init(&m->api_tests);
221         list_head_init(&m->run_tests);
222         list_head_init(&m->compile_ok_tests);
223         list_head_init(&m->compile_fail_tests);
224         list_head_init(&m->other_test_c_files);
225         list_head_init(&m->other_test_files);
226         list_head_init(&m->other_files);
227         list_head_init(&m->examples);
228         list_head_init(&m->mangled_examples);
229         list_head_init(&m->deps);
230
231         len = strlen(m->dir);
232         while (len && m->dir[len-1] == '/')
233                 m->dir[--len] = '\0';
234
235         m->basename = strrchr(m->dir, '/');
236         if (!m->basename)
237                 errx(1, "I don't expect to be run from the root directory");
238         m->basename++;
239
240         /* We expect the ccan dir to be two levels above module dir. */
241         if (!ccan_dir) {
242                 char *p;
243                 ccan_dir = talloc_strdup(NULL, m->dir);
244                 p = strrchr(ccan_dir, '/');
245                 *p = '\0';
246                 p = strrchr(ccan_dir, '/');
247                 *p = '\0';
248         }
249
250         add_files(m, "");
251
252         /* Nicer to run tests in a predictable order. */
253         foreach_ptr(list, &m->api_tests, &m->run_tests, &m->compile_ok_tests,
254                     &m->compile_fail_tests)
255                 sort_files(list);
256
257         htable_manifest_add(manifests, m);
258
259 done:
260         if (chdir(olddir) != 0)
261                 err(1, "Returning to original directory '%s'", olddir);
262         talloc_free(olddir);
263
264         return m;
265 }
266
267
268 /**
269  * remove_comments - strip comments from a line, return copy.
270  * @line: line to copy
271  * @in_comment: are we already within a comment (from prev line).
272  * @unterminated: are we still in a comment for next line.
273  */
274 static char *remove_comments(const char *line, bool in_comment,
275                              bool *unterminated)
276 {
277         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
278
279         p = ret;
280         for (;;) {
281                 if (!in_comment) {
282                         /* Find first comment. */
283                         const char *old_comment = strstr(line, "/*");
284                         const char *new_comment = strstr(line, "//");
285                         const char *comment;
286
287                         if (new_comment && old_comment)
288                                 comment = new_comment < old_comment
289                                         ? new_comment : old_comment;
290                         else if (old_comment)
291                                 comment = old_comment;
292                         else if (new_comment)
293                                 comment = new_comment;
294                         else {
295                                 /* Nothing more. */
296                                 strcpy(p, line);
297                                 *unterminated = false;
298                                 break;
299                         }
300
301                         /* Copy up to comment. */
302                         memcpy(p, line, comment - line);
303                         p += comment - line;
304                         line += comment - line + 2;
305
306                         if (comment == new_comment) {
307                                 /* We're done: goes to EOL. */
308                                 p[0] = '\0';
309                                 *unterminated = false;
310                                 break;
311                         }
312                         in_comment = true;
313                 }
314
315                 if (in_comment) {
316                         const char *end = strstr(line, "*/");
317                         if (!end) {
318                                 *unterminated = true;
319                                 p[0] = '\0';
320                                 break;
321                         }
322                         line = end+2;
323                         in_comment = false;
324                 }
325         }
326         return ret;
327 }
328
/* True if @line consists solely of spaces and tabs (or is empty). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; an empty line ends right there. */
        return line[strspn(line, " \t")] == '\0';
}
333
/* True if @line ends with a backslash, i.e. it continues onto the next line.
 * Strictly this should require an odd number of trailing backslashes, but
 * that distinction is deliberately ignored. */
static bool continues(const char *line)
{
        const bool ends_with_backslash = strends(line, "\\");

        return ends_with_backslash;
}
339
340 /* Get token if it's equal to token. */
341 bool get_token(const char **line, const char *token)
342 {
343         unsigned int toklen;
344
345         *line += strspn(*line, " \t");
346         if (isalnum(token[0]) || token[0] == '_')
347                 toklen = strspn(*line, IDENT_CHARS);
348         else {
349                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
350                 toklen = strlen(token);
351         }
352
353         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
354                 *line += toklen;
355                 return true;
356         }
357         return false;
358 }
359
360 char *get_symbol_token(void *ctx, const char **line)
361 {
362         unsigned int toklen;
363         char *ret;
364
365         *line += strspn(*line, " \t");
366         toklen = strspn(*line, IDENT_CHARS);
367         if (!toklen)
368                 return NULL;
369         ret = talloc_strndup(ctx, *line, toklen);
370         *line += toklen;
371         return ret;
372 }
373
374 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
375 {
376         bool brackets, defined;
377
378         cond->inverse = get_token(line, "!");
379         defined = get_token(line, "defined");
380         brackets = get_token(line, "(");
381         cond->symbol = get_symbol_token(cond, line);
382         if (!cond->symbol)
383                 return false;
384         if (brackets && !get_token(line, ")"))
385                 return false;
386         if (!defined)
387                 cond->type = PP_COND_IF;
388         return true;
389 }
390
391 /* FIXME: Get serious! */
392 static struct pp_conditions *analyze_directive(struct ccan_file *f,
393                                                const char *line,
394                                                struct pp_conditions *parent)
395 {
396         struct pp_conditions *cond = talloc(f, struct pp_conditions);
397         bool unused;
398
399         line = remove_comments(line, false, &unused);
400
401         cond->parent = parent;
402         cond->type = PP_COND_IFDEF;
403
404         if (!get_token(&line, "#"))
405                 abort();
406
407         if (get_token(&line, "if")) {
408                 if (!parse_hash_if(cond, &line))
409                         goto unknown;
410         } else if (get_token(&line, "elif")) {
411                 /* Malformed? */
412                 if (!parent)
413                         return NULL;
414                 cond->parent = parent->parent;
415                 /* FIXME: Not quite true.  This implies !parent, but we don't
416                  * do multiple conditionals yet. */
417                 if (!parse_hash_if(cond, &line))
418                         goto unknown;
419         } else if (get_token(&line, "ifdef")) {
420                 bool brackets;
421                 cond->inverse = false;
422                 brackets = get_token(&line, "(");
423                 cond->symbol = get_symbol_token(cond, &line);
424                 if (!cond->symbol)
425                         goto unknown;
426                 if (brackets && !get_token(&line, ")"))
427                         goto unknown;
428         } else if (get_token(&line, "ifndef")) {
429                 bool brackets;
430                 cond->inverse = true;
431                 brackets = get_token(&line, "(");
432                 cond->symbol = get_symbol_token(cond, &line);
433                 if (!cond->symbol)
434                         goto unknown;
435                 if (brackets && !get_token(&line, ")"))
436                         goto unknown;
437         } else if (get_token(&line, "else")) {
438                 /* Malformed? */
439                 if (!parent)
440                         return NULL;
441
442                 *cond = *parent;
443                 cond->inverse = !cond->inverse;
444                 return cond;
445         } else if (get_token(&line, "endif")) {
446                 talloc_free(cond);
447                 /* Malformed? */
448                 if (!parent)
449                         return NULL;
450                 /* Back up one! */
451                 return parent->parent;
452         } else {
453                 /* Not a conditional. */
454                 talloc_free(cond);
455                 return parent;
456         }
457
458         if (!is_empty(line))
459                 goto unknown;
460         return cond;
461
462 unknown:
463         cond->type = PP_COND_UNKNOWN;
464         return cond;
465 }
466
467 /* This parser is rough, but OK if code is reasonably neat. */
468 struct line_info *get_ccan_line_info(struct ccan_file *f)
469 {
470         bool continued = false, in_comment = false;
471         struct pp_conditions *cond = NULL;
472         unsigned int i;
473
474         if (f->line_info)
475                 return f->line_info;
476
477         get_ccan_file_lines(f);
478         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
479
480         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
481                 char *p;
482                 bool still_doc_line;
483
484                 /* Current conditions apply to this line. */
485                 f->line_info[i].cond = cond;
486                 f->line_info[i].continued = continued;
487
488                 if (continued) {
489                         /* Same as last line. */
490                         f->line_info[i].type = f->line_info[i-1].type;
491                         /* Update in_comment. */
492                         remove_comments(f->lines[i], in_comment, &in_comment);
493                         continue;
494                 }
495
496                 /* Preprocessor directive? */
497                 if (!in_comment
498                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
499                         f->line_info[i].type = PREPROC_LINE;
500                         cond = analyze_directive(f, f->lines[i], cond);
501                         continue;
502                 }
503
504                 still_doc_line = (in_comment
505                                   && f->line_info[i-1].type == DOC_LINE);
506
507                 p = remove_comments(f->lines[i], in_comment, &in_comment);
508                 if (is_empty(p)) {
509                         if (strstarts(f->lines[i], "/**") || still_doc_line)
510                                 f->line_info[i].type = DOC_LINE;
511                         else
512                                 f->line_info[i].type = COMMENT_LINE;
513                 } else
514                         f->line_info[i].type = CODE_LINE;
515                 talloc_free(p);
516         }
517         return f->line_info;
518 }
519
/* One preprocessor symbol whose state the caller of get_ccan_line_pp()
 * has specified. */
struct symbol {
        /* Links the symbol into the list built by add_symbol(). */
        struct list_node list;
        /* Symbol name, matched with streq() in find_symbol(). */
        const char *name;
        /* NULL is treated by get_pp() as "not defined"; otherwise points at
         * the symbol's value. */
        const unsigned int *value;
};
525
526 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
527 {
528         struct symbol *i;
529
530         list_for_each(syms, i, list)
531                 if (streq(sym, i->name))
532                         return i;
533         return NULL;
534 }
535
536 static enum line_compiled get_pp(struct pp_conditions *cond,
537                                  struct list_head *syms)
538 {
539         struct symbol *sym;
540         unsigned int val;
541         enum line_compiled parent, ret;
542
543         /* No conditions?  Easy. */
544         if (!cond)
545                 return COMPILED;
546
547         /* Check we get here at all. */
548         parent = get_pp(cond->parent, syms);
549         if (parent == NOT_COMPILED)
550                 return NOT_COMPILED;
551
552         if (cond->type == PP_COND_UNKNOWN)
553                 return MAYBE_COMPILED;
554
555         sym = find_symbol(syms, cond->symbol);
556         if (!sym)
557                 return MAYBE_COMPILED;
558
559         switch (cond->type) {
560         case PP_COND_IF:
561                 /* Undefined is 0. */
562                 val = sym->value ? *sym->value : 0;
563                 if (!val == cond->inverse)
564                         ret = COMPILED;
565                 else
566                         ret = NOT_COMPILED;
567                 break;
568
569         case PP_COND_IFDEF:
570                 if (cond->inverse == !sym->value)
571                         ret = COMPILED;
572                 else
573                         ret = NOT_COMPILED;
574                 break;
575
576         default:
577                 abort();
578         }
579
580         /* If parent didn't know, NO == NO, but YES == MAYBE. */
581         if (parent == MAYBE_COMPILED && ret == COMPILED)
582                 ret = MAYBE_COMPILED;
583         return ret;
584 }
585
586 static void add_symbol(struct list_head *head,
587                        const char *symbol, const unsigned int *value)
588 {
589         struct symbol *sym = talloc(head, struct symbol);
590         sym->name = symbol;
591         sym->value = value;
592         list_add(head, &sym->list);
593 }
594         
595 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
596                                     const char *symbol,
597                                     const unsigned int *value,
598                                     ...)
599 {
600         enum line_compiled ret;
601         struct list_head *head;
602         va_list ap;
603
604         head = talloc(NULL, struct list_head);
605         list_head_init(head);
606
607         va_start(ap, value);
608         add_symbol(head, symbol, value);
609
610         while ((symbol = va_arg(ap, const char *)) != NULL) {
611                 value = va_arg(ap, const unsigned int *);
612                 add_symbol(head, symbol, value);
613         }
614         ret = get_pp(cond, head);
615         talloc_free(head);
616         return ret;
617 }
618
619 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
620                       const char *errorfmt, ...)
621 {
622         va_list ap;
623
624         struct file_error *fe = talloc(score, struct file_error);
625         fe->file = f;
626         fe->line = line;
627         list_add_tail(&score->per_file_errors, &fe->list);
628
629         if (!score->error)
630                 score->error = talloc_strdup(score, "");
631         
632         if (verbose < 2 && strcount(score->error, "\n") > 5)
633                 return;
634
635         if (line)
636                 score->error = talloc_asprintf_append(score->error,
637                                                       "%s:%u:",
638                                                       f->fullname, line);
639         else
640                 score->error = talloc_asprintf_append(score->error,
641                                                       "%s:", f->fullname);
642
643         va_start(ap, errorfmt);
644         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
645         va_end(ap);
646         score->error = talloc_append_string(score->error, "\n");
647
648         if (verbose < 2 && strcount(score->error, "\n") > 5)
649                 score->error = talloc_append_string(score->error,
650                                     "... more (use -vv to see them all)\n");
651 }