git.ozlabs.org Git - ccan/blob - tools/ccanlint/file_analysis.c
ccanlint: compile tests in parallel
[ccan] / tools / ccanlint / file_analysis.c
1 #include "config.h"
2 #include "ccanlint.h"
3 #include <ccan/talloc/talloc.h>
4 #include <ccan/str/str.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/talloc_link/talloc_link.h>
7 #include <ccan/hash/hash.h>
8 #include <ccan/htable/htable_type.h>
9 #include <ccan/grab_file/grab_file.h>
10 #include <ccan/noerr/noerr.h>
11 #include <ccan/foreach/foreach.h>
12 #include <ccan/asort/asort.h>
13 #include <ccan/array_size/array_size.h>
14 #include "../tools.h"
15 #include <unistd.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <fcntl.h>
19 #include <err.h>
20 #include <errno.h>
21 #include <dirent.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <assert.h>
25
/* Path of the ccan root directory.  If still NULL when get_manifest()
 * builds a manifest, it is set there to the module directory with its
 * last two path components removed. */
26 const char *ccan_dir;
27
/* Hash callback for the manifest table: hashes the bytes of the
 * directory name (terminator excluded) with a base of 0. */
static size_t dir_hash(const char *name)
{
	size_t len = strlen(name);

	return hash(name, len, 0);
}
32
33 static const char *manifest_name(const struct manifest *m)
34 {
35         return m->dir;
36 }
37
38 static bool dir_cmp(const struct manifest *m, const char *dir)
39 {
40         return strcmp(m->dir, dir) == 0;
41 }
42
/* Instantiate the htable_manifest type and its htable_manifest_*()
 * helpers from the three callbacks above. */
43 HTABLE_DEFINE_TYPE(struct manifest, manifest_name, dir_hash, dir_cmp, manifest);
/* Manifests already built by get_manifest(); the table itself is created
 * on the first get_manifest() call. */
44 static struct htable_manifest *manifests;
45
46 const char *get_ccan_file_contents(struct ccan_file *f)
47 {
48         if (!f->contents) {
49                 f->contents = grab_file(f, f->fullname, &f->contents_size);
50                 if (!f->contents)
51                         err(1, "Reading file %s", f->fullname);
52         }
53         return f->contents;
54 }
55
56 char **get_ccan_file_lines(struct ccan_file *f)
57 {
58         if (!f->lines)
59                 f->lines = strsplit(f, get_ccan_file_contents(f), "\n");
60
61         /* FIXME: is f->num_lines necessary? */
62         f->num_lines = talloc_array_length(f->lines) - 1;
63         return f->lines;
64 }
65
66 struct list_head *get_ccan_file_docs(struct ccan_file *f)
67 {
68         if (!f->doc_sections) {
69                 get_ccan_file_lines(f);
70                 f->doc_sections = extract_doc_sections(f->lines);
71         }
72         return f->doc_sections;
73 }
74
75 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
76 {
77         struct ccan_file *f;
78         unsigned int i;
79
80         assert(dir[0] == '/');
81
82         f = talloc(ctx, struct ccan_file);
83         f->lines = NULL;
84         f->line_info = NULL;
85         f->doc_sections = NULL;
86         for (i = 0; i < ARRAY_SIZE(f->compiled); i++)
87                 f->compiled[i] = NULL;
88         f->name = talloc_steal(f, name);
89         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
90         f->contents = NULL;
91         f->simplified = NULL;
92         return f;
93 }
94
95 static void add_files(struct manifest *m, const char *dir)
96 {
97         DIR *d;
98         struct dirent *ent;
99         char **subs = NULL;
100
101         if (dir[0])
102                 d = opendir(dir);
103         else
104                 d = opendir(".");
105         if (!d)
106                 err(1, "Opening directory %s", dir[0] ? dir : ".");
107
108         while ((ent = readdir(d)) != NULL) {
109                 struct stat st;
110                 struct ccan_file *f;
111                 struct list_head *dest;
112                 bool is_c_src;
113
114                 if (ent->d_name[0] == '.')
115                         continue;
116
117                 f = new_ccan_file(m, m->dir,
118                                   talloc_asprintf(m, "%s%s",
119                                                   dir, ent->d_name));
120                 if (lstat(f->name, &st) != 0)
121                         err(1, "lstat %s", f->name);
122
123                 if (S_ISDIR(st.st_mode)) {
124                         size_t len = talloc_array_length(subs);
125                         subs = talloc_realloc(m, subs, char *, len+1);
126                         subs[len] = talloc_append_string(f->name, "/");
127                         continue;
128                 }
129                 if (!S_ISREG(st.st_mode)) {
130                         talloc_free(f);
131                         continue;
132                 }
133
134                 if (streq(f->name, "_info")) {
135                         m->info_file = f;
136                         continue;
137                 }
138
139                 is_c_src = strends(f->name, ".c");
140                 if (!is_c_src && !strends(f->name, ".h")) {
141                         dest = &m->other_files;
142                 } else if (!strchr(f->name, '/')) {
143                         if (is_c_src)
144                                 dest = &m->c_files;
145                         else
146                                 dest = &m->h_files;
147                 } else if (strstarts(f->name, "test/")) {
148                         if (is_c_src) {
149                                 if (strstarts(f->name, "test/api"))
150                                         dest = &m->api_tests;
151                                 else if (strstarts(f->name, "test/run"))
152                                         dest = &m->run_tests;
153                                 else if (strstarts(f->name, "test/compile_ok"))
154                                         dest = &m->compile_ok_tests;
155                                 else if (strstarts(f->name, "test/compile_fail"))
156                                         dest = &m->compile_fail_tests;
157                                 else
158                                         dest = &m->other_test_c_files;
159                         } else
160                                 dest = &m->other_test_files;
161                 } else
162                         dest = &m->other_files;
163
164                 list_add(dest, &f->list);
165         }
166         closedir(d);
167
168         /* Before we recurse, sanity check this is a ccan module. */ 
169         if (!dir[0]) {
170                 size_t i;
171
172                 if (!m->info_file
173                     && list_empty(&m->c_files)
174                     && list_empty(&m->h_files))
175                         errx(1, "No _info, C or H files found here!");
176
177                 for (i = 0; i < talloc_array_length(subs); i++)
178                         add_files(m, subs[i]);
179         }
180         talloc_free(subs);
181 }
182
183 static int cmp_names(struct ccan_file *const *a, struct ccan_file *const *b,
184                      void *unused)
185 {
186         return strcmp((*a)->name, (*b)->name);
187 }
188
189 static void sort_files(struct list_head *list)
190 {
191         struct ccan_file **files = NULL, *f;
192         unsigned int i, num;
193
194         num = 0;
195         while ((f = list_top(list, struct ccan_file, list)) != NULL) {
196                 files = talloc_realloc(NULL, files, struct ccan_file *, num+1);
197                 files[num++] = f;
198                 list_del(&f->list);
199         }
200         asort(files, num, cmp_names, NULL);
201
202         for (i = 0; i < num; i++)
203                 list_add_tail(list, &files[i]->list);
204         talloc_free(files);
205 }
206
207 struct manifest *get_manifest(const void *ctx, const char *dir)
208 {
209         struct manifest *m;
210         char *olddir, *canon_dir;
211         unsigned int len;
212         struct list_head *list;
213
214         if (!manifests)
215                 manifests = htable_manifest_new();
216
217         olddir = talloc_getcwd(NULL);
218         if (!olddir)
219                 err(1, "Getting current directory");
220
221         if (chdir(dir) != 0)
222                 err(1, "Failed to chdir to %s", dir);
223
224         canon_dir = talloc_getcwd(olddir);
225         if (!canon_dir)
226                 err(1, "Getting current directory");
227
228         m = htable_manifest_get(manifests, canon_dir);
229         if (m)
230                 goto done;
231
232         m = talloc_linked(ctx, talloc(NULL, struct manifest));
233         m->info_file = NULL;
234         m->compiled[COMPILE_NORMAL] = m->compiled[COMPILE_NOFEAT] = NULL;
235         m->dir = talloc_steal(m, canon_dir);
236         list_head_init(&m->c_files);
237         list_head_init(&m->h_files);
238         list_head_init(&m->api_tests);
239         list_head_init(&m->run_tests);
240         list_head_init(&m->compile_ok_tests);
241         list_head_init(&m->compile_fail_tests);
242         list_head_init(&m->other_test_c_files);
243         list_head_init(&m->other_test_files);
244         list_head_init(&m->other_files);
245         list_head_init(&m->examples);
246         list_head_init(&m->mangled_examples);
247         list_head_init(&m->deps);
248
249         len = strlen(m->dir);
250         while (len && m->dir[len-1] == '/')
251                 m->dir[--len] = '\0';
252
253         m->basename = strrchr(m->dir, '/');
254         if (!m->basename)
255                 errx(1, "I don't expect to be run from the root directory");
256         m->basename++;
257
258         /* We expect the ccan dir to be two levels above module dir. */
259         if (!ccan_dir) {
260                 char *p, *dir;
261                 dir = talloc_strdup(NULL, m->dir);
262                 p = strrchr(dir, '/');
263                 if (!p)
264                         errx(1, "I expect the ccan root directory in ../..");
265                 *p = '\0';
266                 p = strrchr(dir, '/');
267                 if (!p)
268                         errx(1, "I expect the ccan root directory in ../..");
269                 *p = '\0';
270                 ccan_dir = dir;
271         }
272
273         add_files(m, "");
274
275         /* Nicer to run tests in a predictable order. */
276         foreach_ptr(list, &m->api_tests, &m->run_tests, &m->compile_ok_tests,
277                     &m->compile_fail_tests)
278                 sort_files(list);
279
280         htable_manifest_add(manifests, m);
281
282 done:
283         if (chdir(olddir) != 0)
284                 err(1, "Returning to original directory '%s'", olddir);
285         talloc_free(olddir);
286
287         return m;
288 }
289
290
291 /**
292  * remove_comments - strip comments from a line, return copy.
293  * @line: line to copy
294  * @in_comment: are we already within a comment (from prev line).
295  * @unterminated: are we still in a comment for next line.
296  */
297 static char *remove_comments(const char *line, bool in_comment,
298                              bool *unterminated)
299 {
300         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
301
302         p = ret;
303         for (;;) {
304                 if (!in_comment) {
305                         /* Find first comment. */
306                         const char *old_comment = strstr(line, "/*");
307                         const char *new_comment = strstr(line, "//");
308                         const char *comment;
309
310                         if (new_comment && old_comment)
311                                 comment = new_comment < old_comment
312                                         ? new_comment : old_comment;
313                         else if (old_comment)
314                                 comment = old_comment;
315                         else if (new_comment)
316                                 comment = new_comment;
317                         else {
318                                 /* Nothing more. */
319                                 strcpy(p, line);
320                                 *unterminated = false;
321                                 break;
322                         }
323
324                         /* Copy up to comment. */
325                         memcpy(p, line, comment - line);
326                         p += comment - line;
327                         line += comment - line + 2;
328
329                         if (comment == new_comment) {
330                                 /* We're done: goes to EOL. */
331                                 p[0] = '\0';
332                                 *unterminated = false;
333                                 break;
334                         }
335                         in_comment = true;
336                 }
337
338                 if (in_comment) {
339                         const char *end = strstr(line, "*/");
340                         if (!end) {
341                                 *unterminated = true;
342                                 p[0] = '\0';
343                                 break;
344                         }
345                         line = end+2;
346                         in_comment = false;
347                 }
348         }
349         return ret;
350 }
351
/* True when @line contains nothing but spaces, tabs and carriage
 * returns (the empty string included). */
static bool is_empty(const char *line)
{
	/* The first character outside the blank set must be the terminator. */
	return line[strspn(line, " \r\t")] == '\0';
}
356
/* True when @line ends in a backslash, i.e. the next line continues it. */
static bool continues(const char *line)
{
	/* Technically, any odd number of these.  But who cares? */
	bool ends_in_backslash = strends(line, "\\");

	return ends_in_backslash;
}
362
363 /* Get token if it's equal to token. */
364 bool get_token(const char **line, const char *token)
365 {
366         unsigned int toklen;
367
368         *line += strspn(*line, " \t");
369         if (cisalnum(token[0]) || token[0] == '_')
370                 toklen = strspn(*line, IDENT_CHARS);
371         else {
372                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
373                 toklen = strlen(token);
374         }
375
376         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
377                 *line += toklen;
378                 return true;
379         }
380         return false;
381 }
382
383 char *get_symbol_token(void *ctx, const char **line)
384 {
385         unsigned int toklen;
386         char *ret;
387
388         *line += strspn(*line, " \t");
389         toklen = strspn(*line, IDENT_CHARS);
390         if (!toklen)
391                 return NULL;
392         ret = talloc_strndup(ctx, *line, toklen);
393         *line += toklen;
394         return ret;
395 }
396
397 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
398 {
399         bool brackets, defined;
400
401         cond->inverse = get_token(line, "!");
402         defined = get_token(line, "defined");
403         brackets = get_token(line, "(");
404         cond->symbol = get_symbol_token(cond, line);
405         if (!cond->symbol)
406                 return false;
407         if (brackets && !get_token(line, ")"))
408                 return false;
409         if (!defined)
410                 cond->type = PP_COND_IF;
411
412         /* FIXME: We just chain them, ignoring operators. */
413         if (get_token(line, "||") || get_token(line, "&&")) {
414                 struct pp_conditions *sub = talloc(cond, struct pp_conditions);
415
416                 sub->parent = cond->parent;
417                 sub->type = PP_COND_IFDEF;
418                 if (parse_hash_if(sub, line))
419                         cond->parent = sub;
420         }
421
422         return true;
423 }
424
425 /* FIXME: Get serious! */
426 static struct pp_conditions *analyze_directive(struct ccan_file *f,
427                                                const char *line,
428                                                struct pp_conditions *parent)
429 {
430         struct pp_conditions *cond = talloc(f, struct pp_conditions);
431         bool unused;
432
433         line = remove_comments(line, false, &unused);
434
435         cond->parent = parent;
436         cond->type = PP_COND_IFDEF;
437
438         if (!get_token(&line, "#"))
439                 abort();
440
441         if (get_token(&line, "if")) {
442                 if (!parse_hash_if(cond, &line))
443                         goto unknown;
444         } else if (get_token(&line, "elif")) {
445                 /* Malformed? */
446                 if (!parent)
447                         return NULL;
448                 cond->parent = parent->parent;
449                 /* FIXME: Not quite true.  This implies !parent, but we don't
450                  * do multiple conditionals yet. */
451                 if (!parse_hash_if(cond, &line))
452                         goto unknown;
453         } else if (get_token(&line, "ifdef")) {
454                 bool brackets;
455                 cond->inverse = false;
456                 brackets = get_token(&line, "(");
457                 cond->symbol = get_symbol_token(cond, &line);
458                 if (!cond->symbol)
459                         goto unknown;
460                 if (brackets && !get_token(&line, ")"))
461                         goto unknown;
462         } else if (get_token(&line, "ifndef")) {
463                 bool brackets;
464                 cond->inverse = true;
465                 brackets = get_token(&line, "(");
466                 cond->symbol = get_symbol_token(cond, &line);
467                 if (!cond->symbol)
468                         goto unknown;
469                 if (brackets && !get_token(&line, ")"))
470                         goto unknown;
471         } else if (get_token(&line, "else")) {
472                 /* Malformed? */
473                 if (!parent)
474                         return NULL;
475
476                 *cond = *parent;
477                 cond->inverse = !cond->inverse;
478                 return cond;
479         } else if (get_token(&line, "endif")) {
480                 talloc_free(cond);
481                 /* Malformed? */
482                 if (!parent)
483                         return NULL;
484                 /* Back up one! */
485                 return parent->parent;
486         } else {
487                 /* Not a conditional. */
488                 talloc_free(cond);
489                 return parent;
490         }
491
492         if (!is_empty(line))
493                 goto unknown;
494         return cond;
495
496 unknown:
497         cond->type = PP_COND_UNKNOWN;
498         return cond;
499 }
500
501 /* This parser is rough, but OK if code is reasonably neat. */
502 struct line_info *get_ccan_line_info(struct ccan_file *f)
503 {
504         bool continued = false, in_comment = false;
505         struct pp_conditions *cond = NULL;
506         unsigned int i;
507
508         if (f->line_info)
509                 return f->line_info;
510
511         get_ccan_file_lines(f);
512         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
513
514         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
515                 char *p;
516                 bool still_doc_line;
517
518                 /* Current conditions apply to this line. */
519                 f->line_info[i].cond = cond;
520                 f->line_info[i].continued = continued;
521
522                 if (continued) {
523                         /* Same as last line. */
524                         f->line_info[i].type = f->line_info[i-1].type;
525                         /* Update in_comment. */
526                         remove_comments(f->lines[i], in_comment, &in_comment);
527                         continue;
528                 }
529
530                 /* Preprocessor directive? */
531                 if (!in_comment
532                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
533                         f->line_info[i].type = PREPROC_LINE;
534                         cond = analyze_directive(f, f->lines[i], cond);
535                         continue;
536                 }
537
538                 still_doc_line = (in_comment
539                                   && f->line_info[i-1].type == DOC_LINE);
540
541                 p = remove_comments(f->lines[i], in_comment, &in_comment);
542                 if (is_empty(p)) {
543                         if (strstarts(f->lines[i], "/**") || still_doc_line)
544                                 f->line_info[i].type = DOC_LINE;
545                         else
546                                 f->line_info[i].type = COMMENT_LINE;
547                 } else
548                         f->line_info[i].type = CODE_LINE;
549                 talloc_free(p);
550         }
551         return f->line_info;
552 }
553
/* One preprocessor symbol in the list that get_pp() evaluates against. */
554 struct symbol {
555         struct list_node list;
	/* Symbol name; find_symbol() matches it with streq(). */
556         const char *name;
	/* NULL means the symbol is undefined (get_pp() then treats it as 0
	 * for #if); otherwise points at the symbol's value. */
557         const unsigned int *value;
558 };
559
560 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
561 {
562         struct symbol *i;
563
564         list_for_each(syms, i, list)
565                 if (streq(sym, i->name))
566                         return i;
567         return NULL;
568 }
569
570 static enum line_compiled get_pp(struct pp_conditions *cond,
571                                  struct list_head *syms)
572 {
573         struct symbol *sym;
574         unsigned int val;
575         enum line_compiled parent, ret;
576
577         /* No conditions?  Easy. */
578         if (!cond)
579                 return COMPILED;
580
581         /* Check we get here at all. */
582         parent = get_pp(cond->parent, syms);
583         if (parent == NOT_COMPILED)
584                 return NOT_COMPILED;
585
586         if (cond->type == PP_COND_UNKNOWN)
587                 return MAYBE_COMPILED;
588
589         sym = find_symbol(syms, cond->symbol);
590         if (!sym)
591                 return MAYBE_COMPILED;
592
593         switch (cond->type) {
594         case PP_COND_IF:
595                 /* Undefined is 0. */
596                 val = sym->value ? *sym->value : 0;
597                 if (!val == cond->inverse)
598                         ret = COMPILED;
599                 else
600                         ret = NOT_COMPILED;
601                 break;
602
603         case PP_COND_IFDEF:
604                 if (cond->inverse == !sym->value)
605                         ret = COMPILED;
606                 else
607                         ret = NOT_COMPILED;
608                 break;
609
610         default:
611                 abort();
612         }
613
614         /* If parent didn't know, NO == NO, but YES == MAYBE. */
615         if (parent == MAYBE_COMPILED && ret == COMPILED)
616                 ret = MAYBE_COMPILED;
617         return ret;
618 }
619
620 static void add_symbol(struct list_head *head,
621                        const char *symbol, const unsigned int *value)
622 {
623         struct symbol *sym = talloc(head, struct symbol);
624         sym->name = symbol;
625         sym->value = value;
626         list_add(head, &sym->list);
627 }
628         
629 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
630                                     const char *symbol,
631                                     const unsigned int *value,
632                                     ...)
633 {
634         enum line_compiled ret;
635         struct list_head *head;
636         va_list ap;
637
638         head = talloc(NULL, struct list_head);
639         list_head_init(head);
640
641         va_start(ap, value);
642         add_symbol(head, symbol, value);
643
644         while ((symbol = va_arg(ap, const char *)) != NULL) {
645                 value = va_arg(ap, const unsigned int *);
646                 add_symbol(head, symbol, value);
647         }
648         ret = get_pp(cond, head);
649         talloc_free(head);
650         return ret;
651 }
652
653 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
654                       const char *errorfmt, ...)
655 {
656         va_list ap;
657
658         struct file_error *fe = talloc(score, struct file_error);
659         fe->file = f;
660         fe->line = line;
661         list_add_tail(&score->per_file_errors, &fe->list);
662
663         if (!score->error)
664                 score->error = talloc_strdup(score, "");
665         
666         if (verbose < 2 && strcount(score->error, "\n") > 5)
667                 return;
668
669         if (line)
670                 score->error = talloc_asprintf_append(score->error,
671                                                       "%s:%u:",
672                                                       f->fullname, line);
673         else
674                 score->error = talloc_asprintf_append(score->error,
675                                                       "%s:", f->fullname);
676
677         va_start(ap, errorfmt);
678         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
679         va_end(ap);
680         score->error = talloc_append_string(score->error, "\n");
681
682         if (verbose < 2 && strcount(score->error, "\n") > 5)
683                 score->error = talloc_append_string(score->error,
684                                     "... more (use -vv to see them all)\n");
685 }