git.ozlabs.org Git - ccan/blob - tools/ccanlint/file_analysis.c
block_pool, ccan_tokenizer, stringmap: add ccanlint license suppressions.
[ccan] / tools / ccanlint / file_analysis.c
1 #include "config.h"
2 #include "ccanlint.h"
3 #include <ccan/talloc/talloc.h>
4 #include <ccan/str/str.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/talloc_link/talloc_link.h>
7 #include <ccan/hash/hash.h>
8 #include <ccan/htable/htable_type.h>
9 #include <ccan/grab_file/grab_file.h>
10 #include <ccan/noerr/noerr.h>
11 #include <ccan/foreach/foreach.h>
12 #include <ccan/asort/asort.h>
13 #include "../tools.h"
14 #include <unistd.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <fcntl.h>
18 #include <err.h>
19 #include <errno.h>
20 #include <dirent.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <assert.h>
24
/* Root of the ccan tree.  get_manifest() fills this in (two directory
 * levels above the module directory) if it is still NULL when a manifest
 * is first built. */
const char *ccan_dir;
26
/* Hash callback for the manifest table: hashes the bytes of the
 * NUL-terminated directory name with a base of 0. */
static size_t dir_hash(const char *name)
{
        size_t len = strlen(name);

        return hash(name, len, 0);
}
31
32 static const char *manifest_name(const struct manifest *m)
33 {
34         return m->dir;
35 }
36
37 static bool dir_cmp(const struct manifest *m, const char *dir)
38 {
39         return strcmp(m->dir, dir) == 0;
40 }
41
/* Typed hash table of manifests, keyed by directory via the three
 * callbacks above; provides the htable_manifest_*() helpers used below. */
HTABLE_DEFINE_TYPE(struct manifest, manifest_name, dir_hash, dir_cmp, manifest);
/* Cache of every manifest built so far; created lazily by get_manifest(). */
static struct htable_manifest *manifests;
44
45 const char *get_ccan_file_contents(struct ccan_file *f)
46 {
47         if (!f->contents) {
48                 f->contents = grab_file(f, f->fullname, &f->contents_size);
49                 if (!f->contents)
50                         err(1, "Reading file %s", f->fullname);
51         }
52         return f->contents;
53 }
54
55 char **get_ccan_file_lines(struct ccan_file *f)
56 {
57         if (!f->lines)
58                 f->lines = strsplit(f, get_ccan_file_contents(f), "\n");
59
60         /* FIXME: is f->num_lines necessary? */
61         f->num_lines = talloc_array_length(f->lines) - 1;
62         return f->lines;
63 }
64
65 struct list_head *get_ccan_file_docs(struct ccan_file *f)
66 {
67         if (!f->doc_sections) {
68                 get_ccan_file_lines(f);
69                 f->doc_sections = extract_doc_sections(f->lines);
70         }
71         return f->doc_sections;
72 }
73
74 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
75 {
76         struct ccan_file *f;
77
78         assert(dir[0] == '/');
79
80         f = talloc(ctx, struct ccan_file);
81         f->lines = NULL;
82         f->line_info = NULL;
83         f->doc_sections = NULL;
84         f->compiled = NULL;
85         f->name = talloc_steal(f, name);
86         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
87         f->contents = NULL;
88         f->cov_compiled = NULL;
89         f->simplified = NULL;
90         return f;
91 }
92
93 static void add_files(struct manifest *m, const char *dir)
94 {
95         DIR *d;
96         struct dirent *ent;
97         char **subs = NULL;
98
99         if (dir[0])
100                 d = opendir(dir);
101         else
102                 d = opendir(".");
103         if (!d)
104                 err(1, "Opening directory %s", dir[0] ? dir : ".");
105
106         while ((ent = readdir(d)) != NULL) {
107                 struct stat st;
108                 struct ccan_file *f;
109                 struct list_head *dest;
110                 bool is_c_src;
111
112                 if (ent->d_name[0] == '.')
113                         continue;
114
115                 f = new_ccan_file(m, m->dir,
116                                   talloc_asprintf(m, "%s%s",
117                                                   dir, ent->d_name));
118                 if (lstat(f->name, &st) != 0)
119                         err(1, "lstat %s", f->name);
120
121                 if (S_ISDIR(st.st_mode)) {
122                         size_t len = talloc_array_length(subs);
123                         subs = talloc_realloc(m, subs, char *, len+1);
124                         subs[len] = talloc_append_string(f->name, "/");
125                         continue;
126                 }
127                 if (!S_ISREG(st.st_mode)) {
128                         talloc_free(f);
129                         continue;
130                 }
131
132                 if (streq(f->name, "_info")) {
133                         m->info_file = f;
134                         continue;
135                 }
136
137                 is_c_src = strends(f->name, ".c");
138                 if (!is_c_src && !strends(f->name, ".h")) {
139                         dest = &m->other_files;
140                 } else if (!strchr(f->name, '/')) {
141                         if (is_c_src)
142                                 dest = &m->c_files;
143                         else
144                                 dest = &m->h_files;
145                 } else if (strstarts(f->name, "test/")) {
146                         if (is_c_src) {
147                                 if (strstarts(f->name, "test/api"))
148                                         dest = &m->api_tests;
149                                 else if (strstarts(f->name, "test/run"))
150                                         dest = &m->run_tests;
151                                 else if (strstarts(f->name, "test/compile_ok"))
152                                         dest = &m->compile_ok_tests;
153                                 else if (strstarts(f->name, "test/compile_fail"))
154                                         dest = &m->compile_fail_tests;
155                                 else
156                                         dest = &m->other_test_c_files;
157                         } else
158                                 dest = &m->other_test_files;
159                 } else
160                         dest = &m->other_files;
161
162                 list_add(dest, &f->list);
163         }
164         closedir(d);
165
166         /* Before we recurse, sanity check this is a ccan module. */ 
167         if (!dir[0]) {
168                 size_t i;
169
170                 if (!m->info_file
171                     && list_empty(&m->c_files)
172                     && list_empty(&m->h_files))
173                         errx(1, "No _info, C or H files found here!");
174
175                 for (i = 0; i < talloc_array_length(subs); i++)
176                         add_files(m, subs[i]);
177         }
178         talloc_free(subs);
179 }
180
181 static int cmp_names(struct ccan_file *const *a, struct ccan_file *const *b,
182                      void *unused)
183 {
184         return strcmp((*a)->name, (*b)->name);
185 }
186
187 static void sort_files(struct list_head *list)
188 {
189         struct ccan_file **files = NULL, *f;
190         unsigned int i, num;
191
192         num = 0;
193         while ((f = list_top(list, struct ccan_file, list)) != NULL) {
194                 files = talloc_realloc(NULL, files, struct ccan_file *, num+1);
195                 files[num++] = f;
196                 list_del(&f->list);
197         }
198         asort(files, num, cmp_names, NULL);
199
200         for (i = 0; i < num; i++)
201                 list_add_tail(list, &files[i]->list);
202         talloc_free(files);
203 }
204
205 struct manifest *get_manifest(const void *ctx, const char *dir)
206 {
207         struct manifest *m;
208         char *olddir, *canon_dir;
209         unsigned int len;
210         struct list_head *list;
211
212         if (!manifests)
213                 manifests = htable_manifest_new();
214
215         olddir = talloc_getcwd(NULL);
216         if (!olddir)
217                 err(1, "Getting current directory");
218
219         if (chdir(dir) != 0)
220                 err(1, "Failed to chdir to %s", dir);
221
222         canon_dir = talloc_getcwd(olddir);
223         if (!canon_dir)
224                 err(1, "Getting current directory");
225
226         m = htable_manifest_get(manifests, canon_dir);
227         if (m)
228                 goto done;
229
230         m = talloc_linked(ctx, talloc(NULL, struct manifest));
231         m->info_file = NULL;
232         m->compiled = NULL;
233         m->dir = talloc_steal(m, canon_dir);
234         list_head_init(&m->c_files);
235         list_head_init(&m->h_files);
236         list_head_init(&m->api_tests);
237         list_head_init(&m->run_tests);
238         list_head_init(&m->compile_ok_tests);
239         list_head_init(&m->compile_fail_tests);
240         list_head_init(&m->other_test_c_files);
241         list_head_init(&m->other_test_files);
242         list_head_init(&m->other_files);
243         list_head_init(&m->examples);
244         list_head_init(&m->mangled_examples);
245         list_head_init(&m->deps);
246
247         len = strlen(m->dir);
248         while (len && m->dir[len-1] == '/')
249                 m->dir[--len] = '\0';
250
251         m->basename = strrchr(m->dir, '/');
252         if (!m->basename)
253                 errx(1, "I don't expect to be run from the root directory");
254         m->basename++;
255
256         /* We expect the ccan dir to be two levels above module dir. */
257         if (!ccan_dir) {
258                 char *p, *dir;
259                 dir = talloc_strdup(NULL, m->dir);
260                 p = strrchr(dir, '/');
261                 if (!p)
262                         errx(1, "I expect the ccan root directory in ../..");
263                 *p = '\0';
264                 p = strrchr(dir, '/');
265                 if (!p)
266                         errx(1, "I expect the ccan root directory in ../..");
267                 *p = '\0';
268                 ccan_dir = dir;
269         }
270
271         add_files(m, "");
272
273         /* Nicer to run tests in a predictable order. */
274         foreach_ptr(list, &m->api_tests, &m->run_tests, &m->compile_ok_tests,
275                     &m->compile_fail_tests)
276                 sort_files(list);
277
278         htable_manifest_add(manifests, m);
279
280 done:
281         if (chdir(olddir) != 0)
282                 err(1, "Returning to original directory '%s'", olddir);
283         talloc_free(olddir);
284
285         return m;
286 }
287
288
289 /**
290  * remove_comments - strip comments from a line, return copy.
291  * @line: line to copy
292  * @in_comment: are we already within a comment (from prev line).
293  * @unterminated: are we still in a comment for next line.
294  */
295 static char *remove_comments(const char *line, bool in_comment,
296                              bool *unterminated)
297 {
298         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
299
300         p = ret;
301         for (;;) {
302                 if (!in_comment) {
303                         /* Find first comment. */
304                         const char *old_comment = strstr(line, "/*");
305                         const char *new_comment = strstr(line, "//");
306                         const char *comment;
307
308                         if (new_comment && old_comment)
309                                 comment = new_comment < old_comment
310                                         ? new_comment : old_comment;
311                         else if (old_comment)
312                                 comment = old_comment;
313                         else if (new_comment)
314                                 comment = new_comment;
315                         else {
316                                 /* Nothing more. */
317                                 strcpy(p, line);
318                                 *unterminated = false;
319                                 break;
320                         }
321
322                         /* Copy up to comment. */
323                         memcpy(p, line, comment - line);
324                         p += comment - line;
325                         line += comment - line + 2;
326
327                         if (comment == new_comment) {
328                                 /* We're done: goes to EOL. */
329                                 p[0] = '\0';
330                                 *unterminated = false;
331                                 break;
332                         }
333                         in_comment = true;
334                 }
335
336                 if (in_comment) {
337                         const char *end = strstr(line, "*/");
338                         if (!end) {
339                                 *unterminated = true;
340                                 p[0] = '\0';
341                                 break;
342                         }
343                         line = end+2;
344                         in_comment = false;
345                 }
346         }
347         return ret;
348 }
349
/* True if @line consists solely of spaces, carriage returns and tabs
 * (an empty string counts as empty). */
static bool is_empty(const char *line)
{
        /* Skip the leading blank run; empty means we hit the terminator. */
        const char *rest = line + strspn(line, " \r\t");

        return *rest == '\0';
}
354
/* True if @line ends in a backslash, i.e. is continued on the next line. */
static bool continues(const char *line)
{
        /* Technically, any odd number of these.  But who cares? */
        bool ends_in_backslash = strends(line, "\\");

        return ends_in_backslash;
}
360
361 /* Get token if it's equal to token. */
362 bool get_token(const char **line, const char *token)
363 {
364         unsigned int toklen;
365
366         *line += strspn(*line, " \t");
367         if (cisalnum(token[0]) || token[0] == '_')
368                 toklen = strspn(*line, IDENT_CHARS);
369         else {
370                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
371                 toklen = strlen(token);
372         }
373
374         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
375                 *line += toklen;
376                 return true;
377         }
378         return false;
379 }
380
381 char *get_symbol_token(void *ctx, const char **line)
382 {
383         unsigned int toklen;
384         char *ret;
385
386         *line += strspn(*line, " \t");
387         toklen = strspn(*line, IDENT_CHARS);
388         if (!toklen)
389                 return NULL;
390         ret = talloc_strndup(ctx, *line, toklen);
391         *line += toklen;
392         return ret;
393 }
394
395 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
396 {
397         bool brackets, defined;
398
399         cond->inverse = get_token(line, "!");
400         defined = get_token(line, "defined");
401         brackets = get_token(line, "(");
402         cond->symbol = get_symbol_token(cond, line);
403         if (!cond->symbol)
404                 return false;
405         if (brackets && !get_token(line, ")"))
406                 return false;
407         if (!defined)
408                 cond->type = PP_COND_IF;
409
410         /* FIXME: We just chain them, ignoring operators. */
411         if (get_token(line, "||") || get_token(line, "&&")) {
412                 struct pp_conditions *sub = talloc(cond, struct pp_conditions);
413
414                 sub->parent = cond->parent;
415                 sub->type = PP_COND_IFDEF;
416                 if (parse_hash_if(sub, line))
417                         cond->parent = sub;
418         }
419
420         return true;
421 }
422
423 /* FIXME: Get serious! */
424 static struct pp_conditions *analyze_directive(struct ccan_file *f,
425                                                const char *line,
426                                                struct pp_conditions *parent)
427 {
428         struct pp_conditions *cond = talloc(f, struct pp_conditions);
429         bool unused;
430
431         line = remove_comments(line, false, &unused);
432
433         cond->parent = parent;
434         cond->type = PP_COND_IFDEF;
435
436         if (!get_token(&line, "#"))
437                 abort();
438
439         if (get_token(&line, "if")) {
440                 if (!parse_hash_if(cond, &line))
441                         goto unknown;
442         } else if (get_token(&line, "elif")) {
443                 /* Malformed? */
444                 if (!parent)
445                         return NULL;
446                 cond->parent = parent->parent;
447                 /* FIXME: Not quite true.  This implies !parent, but we don't
448                  * do multiple conditionals yet. */
449                 if (!parse_hash_if(cond, &line))
450                         goto unknown;
451         } else if (get_token(&line, "ifdef")) {
452                 bool brackets;
453                 cond->inverse = false;
454                 brackets = get_token(&line, "(");
455                 cond->symbol = get_symbol_token(cond, &line);
456                 if (!cond->symbol)
457                         goto unknown;
458                 if (brackets && !get_token(&line, ")"))
459                         goto unknown;
460         } else if (get_token(&line, "ifndef")) {
461                 bool brackets;
462                 cond->inverse = true;
463                 brackets = get_token(&line, "(");
464                 cond->symbol = get_symbol_token(cond, &line);
465                 if (!cond->symbol)
466                         goto unknown;
467                 if (brackets && !get_token(&line, ")"))
468                         goto unknown;
469         } else if (get_token(&line, "else")) {
470                 /* Malformed? */
471                 if (!parent)
472                         return NULL;
473
474                 *cond = *parent;
475                 cond->inverse = !cond->inverse;
476                 return cond;
477         } else if (get_token(&line, "endif")) {
478                 talloc_free(cond);
479                 /* Malformed? */
480                 if (!parent)
481                         return NULL;
482                 /* Back up one! */
483                 return parent->parent;
484         } else {
485                 /* Not a conditional. */
486                 talloc_free(cond);
487                 return parent;
488         }
489
490         if (!is_empty(line))
491                 goto unknown;
492         return cond;
493
494 unknown:
495         cond->type = PP_COND_UNKNOWN;
496         return cond;
497 }
498
499 /* This parser is rough, but OK if code is reasonably neat. */
500 struct line_info *get_ccan_line_info(struct ccan_file *f)
501 {
502         bool continued = false, in_comment = false;
503         struct pp_conditions *cond = NULL;
504         unsigned int i;
505
506         if (f->line_info)
507                 return f->line_info;
508
509         get_ccan_file_lines(f);
510         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
511
512         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
513                 char *p;
514                 bool still_doc_line;
515
516                 /* Current conditions apply to this line. */
517                 f->line_info[i].cond = cond;
518                 f->line_info[i].continued = continued;
519
520                 if (continued) {
521                         /* Same as last line. */
522                         f->line_info[i].type = f->line_info[i-1].type;
523                         /* Update in_comment. */
524                         remove_comments(f->lines[i], in_comment, &in_comment);
525                         continue;
526                 }
527
528                 /* Preprocessor directive? */
529                 if (!in_comment
530                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
531                         f->line_info[i].type = PREPROC_LINE;
532                         cond = analyze_directive(f, f->lines[i], cond);
533                         continue;
534                 }
535
536                 still_doc_line = (in_comment
537                                   && f->line_info[i-1].type == DOC_LINE);
538
539                 p = remove_comments(f->lines[i], in_comment, &in_comment);
540                 if (is_empty(p)) {
541                         if (strstarts(f->lines[i], "/**") || still_doc_line)
542                                 f->line_info[i].type = DOC_LINE;
543                         else
544                                 f->line_info[i].type = COMMENT_LINE;
545                 } else
546                         f->line_info[i].type = CODE_LINE;
547                 talloc_free(p);
548         }
549         return f->line_info;
550 }
551
/* One preprocessor symbol with a known state, as supplied to
 * get_ccan_line_pp(). */
struct symbol {
        /* Linkage in the symbol list searched by find_symbol(). */
        struct list_node list;
        /* Symbol name, matched exactly against a condition's symbol. */
        const char *name;
        /* NULL means the symbol is not defined (and reads as 0 in an
         * #if); otherwise points at its value. */
        const unsigned int *value;
};
557
558 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
559 {
560         struct symbol *i;
561
562         list_for_each(syms, i, list)
563                 if (streq(sym, i->name))
564                         return i;
565         return NULL;
566 }
567
568 static enum line_compiled get_pp(struct pp_conditions *cond,
569                                  struct list_head *syms)
570 {
571         struct symbol *sym;
572         unsigned int val;
573         enum line_compiled parent, ret;
574
575         /* No conditions?  Easy. */
576         if (!cond)
577                 return COMPILED;
578
579         /* Check we get here at all. */
580         parent = get_pp(cond->parent, syms);
581         if (parent == NOT_COMPILED)
582                 return NOT_COMPILED;
583
584         if (cond->type == PP_COND_UNKNOWN)
585                 return MAYBE_COMPILED;
586
587         sym = find_symbol(syms, cond->symbol);
588         if (!sym)
589                 return MAYBE_COMPILED;
590
591         switch (cond->type) {
592         case PP_COND_IF:
593                 /* Undefined is 0. */
594                 val = sym->value ? *sym->value : 0;
595                 if (!val == cond->inverse)
596                         ret = COMPILED;
597                 else
598                         ret = NOT_COMPILED;
599                 break;
600
601         case PP_COND_IFDEF:
602                 if (cond->inverse == !sym->value)
603                         ret = COMPILED;
604                 else
605                         ret = NOT_COMPILED;
606                 break;
607
608         default:
609                 abort();
610         }
611
612         /* If parent didn't know, NO == NO, but YES == MAYBE. */
613         if (parent == MAYBE_COMPILED && ret == COMPILED)
614                 ret = MAYBE_COMPILED;
615         return ret;
616 }
617
618 static void add_symbol(struct list_head *head,
619                        const char *symbol, const unsigned int *value)
620 {
621         struct symbol *sym = talloc(head, struct symbol);
622         sym->name = symbol;
623         sym->value = value;
624         list_add(head, &sym->list);
625 }
626         
627 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
628                                     const char *symbol,
629                                     const unsigned int *value,
630                                     ...)
631 {
632         enum line_compiled ret;
633         struct list_head *head;
634         va_list ap;
635
636         head = talloc(NULL, struct list_head);
637         list_head_init(head);
638
639         va_start(ap, value);
640         add_symbol(head, symbol, value);
641
642         while ((symbol = va_arg(ap, const char *)) != NULL) {
643                 value = va_arg(ap, const unsigned int *);
644                 add_symbol(head, symbol, value);
645         }
646         ret = get_pp(cond, head);
647         talloc_free(head);
648         return ret;
649 }
650
651 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
652                       const char *errorfmt, ...)
653 {
654         va_list ap;
655
656         struct file_error *fe = talloc(score, struct file_error);
657         fe->file = f;
658         fe->line = line;
659         list_add_tail(&score->per_file_errors, &fe->list);
660
661         if (!score->error)
662                 score->error = talloc_strdup(score, "");
663         
664         if (verbose < 2 && strcount(score->error, "\n") > 5)
665                 return;
666
667         if (line)
668                 score->error = talloc_asprintf_append(score->error,
669                                                       "%s:%u:",
670                                                       f->fullname, line);
671         else
672                 score->error = talloc_asprintf_append(score->error,
673                                                       "%s:", f->fullname);
674
675         va_start(ap, errorfmt);
676         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
677         va_end(ap);
678         score->error = talloc_append_string(score->error, "\n");
679
680         if (verbose < 2 && strcount(score->error, "\n") > 5)
681                 score->error = talloc_append_string(score->error,
682                                     "... more (use -vv to see them all)\n");
683 }