git.ozlabs.org Git - ccan/blob - tools/ccanlint/file_analysis.c
configurator: Add test for glibc's qsort_r.
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/talloc_link/talloc_link.h>
6 #include <ccan/hash/hash.h>
7 #include <ccan/htable/htable_type.h>
8 #include <ccan/grab_file/grab_file.h>
9 #include <ccan/noerr/noerr.h>
10 #include <ccan/foreach/foreach.h>
11 #include <ccan/asort/asort.h>
12 #include "../tools.h"
13 #include <unistd.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include <fcntl.h>
17 #include <err.h>
18 #include <errno.h>
19 #include <dirent.h>
20 #include <ctype.h>
21 #include <stdarg.h>
22 #include <assert.h>
23
/* Directory two path components above the module directory.  It starts out
 * NULL; get_manifest() fills it in the first time it builds a manifest while
 * this is still NULL. */
24 const char *ccan_dir;
25
/* Hash callback for the manifest table: hashes the bytes of the directory
 * string @name (terminator excluded) with a base of 0. */
static size_t dir_hash(const char *name)
{
	size_t namelen = strlen(name);

	return hash(name, namelen, 0);
}
30
31 static const char *manifest_name(const struct manifest *m)
32 {
33         return m->dir;
34 }
35
36 static bool dir_cmp(const struct manifest *m, const char *dir)
37 {
38         return strcmp(m->dir, dir) == 0;
39 }
40
/* Instantiate the "htable_manifest" hash table type over struct manifest,
 * wiring in the key (manifest_name), hash (dir_hash) and equality (dir_cmp)
 * callbacks defined above.  get_manifest() uses the resulting
 * htable_manifest_new/get/add helpers. */
41 HTABLE_DEFINE_TYPE(struct manifest, manifest_name, dir_hash, dir_cmp, manifest);
/* Table of manifests already built; created on the first get_manifest() call. */
42 static struct htable_manifest *manifests;
43
44 const char *get_ccan_file_contents(struct ccan_file *f)
45 {
46         if (!f->contents) {
47                 f->contents = grab_file(f, f->fullname, &f->contents_size);
48                 if (!f->contents)
49                         err(1, "Reading file %s", f->fullname);
50         }
51         return f->contents;
52 }
53
54 char **get_ccan_file_lines(struct ccan_file *f)
55 {
56         if (!f->lines)
57                 f->lines = strsplit(f, get_ccan_file_contents(f), "\n");
58
59         /* FIXME: is f->num_lines necessary? */
60         f->num_lines = talloc_array_length(f->lines) - 1;
61         return f->lines;
62 }
63
64 struct list_head *get_ccan_file_docs(struct ccan_file *f)
65 {
66         if (!f->doc_sections) {
67                 get_ccan_file_lines(f);
68                 f->doc_sections = extract_doc_sections(f->lines);
69         }
70         return f->doc_sections;
71 }
72
73 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
74 {
75         struct ccan_file *f;
76
77         assert(dir[0] == '/');
78
79         f = talloc(ctx, struct ccan_file);
80         f->lines = NULL;
81         f->line_info = NULL;
82         f->doc_sections = NULL;
83         f->compiled = NULL;
84         f->name = talloc_steal(f, name);
85         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
86         f->contents = NULL;
87         f->cov_compiled = NULL;
88         return f;
89 }
90
91 static void add_files(struct manifest *m, const char *dir)
92 {
93         DIR *d;
94         struct dirent *ent;
95         char **subs = NULL;
96
97         if (dir[0])
98                 d = opendir(dir);
99         else
100                 d = opendir(".");
101         if (!d)
102                 err(1, "Opening directory %s", dir[0] ? dir : ".");
103
104         while ((ent = readdir(d)) != NULL) {
105                 struct stat st;
106                 struct ccan_file *f;
107                 struct list_head *dest;
108                 bool is_c_src;
109
110                 if (ent->d_name[0] == '.')
111                         continue;
112
113                 f = new_ccan_file(m, m->dir,
114                                   talloc_asprintf(m, "%s%s",
115                                                   dir, ent->d_name));
116                 if (lstat(f->name, &st) != 0)
117                         err(1, "lstat %s", f->name);
118
119                 if (S_ISDIR(st.st_mode)) {
120                         size_t len = talloc_array_length(subs);
121                         subs = talloc_realloc(m, subs, char *, len+1);
122                         subs[len] = talloc_append_string(f->name, "/");
123                         continue;
124                 }
125                 if (!S_ISREG(st.st_mode)) {
126                         talloc_free(f);
127                         continue;
128                 }
129
130                 if (streq(f->name, "_info")) {
131                         m->info_file = f;
132                         continue;
133                 }
134
135                 is_c_src = strends(f->name, ".c");
136                 if (!is_c_src && !strends(f->name, ".h")) {
137                         dest = &m->other_files;
138                 } else if (!strchr(f->name, '/')) {
139                         if (is_c_src)
140                                 dest = &m->c_files;
141                         else
142                                 dest = &m->h_files;
143                 } else if (strstarts(f->name, "test/")) {
144                         if (is_c_src) {
145                                 if (strstarts(f->name, "test/api"))
146                                         dest = &m->api_tests;
147                                 else if (strstarts(f->name, "test/run"))
148                                         dest = &m->run_tests;
149                                 else if (strstarts(f->name, "test/compile_ok"))
150                                         dest = &m->compile_ok_tests;
151                                 else if (strstarts(f->name, "test/compile_fail"))
152                                         dest = &m->compile_fail_tests;
153                                 else
154                                         dest = &m->other_test_c_files;
155                         } else
156                                 dest = &m->other_test_files;
157                 } else
158                         dest = &m->other_files;
159
160                 list_add(dest, &f->list);
161         }
162         closedir(d);
163
164         /* Before we recurse, sanity check this is a ccan module. */ 
165         if (!dir[0]) {
166                 size_t i;
167
168                 if (!m->info_file
169                     && list_empty(&m->c_files)
170                     && list_empty(&m->h_files))
171                         errx(1, "No _info, C or H files found here!");
172
173                 for (i = 0; i < talloc_array_length(subs); i++)
174                         add_files(m, subs[i]);
175         }
176         talloc_free(subs);
177 }
178
179 static int cmp_names(struct ccan_file *const *a, struct ccan_file *const *b,
180                      void *unused)
181 {
182         return strcmp((*a)->name, (*b)->name);
183 }
184
185 static void sort_files(struct list_head *list)
186 {
187         struct ccan_file **files = NULL, *f;
188         unsigned int i, num;
189
190         num = 0;
191         while ((f = list_top(list, struct ccan_file, list)) != NULL) {
192                 files = talloc_realloc(NULL, files, struct ccan_file *, num+1);
193                 files[num++] = f;
194                 list_del(&f->list);
195         }
196         asort(files, num, cmp_names, NULL);
197
198         for (i = 0; i < num; i++)
199                 list_add_tail(list, &files[i]->list);
200         talloc_free(files);
201 }
202
203 struct manifest *get_manifest(const void *ctx, const char *dir)
204 {
205         struct manifest *m;
206         char *olddir, *canon_dir;
207         unsigned int len;
208         struct list_head *list;
209
210         if (!manifests)
211                 manifests = htable_manifest_new();
212
213         olddir = talloc_getcwd(NULL);
214         if (!olddir)
215                 err(1, "Getting current directory");
216
217         if (chdir(dir) != 0)
218                 err(1, "Failed to chdir to %s", dir);
219
220         canon_dir = talloc_getcwd(olddir);
221         if (!canon_dir)
222                 err(1, "Getting current directory");
223
224         m = htable_manifest_get(manifests, canon_dir);
225         if (m)
226                 goto done;
227
228         m = talloc_linked(ctx, talloc(NULL, struct manifest));
229         m->info_file = NULL;
230         m->compiled = NULL;
231         m->dir = talloc_steal(m, canon_dir);
232         list_head_init(&m->c_files);
233         list_head_init(&m->h_files);
234         list_head_init(&m->api_tests);
235         list_head_init(&m->run_tests);
236         list_head_init(&m->compile_ok_tests);
237         list_head_init(&m->compile_fail_tests);
238         list_head_init(&m->other_test_c_files);
239         list_head_init(&m->other_test_files);
240         list_head_init(&m->other_files);
241         list_head_init(&m->examples);
242         list_head_init(&m->mangled_examples);
243         list_head_init(&m->deps);
244
245         len = strlen(m->dir);
246         while (len && m->dir[len-1] == '/')
247                 m->dir[--len] = '\0';
248
249         m->basename = strrchr(m->dir, '/');
250         if (!m->basename)
251                 errx(1, "I don't expect to be run from the root directory");
252         m->basename++;
253
254         /* We expect the ccan dir to be two levels above module dir. */
255         if (!ccan_dir) {
256                 char *p, *dir;
257                 dir = talloc_strdup(NULL, m->dir);
258                 p = strrchr(dir, '/');
259                 if (!p)
260                         errx(1, "I expect the ccan root directory in ../..");
261                 *p = '\0';
262                 p = strrchr(dir, '/');
263                 if (!p)
264                         errx(1, "I expect the ccan root directory in ../..");
265                 *p = '\0';
266                 ccan_dir = dir;
267         }
268
269         add_files(m, "");
270
271         /* Nicer to run tests in a predictable order. */
272         foreach_ptr(list, &m->api_tests, &m->run_tests, &m->compile_ok_tests,
273                     &m->compile_fail_tests)
274                 sort_files(list);
275
276         htable_manifest_add(manifests, m);
277
278 done:
279         if (chdir(olddir) != 0)
280                 err(1, "Returning to original directory '%s'", olddir);
281         talloc_free(olddir);
282
283         return m;
284 }
285
286
287 /**
288  * remove_comments - strip comments from a line, return copy.
289  * @line: line to copy
290  * @in_comment: are we already within a comment (from prev line).
291  * @unterminated: are we still in a comment for next line.
292  */
293 static char *remove_comments(const char *line, bool in_comment,
294                              bool *unterminated)
295 {
296         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
297
298         p = ret;
299         for (;;) {
300                 if (!in_comment) {
301                         /* Find first comment. */
302                         const char *old_comment = strstr(line, "/*");
303                         const char *new_comment = strstr(line, "//");
304                         const char *comment;
305
306                         if (new_comment && old_comment)
307                                 comment = new_comment < old_comment
308                                         ? new_comment : old_comment;
309                         else if (old_comment)
310                                 comment = old_comment;
311                         else if (new_comment)
312                                 comment = new_comment;
313                         else {
314                                 /* Nothing more. */
315                                 strcpy(p, line);
316                                 *unterminated = false;
317                                 break;
318                         }
319
320                         /* Copy up to comment. */
321                         memcpy(p, line, comment - line);
322                         p += comment - line;
323                         line += comment - line + 2;
324
325                         if (comment == new_comment) {
326                                 /* We're done: goes to EOL. */
327                                 p[0] = '\0';
328                                 *unterminated = false;
329                                 break;
330                         }
331                         in_comment = true;
332                 }
333
334                 if (in_comment) {
335                         const char *end = strstr(line, "*/");
336                         if (!end) {
337                                 *unterminated = true;
338                                 p[0] = '\0';
339                                 break;
340                         }
341                         line = end+2;
342                         in_comment = false;
343                 }
344         }
345         return ret;
346 }
347
/* True if @line consists solely of spaces and tabs (or is empty). */
static bool is_empty(const char *line)
{
	/* Skip the leading blanks; the line is empty iff that reaches the end. */
	return line[strspn(line, " \t")] == '\0';
}
352
/* True if @line ends in a backslash, i.e. continues onto the next line. */
static bool continues(const char *line)
{
	size_t len = strlen(line);

	/* Strictly it should be an odd number of trailing backslashes, but
	 * that distinction is not worth making here. */
	return len > 0 && line[len - 1] == '\\';
}
358
359 /* Get token if it's equal to token. */
360 bool get_token(const char **line, const char *token)
361 {
362         unsigned int toklen;
363
364         *line += strspn(*line, " \t");
365         if (cisalnum(token[0]) || token[0] == '_')
366                 toklen = strspn(*line, IDENT_CHARS);
367         else {
368                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
369                 toklen = strlen(token);
370         }
371
372         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
373                 *line += toklen;
374                 return true;
375         }
376         return false;
377 }
378
379 char *get_symbol_token(void *ctx, const char **line)
380 {
381         unsigned int toklen;
382         char *ret;
383
384         *line += strspn(*line, " \t");
385         toklen = strspn(*line, IDENT_CHARS);
386         if (!toklen)
387                 return NULL;
388         ret = talloc_strndup(ctx, *line, toklen);
389         *line += toklen;
390         return ret;
391 }
392
393 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
394 {
395         bool brackets, defined;
396
397         cond->inverse = get_token(line, "!");
398         defined = get_token(line, "defined");
399         brackets = get_token(line, "(");
400         cond->symbol = get_symbol_token(cond, line);
401         if (!cond->symbol)
402                 return false;
403         if (brackets && !get_token(line, ")"))
404                 return false;
405         if (!defined)
406                 cond->type = PP_COND_IF;
407
408         /* FIXME: We just chain them, ignoring operators. */
409         if (get_token(line, "||") || get_token(line, "&&")) {
410                 struct pp_conditions *sub = talloc(cond, struct pp_conditions);
411
412                 sub->parent = cond->parent;
413                 sub->type = PP_COND_IFDEF;
414                 if (parse_hash_if(sub, line))
415                         cond->parent = sub;
416         }
417
418         return true;
419 }
420
421 /* FIXME: Get serious! */
422 static struct pp_conditions *analyze_directive(struct ccan_file *f,
423                                                const char *line,
424                                                struct pp_conditions *parent)
425 {
426         struct pp_conditions *cond = talloc(f, struct pp_conditions);
427         bool unused;
428
429         line = remove_comments(line, false, &unused);
430
431         cond->parent = parent;
432         cond->type = PP_COND_IFDEF;
433
434         if (!get_token(&line, "#"))
435                 abort();
436
437         if (get_token(&line, "if")) {
438                 if (!parse_hash_if(cond, &line))
439                         goto unknown;
440         } else if (get_token(&line, "elif")) {
441                 /* Malformed? */
442                 if (!parent)
443                         return NULL;
444                 cond->parent = parent->parent;
445                 /* FIXME: Not quite true.  This implies !parent, but we don't
446                  * do multiple conditionals yet. */
447                 if (!parse_hash_if(cond, &line))
448                         goto unknown;
449         } else if (get_token(&line, "ifdef")) {
450                 bool brackets;
451                 cond->inverse = false;
452                 brackets = get_token(&line, "(");
453                 cond->symbol = get_symbol_token(cond, &line);
454                 if (!cond->symbol)
455                         goto unknown;
456                 if (brackets && !get_token(&line, ")"))
457                         goto unknown;
458         } else if (get_token(&line, "ifndef")) {
459                 bool brackets;
460                 cond->inverse = true;
461                 brackets = get_token(&line, "(");
462                 cond->symbol = get_symbol_token(cond, &line);
463                 if (!cond->symbol)
464                         goto unknown;
465                 if (brackets && !get_token(&line, ")"))
466                         goto unknown;
467         } else if (get_token(&line, "else")) {
468                 /* Malformed? */
469                 if (!parent)
470                         return NULL;
471
472                 *cond = *parent;
473                 cond->inverse = !cond->inverse;
474                 return cond;
475         } else if (get_token(&line, "endif")) {
476                 talloc_free(cond);
477                 /* Malformed? */
478                 if (!parent)
479                         return NULL;
480                 /* Back up one! */
481                 return parent->parent;
482         } else {
483                 /* Not a conditional. */
484                 talloc_free(cond);
485                 return parent;
486         }
487
488         if (!is_empty(line))
489                 goto unknown;
490         return cond;
491
492 unknown:
493         cond->type = PP_COND_UNKNOWN;
494         return cond;
495 }
496
497 /* This parser is rough, but OK if code is reasonably neat. */
498 struct line_info *get_ccan_line_info(struct ccan_file *f)
499 {
500         bool continued = false, in_comment = false;
501         struct pp_conditions *cond = NULL;
502         unsigned int i;
503
504         if (f->line_info)
505                 return f->line_info;
506
507         get_ccan_file_lines(f);
508         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
509
510         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
511                 char *p;
512                 bool still_doc_line;
513
514                 /* Current conditions apply to this line. */
515                 f->line_info[i].cond = cond;
516                 f->line_info[i].continued = continued;
517
518                 if (continued) {
519                         /* Same as last line. */
520                         f->line_info[i].type = f->line_info[i-1].type;
521                         /* Update in_comment. */
522                         remove_comments(f->lines[i], in_comment, &in_comment);
523                         continue;
524                 }
525
526                 /* Preprocessor directive? */
527                 if (!in_comment
528                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
529                         f->line_info[i].type = PREPROC_LINE;
530                         cond = analyze_directive(f, f->lines[i], cond);
531                         continue;
532                 }
533
534                 still_doc_line = (in_comment
535                                   && f->line_info[i-1].type == DOC_LINE);
536
537                 p = remove_comments(f->lines[i], in_comment, &in_comment);
538                 if (is_empty(p)) {
539                         if (strstarts(f->lines[i], "/**") || still_doc_line)
540                                 f->line_info[i].type = DOC_LINE;
541                         else
542                                 f->line_info[i].type = COMMENT_LINE;
543                 } else
544                         f->line_info[i].type = CODE_LINE;
545                 talloc_free(p);
546         }
547         return f->line_info;
548 }
549
/* One preprocessor symbol whose state is assumed while evaluating
 * conditions in get_pp().  "name" is the symbol's name; a NULL "value"
 * means the symbol is treated as undefined, otherwise "value" points at its
 * numeric value.  Entries are chained through "list". */
550 struct symbol {
551         struct list_node list;
552         const char *name;
553         const unsigned int *value;
554 };
555
556 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
557 {
558         struct symbol *i;
559
560         list_for_each(syms, i, list)
561                 if (streq(sym, i->name))
562                         return i;
563         return NULL;
564 }
565
566 static enum line_compiled get_pp(struct pp_conditions *cond,
567                                  struct list_head *syms)
568 {
569         struct symbol *sym;
570         unsigned int val;
571         enum line_compiled parent, ret;
572
573         /* No conditions?  Easy. */
574         if (!cond)
575                 return COMPILED;
576
577         /* Check we get here at all. */
578         parent = get_pp(cond->parent, syms);
579         if (parent == NOT_COMPILED)
580                 return NOT_COMPILED;
581
582         if (cond->type == PP_COND_UNKNOWN)
583                 return MAYBE_COMPILED;
584
585         sym = find_symbol(syms, cond->symbol);
586         if (!sym)
587                 return MAYBE_COMPILED;
588
589         switch (cond->type) {
590         case PP_COND_IF:
591                 /* Undefined is 0. */
592                 val = sym->value ? *sym->value : 0;
593                 if (!val == cond->inverse)
594                         ret = COMPILED;
595                 else
596                         ret = NOT_COMPILED;
597                 break;
598
599         case PP_COND_IFDEF:
600                 if (cond->inverse == !sym->value)
601                         ret = COMPILED;
602                 else
603                         ret = NOT_COMPILED;
604                 break;
605
606         default:
607                 abort();
608         }
609
610         /* If parent didn't know, NO == NO, but YES == MAYBE. */
611         if (parent == MAYBE_COMPILED && ret == COMPILED)
612                 ret = MAYBE_COMPILED;
613         return ret;
614 }
615
616 static void add_symbol(struct list_head *head,
617                        const char *symbol, const unsigned int *value)
618 {
619         struct symbol *sym = talloc(head, struct symbol);
620         sym->name = symbol;
621         sym->value = value;
622         list_add(head, &sym->list);
623 }
624         
625 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
626                                     const char *symbol,
627                                     const unsigned int *value,
628                                     ...)
629 {
630         enum line_compiled ret;
631         struct list_head *head;
632         va_list ap;
633
634         head = talloc(NULL, struct list_head);
635         list_head_init(head);
636
637         va_start(ap, value);
638         add_symbol(head, symbol, value);
639
640         while ((symbol = va_arg(ap, const char *)) != NULL) {
641                 value = va_arg(ap, const unsigned int *);
642                 add_symbol(head, symbol, value);
643         }
644         ret = get_pp(cond, head);
645         talloc_free(head);
646         return ret;
647 }
648
649 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
650                       const char *errorfmt, ...)
651 {
652         va_list ap;
653
654         struct file_error *fe = talloc(score, struct file_error);
655         fe->file = f;
656         fe->line = line;
657         list_add_tail(&score->per_file_errors, &fe->list);
658
659         if (!score->error)
660                 score->error = talloc_strdup(score, "");
661         
662         if (verbose < 2 && strcount(score->error, "\n") > 5)
663                 return;
664
665         if (line)
666                 score->error = talloc_asprintf_append(score->error,
667                                                       "%s:%u:",
668                                                       f->fullname, line);
669         else
670                 score->error = talloc_asprintf_append(score->error,
671                                                       "%s:", f->fullname);
672
673         va_start(ap, errorfmt);
674         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
675         va_end(ap);
676         score->error = talloc_append_string(score->error, "\n");
677
678         if (verbose < 2 && strcount(score->error, "\n") > 5)
679                 score->error = talloc_append_string(score->error,
680                                     "... more (use -vv to see them all)\n");
681 }