configurator: HAVE_PROC_SELF_MAPS
[ccan] / tools / ccanlint / file_analysis.c
1 #include "config.h"
2 #include "ccanlint.h"
3 #include <ccan/talloc/talloc.h>
4 #include <ccan/str/str.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/talloc_link/talloc_link.h>
7 #include <ccan/hash/hash.h>
8 #include <ccan/htable/htable_type.h>
9 #include <ccan/grab_file/grab_file.h>
10 #include <ccan/noerr/noerr.h>
11 #include <ccan/foreach/foreach.h>
12 #include <ccan/asort/asort.h>
13 #include <ccan/array_size/array_size.h>
14 #include "../tools.h"
15 #include <unistd.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <fcntl.h>
19 #include <err.h>
20 #include <errno.h>
21 #include <dirent.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <assert.h>
25
/* Root directory of the ccan tree.  Starts out unset; get_manifest() fills
 * it in the first time it builds a manifest while this is still NULL. */
const char *ccan_dir;
27
/* Hash callback for the manifest table: hash every byte of the directory
 * name, using 0 as the base value. */
static size_t dir_hash(const char *name)
{
        const size_t name_len = strlen(name);

        return hash(name, name_len, 0);
}
32
33 static const char *manifest_name(const struct manifest *m)
34 {
35         return m->dir;
36 }
37
38 static bool dir_cmp(const struct manifest *m, const char *dir)
39 {
40         return strcmp(m->dir, dir) == 0;
41 }
42
43 HTABLE_DEFINE_TYPE(struct manifest, manifest_name, dir_hash, dir_cmp,
44                    htable_manifest);
45 static struct htable_manifest *manifests;
46
47 const char *get_ccan_file_contents(struct ccan_file *f)
48 {
49         if (!f->contents) {
50                 f->contents = grab_file(f, f->fullname, &f->contents_size);
51                 if (!f->contents)
52                         err(1, "Reading file %s", f->fullname);
53         }
54         return f->contents;
55 }
56
57 char **get_ccan_file_lines(struct ccan_file *f)
58 {
59         if (!f->lines)
60                 f->lines = strsplit(f, get_ccan_file_contents(f), "\n");
61
62         /* FIXME: is f->num_lines necessary? */
63         f->num_lines = talloc_array_length(f->lines) - 1;
64         return f->lines;
65 }
66
67 struct list_head *get_ccan_file_docs(struct ccan_file *f)
68 {
69         if (!f->doc_sections) {
70                 get_ccan_file_lines(f);
71                 f->doc_sections = extract_doc_sections(f->lines);
72         }
73         return f->doc_sections;
74 }
75
76 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
77 {
78         struct ccan_file *f;
79         unsigned int i;
80
81         assert(dir[0] == '/');
82
83         f = talloc(ctx, struct ccan_file);
84         f->lines = NULL;
85         f->line_info = NULL;
86         f->doc_sections = NULL;
87         for (i = 0; i < ARRAY_SIZE(f->compiled); i++)
88                 f->compiled[i] = NULL;
89         f->name = talloc_steal(f, name);
90         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
91         f->contents = NULL;
92         f->simplified = NULL;
93         return f;
94 }
95
96 static void add_files(struct manifest *m, const char *dir)
97 {
98         DIR *d;
99         struct dirent *ent;
100         char **subs = NULL;
101
102         if (dir[0])
103                 d = opendir(dir);
104         else
105                 d = opendir(".");
106         if (!d)
107                 err(1, "Opening directory %s", dir[0] ? dir : ".");
108
109         while ((ent = readdir(d)) != NULL) {
110                 struct stat st;
111                 struct ccan_file *f;
112                 struct list_head *dest;
113                 bool is_c_src;
114
115                 if (ent->d_name[0] == '.')
116                         continue;
117
118                 f = new_ccan_file(m, m->dir,
119                                   talloc_asprintf(m, "%s%s",
120                                                   dir, ent->d_name));
121                 if (lstat(f->name, &st) != 0)
122                         err(1, "lstat %s", f->name);
123
124                 if (S_ISDIR(st.st_mode)) {
125                         size_t len = talloc_array_length(subs);
126                         subs = talloc_realloc(m, subs, char *, len+1);
127                         subs[len] = talloc_append_string(f->name, "/");
128                         continue;
129                 }
130                 if (!S_ISREG(st.st_mode)) {
131                         talloc_free(f);
132                         continue;
133                 }
134
135                 if (streq(f->name, "_info")) {
136                         m->info_file = f;
137                         continue;
138                 }
139
140                 is_c_src = strends(f->name, ".c");
141                 if (!is_c_src && !strends(f->name, ".h")) {
142                         dest = &m->other_files;
143                 } else if (!strchr(f->name, '/')) {
144                         if (is_c_src)
145                                 dest = &m->c_files;
146                         else
147                                 dest = &m->h_files;
148                 } else if (strstarts(f->name, "test/")) {
149                         if (is_c_src) {
150                                 if (strstarts(f->name, "test/api"))
151                                         dest = &m->api_tests;
152                                 else if (strstarts(f->name, "test/run"))
153                                         dest = &m->run_tests;
154                                 else if (strstarts(f->name, "test/compile_ok"))
155                                         dest = &m->compile_ok_tests;
156                                 else if (strstarts(f->name, "test/compile_fail"))
157                                         dest = &m->compile_fail_tests;
158                                 else
159                                         dest = &m->other_test_c_files;
160                         } else
161                                 dest = &m->other_test_files;
162                 } else
163                         dest = &m->other_files;
164
165                 list_add(dest, &f->list);
166         }
167         closedir(d);
168
169         /* Before we recurse, sanity check this is a ccan module. */ 
170         if (!dir[0]) {
171                 size_t i;
172
173                 if (!m->info_file
174                     && list_empty(&m->c_files)
175                     && list_empty(&m->h_files))
176                         errx(1, "No _info, C or H files found here!");
177
178                 for (i = 0; i < talloc_array_length(subs); i++)
179                         add_files(m, subs[i]);
180         }
181         talloc_free(subs);
182 }
183
184 static int cmp_names(struct ccan_file *const *a, struct ccan_file *const *b,
185                      void *unused)
186 {
187         return strcmp((*a)->name, (*b)->name);
188 }
189
190 static void sort_files(struct list_head *list)
191 {
192         struct ccan_file **files = NULL, *f;
193         unsigned int i, num;
194
195         num = 0;
196         while ((f = list_top(list, struct ccan_file, list)) != NULL) {
197                 files = talloc_realloc(NULL, files, struct ccan_file *, num+1);
198                 files[num++] = f;
199                 list_del(&f->list);
200         }
201         asort(files, num, cmp_names, NULL);
202
203         for (i = 0; i < num; i++)
204                 list_add_tail(list, &files[i]->list);
205         talloc_free(files);
206 }
207
208 struct manifest *get_manifest(const void *ctx, const char *dir)
209 {
210         struct manifest *m;
211         char *olddir, *canon_dir;
212         unsigned int len;
213         struct list_head *list;
214
215         if (!manifests) {
216                 manifests = talloc(NULL, struct htable_manifest);
217                 htable_manifest_init(manifests);
218         }
219
220         olddir = talloc_getcwd(NULL);
221         if (!olddir)
222                 err(1, "Getting current directory");
223
224         if (chdir(dir) != 0)
225                 err(1, "Failed to chdir to %s", dir);
226
227         canon_dir = talloc_getcwd(olddir);
228         if (!canon_dir)
229                 err(1, "Getting current directory");
230
231         m = htable_manifest_get(manifests, canon_dir);
232         if (m)
233                 goto done;
234
235         m = talloc_linked(ctx, talloc(NULL, struct manifest));
236         m->info_file = NULL;
237         m->compiled[COMPILE_NORMAL] = m->compiled[COMPILE_NOFEAT] = NULL;
238         m->dir = talloc_steal(m, canon_dir);
239         list_head_init(&m->c_files);
240         list_head_init(&m->h_files);
241         list_head_init(&m->api_tests);
242         list_head_init(&m->run_tests);
243         list_head_init(&m->compile_ok_tests);
244         list_head_init(&m->compile_fail_tests);
245         list_head_init(&m->other_test_c_files);
246         list_head_init(&m->other_test_files);
247         list_head_init(&m->other_files);
248         list_head_init(&m->examples);
249         list_head_init(&m->mangled_examples);
250         list_head_init(&m->deps);
251
252         len = strlen(m->dir);
253         while (len && m->dir[len-1] == '/')
254                 m->dir[--len] = '\0';
255
256         m->basename = strrchr(m->dir, '/');
257         if (!m->basename)
258                 errx(1, "I don't expect to be run from the root directory");
259         m->basename++;
260
261         /* We expect the ccan dir to be two levels above module dir. */
262         if (!ccan_dir) {
263                 char *p, *dir;
264                 dir = talloc_strdup(NULL, m->dir);
265                 p = strrchr(dir, '/');
266                 if (!p)
267                         errx(1, "I expect the ccan root directory in ../..");
268                 *p = '\0';
269                 p = strrchr(dir, '/');
270                 if (!p)
271                         errx(1, "I expect the ccan root directory in ../..");
272                 *p = '\0';
273                 ccan_dir = dir;
274         }
275
276         add_files(m, "");
277
278         /* Nicer to run tests in a predictable order. */
279         foreach_ptr(list, &m->api_tests, &m->run_tests, &m->compile_ok_tests,
280                     &m->compile_fail_tests)
281                 sort_files(list);
282
283         htable_manifest_add(manifests, m);
284
285 done:
286         if (chdir(olddir) != 0)
287                 err(1, "Returning to original directory '%s'", olddir);
288         talloc_free(olddir);
289
290         return m;
291 }
292
293
294 /**
295  * remove_comments - strip comments from a line, return copy.
296  * @line: line to copy
297  * @in_comment: are we already within a comment (from prev line).
298  * @unterminated: are we still in a comment for next line.
299  */
300 static char *remove_comments(const char *line, bool in_comment,
301                              bool *unterminated)
302 {
303         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
304
305         p = ret;
306         for (;;) {
307                 if (!in_comment) {
308                         /* Find first comment. */
309                         const char *old_comment = strstr(line, "/*");
310                         const char *new_comment = strstr(line, "//");
311                         const char *comment;
312
313                         if (new_comment && old_comment)
314                                 comment = new_comment < old_comment
315                                         ? new_comment : old_comment;
316                         else if (old_comment)
317                                 comment = old_comment;
318                         else if (new_comment)
319                                 comment = new_comment;
320                         else {
321                                 /* Nothing more. */
322                                 strcpy(p, line);
323                                 *unterminated = false;
324                                 break;
325                         }
326
327                         /* Copy up to comment. */
328                         memcpy(p, line, comment - line);
329                         p += comment - line;
330                         line += comment - line + 2;
331
332                         if (comment == new_comment) {
333                                 /* We're done: goes to EOL. */
334                                 p[0] = '\0';
335                                 *unterminated = false;
336                                 break;
337                         }
338                         in_comment = true;
339                 }
340
341                 if (in_comment) {
342                         const char *end = strstr(line, "*/");
343                         if (!end) {
344                                 *unterminated = true;
345                                 p[0] = '\0';
346                                 break;
347                         }
348                         line = end+2;
349                         in_comment = false;
350                 }
351         }
352         return ret;
353 }
354
/* True if @line consists solely of spaces, carriage returns and tabs
 * (which includes the empty string). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; empty means we landed on the NUL. */
        return line[strspn(line, " \r\t")] == '\0';
}
359
/* True if @line ends in a backslash, i.e. it continues on the next line. */
static bool continues(const char *line)
{
        /* Technically, any odd number of these.  But who cares? */
        const bool ends_in_backslash = strends(line, "\\");

        return ends_in_backslash;
}
365
366 /* Get token if it's equal to token. */
367 bool get_token(const char **line, const char *token)
368 {
369         unsigned int toklen;
370
371         *line += strspn(*line, " \t");
372         if (cisalnum(token[0]) || token[0] == '_')
373                 toklen = strspn(*line, IDENT_CHARS);
374         else {
375                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
376                 toklen = strlen(token);
377         }
378
379         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
380                 *line += toklen;
381                 return true;
382         }
383         return false;
384 }
385
386 char *get_symbol_token(void *ctx, const char **line)
387 {
388         unsigned int toklen;
389         char *ret;
390
391         *line += strspn(*line, " \t");
392         toklen = strspn(*line, IDENT_CHARS);
393         if (!toklen)
394                 return NULL;
395         ret = talloc_strndup(ctx, *line, toklen);
396         *line += toklen;
397         return ret;
398 }
399
400 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
401 {
402         bool brackets, defined;
403
404         cond->inverse = get_token(line, "!");
405         defined = get_token(line, "defined");
406         brackets = get_token(line, "(");
407         cond->symbol = get_symbol_token(cond, line);
408         if (!cond->symbol)
409                 return false;
410         if (brackets && !get_token(line, ")"))
411                 return false;
412         if (!defined)
413                 cond->type = PP_COND_IF;
414
415         /* FIXME: We just chain them, ignoring operators. */
416         if (get_token(line, "||") || get_token(line, "&&")) {
417                 struct pp_conditions *sub = talloc(cond, struct pp_conditions);
418
419                 sub->parent = cond->parent;
420                 sub->type = PP_COND_IFDEF;
421                 if (parse_hash_if(sub, line))
422                         cond->parent = sub;
423         }
424
425         return true;
426 }
427
428 /* FIXME: Get serious! */
429 static struct pp_conditions *analyze_directive(struct ccan_file *f,
430                                                const char *line,
431                                                struct pp_conditions *parent)
432 {
433         struct pp_conditions *cond = talloc(f, struct pp_conditions);
434         bool unused;
435
436         line = remove_comments(line, false, &unused);
437
438         cond->parent = parent;
439         cond->type = PP_COND_IFDEF;
440
441         if (!get_token(&line, "#"))
442                 abort();
443
444         if (get_token(&line, "if")) {
445                 if (!parse_hash_if(cond, &line))
446                         goto unknown;
447         } else if (get_token(&line, "elif")) {
448                 /* Malformed? */
449                 if (!parent)
450                         return NULL;
451                 cond->parent = parent->parent;
452                 /* FIXME: Not quite true.  This implies !parent, but we don't
453                  * do multiple conditionals yet. */
454                 if (!parse_hash_if(cond, &line))
455                         goto unknown;
456         } else if (get_token(&line, "ifdef")) {
457                 bool brackets;
458                 cond->inverse = false;
459                 brackets = get_token(&line, "(");
460                 cond->symbol = get_symbol_token(cond, &line);
461                 if (!cond->symbol)
462                         goto unknown;
463                 if (brackets && !get_token(&line, ")"))
464                         goto unknown;
465         } else if (get_token(&line, "ifndef")) {
466                 bool brackets;
467                 cond->inverse = true;
468                 brackets = get_token(&line, "(");
469                 cond->symbol = get_symbol_token(cond, &line);
470                 if (!cond->symbol)
471                         goto unknown;
472                 if (brackets && !get_token(&line, ")"))
473                         goto unknown;
474         } else if (get_token(&line, "else")) {
475                 /* Malformed? */
476                 if (!parent)
477                         return NULL;
478
479                 *cond = *parent;
480                 cond->inverse = !cond->inverse;
481                 return cond;
482         } else if (get_token(&line, "endif")) {
483                 talloc_free(cond);
484                 /* Malformed? */
485                 if (!parent)
486                         return NULL;
487                 /* Back up one! */
488                 return parent->parent;
489         } else {
490                 /* Not a conditional. */
491                 talloc_free(cond);
492                 return parent;
493         }
494
495         if (!is_empty(line))
496                 goto unknown;
497         return cond;
498
499 unknown:
500         cond->type = PP_COND_UNKNOWN;
501         return cond;
502 }
503
504 /* This parser is rough, but OK if code is reasonably neat. */
505 struct line_info *get_ccan_line_info(struct ccan_file *f)
506 {
507         bool continued = false, in_comment = false;
508         struct pp_conditions *cond = NULL;
509         unsigned int i;
510
511         if (f->line_info)
512                 return f->line_info;
513
514         get_ccan_file_lines(f);
515         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
516
517         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
518                 char *p;
519                 bool still_doc_line;
520
521                 /* Current conditions apply to this line. */
522                 f->line_info[i].cond = cond;
523                 f->line_info[i].continued = continued;
524
525                 if (continued) {
526                         /* Same as last line. */
527                         f->line_info[i].type = f->line_info[i-1].type;
528                         /* Update in_comment. */
529                         remove_comments(f->lines[i], in_comment, &in_comment);
530                         continue;
531                 }
532
533                 /* Preprocessor directive? */
534                 if (!in_comment
535                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
536                         f->line_info[i].type = PREPROC_LINE;
537                         cond = analyze_directive(f, f->lines[i], cond);
538                         continue;
539                 }
540
541                 still_doc_line = (in_comment
542                                   && f->line_info[i-1].type == DOC_LINE);
543
544                 p = remove_comments(f->lines[i], in_comment, &in_comment);
545                 if (is_empty(p)) {
546                         if (strstarts(f->lines[i], "/**") || still_doc_line)
547                                 f->line_info[i].type = DOC_LINE;
548                         else
549                                 f->line_info[i].type = COMMENT_LINE;
550                 } else
551                         f->line_info[i].type = CODE_LINE;
552                 talloc_free(p);
553         }
554         return f->line_info;
555 }
556
557 struct symbol {
558         struct list_node list;
559         const char *name;
560         const unsigned int *value;
561 };
562
563 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
564 {
565         struct symbol *i;
566
567         list_for_each(syms, i, list)
568                 if (streq(sym, i->name))
569                         return i;
570         return NULL;
571 }
572
573 static enum line_compiled get_pp(struct pp_conditions *cond,
574                                  struct list_head *syms)
575 {
576         struct symbol *sym;
577         unsigned int val;
578         enum line_compiled parent, ret;
579
580         /* No conditions?  Easy. */
581         if (!cond)
582                 return COMPILED;
583
584         /* Check we get here at all. */
585         parent = get_pp(cond->parent, syms);
586         if (parent == NOT_COMPILED)
587                 return NOT_COMPILED;
588
589         if (cond->type == PP_COND_UNKNOWN)
590                 return MAYBE_COMPILED;
591
592         sym = find_symbol(syms, cond->symbol);
593         if (!sym)
594                 return MAYBE_COMPILED;
595
596         switch (cond->type) {
597         case PP_COND_IF:
598                 /* Undefined is 0. */
599                 val = sym->value ? *sym->value : 0;
600                 if (!val == cond->inverse)
601                         ret = COMPILED;
602                 else
603                         ret = NOT_COMPILED;
604                 break;
605
606         case PP_COND_IFDEF:
607                 if (cond->inverse == !sym->value)
608                         ret = COMPILED;
609                 else
610                         ret = NOT_COMPILED;
611                 break;
612
613         default:
614                 abort();
615         }
616
617         /* If parent didn't know, NO == NO, but YES == MAYBE. */
618         if (parent == MAYBE_COMPILED && ret == COMPILED)
619                 ret = MAYBE_COMPILED;
620         return ret;
621 }
622
623 static void add_symbol(struct list_head *head,
624                        const char *symbol, const unsigned int *value)
625 {
626         struct symbol *sym = talloc(head, struct symbol);
627         sym->name = symbol;
628         sym->value = value;
629         list_add(head, &sym->list);
630 }
631         
632 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
633                                     const char *symbol,
634                                     const unsigned int *value,
635                                     ...)
636 {
637         enum line_compiled ret;
638         struct list_head *head;
639         va_list ap;
640
641         head = talloc(NULL, struct list_head);
642         list_head_init(head);
643
644         va_start(ap, value);
645         add_symbol(head, symbol, value);
646
647         while ((symbol = va_arg(ap, const char *)) != NULL) {
648                 value = va_arg(ap, const unsigned int *);
649                 add_symbol(head, symbol, value);
650         }
651         ret = get_pp(cond, head);
652         talloc_free(head);
653         return ret;
654 }
655
656 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
657                       const char *errorfmt, ...)
658 {
659         va_list ap;
660
661         struct file_error *fe = talloc(score, struct file_error);
662         fe->file = f;
663         fe->line = line;
664         list_add_tail(&score->per_file_errors, &fe->list);
665
666         if (!score->error)
667                 score->error = talloc_strdup(score, "");
668         
669         if (verbose < 2 && strcount(score->error, "\n") > 5)
670                 return;
671
672         if (line)
673                 score->error = talloc_asprintf_append(score->error,
674                                                       "%s:%u:",
675                                                       f->fullname, line);
676         else
677                 score->error = talloc_asprintf_append(score->error,
678                                                       "%s:", f->fullname);
679
680         va_start(ap, errorfmt);
681         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
682         va_end(ap);
683         score->error = talloc_append_string(score->error, "\n");
684
685         if (verbose < 2 && strcount(score->error, "\n") > 5)
686                 score->error = talloc_append_string(score->error,
687                                     "... more (use -vv to see them all)\n");
688 }