ccanlint: fix error with --target=build
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include <ccan/foreach/foreach.h>
8 #include <ccan/asort/asort.h>
9 #include "../tools.h"
10 #include <unistd.h>
11 #include <sys/types.h>
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #include <err.h>
15 #include <errno.h>
16 #include <dirent.h>
17 #include <ctype.h>
18 #include <stdarg.h>
19 #include <assert.h>
20
/* Path of the ccan directory.  If this is still NULL when get_manifest()
 * runs, it is set to the module directory with its last two path
 * components removed. */
const char *ccan_dir;
22
23 const char *get_ccan_file_contents(struct ccan_file *f)
24 {
25         if (!f->contents) {
26                 f->contents = grab_file(f, f->fullname, &f->contents_size);
27                 if (!f->contents)
28                         err(1, "Reading file %s", f->fullname);
29         }
30         return f->contents;
31 }
32
33 char **get_ccan_file_lines(struct ccan_file *f)
34 {
35         if (!f->lines)
36                 f->lines = strsplit(f, get_ccan_file_contents(f),
37                                     "\n", &f->num_lines);
38
39         return f->lines;
40 }
41
42 struct list_head *get_ccan_file_docs(struct ccan_file *f)
43 {
44         if (!f->doc_sections) {
45                 get_ccan_file_lines(f);
46                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
47         }
48         return f->doc_sections;
49 }
50
51 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
52 {
53         struct ccan_file *f;
54
55         assert(dir[0] == '/');
56
57         f = talloc(ctx, struct ccan_file);
58         f->lines = NULL;
59         f->line_info = NULL;
60         f->doc_sections = NULL;
61         f->compiled = NULL;
62         f->name = talloc_steal(f, name);
63         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
64         f->contents = NULL;
65         f->cov_compiled = NULL;
66         return f;
67 }
68
69 static void add_files(struct manifest *m, const char *dir)
70 {
71         DIR *d;
72         struct dirent *ent;
73
74         if (dir[0])
75                 d = opendir(dir);
76         else
77                 d = opendir(".");
78         if (!d)
79                 err(1, "Opening directory %s", dir[0] ? dir : ".");
80
81         while ((ent = readdir(d)) != NULL) {
82                 struct stat st;
83                 struct ccan_file *f;
84                 struct list_head *dest;
85                 bool is_c_src;
86
87                 if (ent->d_name[0] == '.')
88                         continue;
89
90                 f = new_ccan_file(m, m->dir,
91                                   talloc_asprintf(m, "%s%s",
92                                                   dir, ent->d_name));
93                 if (lstat(f->name, &st) != 0)
94                         err(1, "lstat %s", f->name);
95
96                 if (S_ISDIR(st.st_mode)) {
97                         f->name = talloc_append_string(f->name, "/");
98                         add_files(m, f->name);
99                         continue;
100                 }
101                 if (!S_ISREG(st.st_mode)) {
102                         talloc_free(f);
103                         continue;
104                 }
105
106                 if (streq(f->name, "_info")) {
107                         m->info_file = f;
108                         continue;
109                 }
110
111                 is_c_src = strends(f->name, ".c");
112                 if (!is_c_src && !strends(f->name, ".h")) {
113                         dest = &m->other_files;
114                         continue;
115                 }
116
117                 if (!strchr(f->name, '/')) {
118                         if (is_c_src)
119                                 dest = &m->c_files;
120                         else
121                                 dest = &m->h_files;
122                 } else if (strstarts(f->name, "test/")) {
123                         if (is_c_src) {
124                                 if (strstarts(f->name, "test/api"))
125                                         dest = &m->api_tests;
126                                 else if (strstarts(f->name, "test/run"))
127                                         dest = &m->run_tests;
128                                 else if (strstarts(f->name, "test/compile_ok"))
129                                         dest = &m->compile_ok_tests;
130                                 else if (strstarts(f->name, "test/compile_fail"))
131                                         dest = &m->compile_fail_tests;
132                                 else
133                                         dest = &m->other_test_c_files;
134                         } else
135                                 dest = &m->other_test_files;
136                 } else
137                         dest = &m->other_files;
138
139                 list_add(dest, &f->list);
140         }
141         closedir(d);
142 }
143
144 static int cmp_names(struct ccan_file *const *a, struct ccan_file *const *b,
145                      void *unused)
146 {
147         return strcmp((*a)->name, (*b)->name);
148 }
149
150 static void sort_files(struct list_head *list)
151 {
152         struct ccan_file **files = NULL, *f;
153         unsigned int i, num;
154
155         num = 0;
156         while ((f = list_top(list, struct ccan_file, list)) != NULL) {
157                 files = talloc_realloc(NULL, files, struct ccan_file *, num+1);
158                 files[num++] = f;
159                 list_del(&f->list);
160         }
161         asort(files, num, cmp_names, NULL);
162
163         for (i = 0; i < num; i++)
164                 list_add_tail(list, &files[i]->list);
165         talloc_free(files);
166 }
167
168 struct manifest *get_manifest(const void *ctx, const char *dir)
169 {
170         struct manifest *m = talloc(ctx, struct manifest);
171         char *olddir;
172         unsigned int len;
173         struct list_head *list;
174
175         m->info_file = NULL;
176         m->compiled = NULL;
177         list_head_init(&m->c_files);
178         list_head_init(&m->h_files);
179         list_head_init(&m->api_tests);
180         list_head_init(&m->run_tests);
181         list_head_init(&m->compile_ok_tests);
182         list_head_init(&m->compile_fail_tests);
183         list_head_init(&m->other_test_c_files);
184         list_head_init(&m->other_test_files);
185         list_head_init(&m->other_files);
186         list_head_init(&m->examples);
187         list_head_init(&m->mangled_examples);
188         list_head_init(&m->deps);
189
190         olddir = talloc_getcwd(NULL);
191         if (!olddir)
192                 err(1, "Getting current directory");
193
194         if (chdir(dir) != 0)
195                 err(1, "Failed to chdir to %s", dir);
196
197         m->dir = talloc_getcwd(m);
198         if (!m->dir)
199                 err(1, "Getting current directory");
200
201         len = strlen(m->dir);
202         while (len && m->dir[len-1] == '/')
203                 m->dir[--len] = '\0';
204
205         m->basename = strrchr(m->dir, '/');
206         if (!m->basename)
207                 errx(1, "I don't expect to be run from the root directory");
208         m->basename++;
209
210         /* We expect the ccan dir to be two levels above module dir. */
211         if (!ccan_dir) {
212                 char *p;
213                 ccan_dir = talloc_strdup(NULL, m->dir);
214                 p = strrchr(ccan_dir, '/');
215                 *p = '\0';
216                 p = strrchr(ccan_dir, '/');
217                 *p = '\0';
218         }
219
220         add_files(m, "");
221
222         /* Nicer to run tests in a predictable order. */
223         foreach_ptr(list, &m->api_tests, &m->run_tests, &m->compile_ok_tests,
224                     &m->compile_fail_tests)
225                 sort_files(list);
226
227         if (chdir(olddir) != 0)
228                 err(1, "Returning to original directory '%s'", olddir);
229         talloc_free(olddir);
230
231         return m;
232 }
233
234
235 /**
236  * remove_comments - strip comments from a line, return copy.
237  * @line: line to copy
238  * @in_comment: are we already within a comment (from prev line).
239  * @unterminated: are we still in a comment for next line.
240  */
241 static char *remove_comments(const char *line, bool in_comment,
242                              bool *unterminated)
243 {
244         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
245
246         p = ret;
247         for (;;) {
248                 if (!in_comment) {
249                         /* Find first comment. */
250                         const char *old_comment = strstr(line, "/*");
251                         const char *new_comment = strstr(line, "//");
252                         const char *comment;
253
254                         if (new_comment && old_comment)
255                                 comment = new_comment < old_comment
256                                         ? new_comment : old_comment;
257                         else if (old_comment)
258                                 comment = old_comment;
259                         else if (new_comment)
260                                 comment = new_comment;
261                         else {
262                                 /* Nothing more. */
263                                 strcpy(p, line);
264                                 *unterminated = false;
265                                 break;
266                         }
267
268                         /* Copy up to comment. */
269                         memcpy(p, line, comment - line);
270                         p += comment - line;
271                         line += comment - line + 2;
272
273                         if (comment == new_comment) {
274                                 /* We're done: goes to EOL. */
275                                 p[0] = '\0';
276                                 *unterminated = false;
277                                 break;
278                         }
279                         in_comment = true;
280                 }
281
282                 if (in_comment) {
283                         const char *end = strstr(line, "*/");
284                         if (!end) {
285                                 *unterminated = true;
286                                 p[0] = '\0';
287                                 break;
288                         }
289                         line = end+2;
290                         in_comment = false;
291                 }
292         }
293         return ret;
294 }
295
/* True if @line consists only of spaces and tabs (or is the empty string). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; the line is empty if nothing follows. */
        return line[strspn(line, " \t")] == '\0';
}
300
/* True if @line ends in a backslash, i.e. it continues onto the next line.
 * Strictly this should require an odd number of trailing backslashes, but
 * the simple test is good enough here. */
static bool continues(const char *line)
{
        bool ends_in_backslash = strends(line, "\\");

        return ends_in_backslash;
}
306
307 /* Get token if it's equal to token. */
308 bool get_token(const char **line, const char *token)
309 {
310         unsigned int toklen;
311
312         *line += strspn(*line, " \t");
313         if (isalnum(token[0]) || token[0] == '_')
314                 toklen = strspn(*line, IDENT_CHARS);
315         else {
316                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
317                 toklen = strlen(token);
318         }
319
320         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
321                 *line += toklen;
322                 return true;
323         }
324         return false;
325 }
326
327 char *get_symbol_token(void *ctx, const char **line)
328 {
329         unsigned int toklen;
330         char *ret;
331
332         *line += strspn(*line, " \t");
333         toklen = strspn(*line, IDENT_CHARS);
334         if (!toklen)
335                 return NULL;
336         ret = talloc_strndup(ctx, *line, toklen);
337         *line += toklen;
338         return ret;
339 }
340
341 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
342 {
343         bool brackets, defined;
344
345         cond->inverse = get_token(line, "!");
346         defined = get_token(line, "defined");
347         brackets = get_token(line, "(");
348         cond->symbol = get_symbol_token(cond, line);
349         if (!cond->symbol)
350                 return false;
351         if (brackets && !get_token(line, ")"))
352                 return false;
353         if (!defined)
354                 cond->type = PP_COND_IF;
355         return true;
356 }
357
358 /* FIXME: Get serious! */
359 static struct pp_conditions *analyze_directive(struct ccan_file *f,
360                                                const char *line,
361                                                struct pp_conditions *parent)
362 {
363         struct pp_conditions *cond = talloc(f, struct pp_conditions);
364         bool unused;
365
366         line = remove_comments(line, false, &unused);
367
368         cond->parent = parent;
369         cond->type = PP_COND_IFDEF;
370
371         if (!get_token(&line, "#"))
372                 abort();
373
374         if (get_token(&line, "if")) {
375                 if (!parse_hash_if(cond, &line))
376                         goto unknown;
377         } else if (get_token(&line, "elif")) {
378                 /* Malformed? */
379                 if (!parent)
380                         return NULL;
381                 cond->parent = parent->parent;
382                 /* FIXME: Not quite true.  This implies !parent, but we don't
383                  * do multiple conditionals yet. */
384                 if (!parse_hash_if(cond, &line))
385                         goto unknown;
386         } else if (get_token(&line, "ifdef")) {
387                 bool brackets;
388                 cond->inverse = false;
389                 brackets = get_token(&line, "(");
390                 cond->symbol = get_symbol_token(cond, &line);
391                 if (!cond->symbol)
392                         goto unknown;
393                 if (brackets && !get_token(&line, ")"))
394                         goto unknown;
395         } else if (get_token(&line, "ifndef")) {
396                 bool brackets;
397                 cond->inverse = true;
398                 brackets = get_token(&line, "(");
399                 cond->symbol = get_symbol_token(cond, &line);
400                 if (!cond->symbol)
401                         goto unknown;
402                 if (brackets && !get_token(&line, ")"))
403                         goto unknown;
404         } else if (get_token(&line, "else")) {
405                 /* Malformed? */
406                 if (!parent)
407                         return NULL;
408
409                 *cond = *parent;
410                 cond->inverse = !cond->inverse;
411                 return cond;
412         } else if (get_token(&line, "endif")) {
413                 talloc_free(cond);
414                 /* Malformed? */
415                 if (!parent)
416                         return NULL;
417                 /* Back up one! */
418                 return parent->parent;
419         } else {
420                 /* Not a conditional. */
421                 talloc_free(cond);
422                 return parent;
423         }
424
425         if (!is_empty(line))
426                 goto unknown;
427         return cond;
428
429 unknown:
430         cond->type = PP_COND_UNKNOWN;
431         return cond;
432 }
433
434 /* This parser is rough, but OK if code is reasonably neat. */
435 struct line_info *get_ccan_line_info(struct ccan_file *f)
436 {
437         bool continued = false, in_comment = false;
438         struct pp_conditions *cond = NULL;
439         unsigned int i;
440
441         if (f->line_info)
442                 return f->line_info;
443
444         get_ccan_file_lines(f);
445         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
446
447         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
448                 char *p;
449                 bool still_doc_line;
450
451                 /* Current conditions apply to this line. */
452                 f->line_info[i].cond = cond;
453                 f->line_info[i].continued = continued;
454
455                 if (continued) {
456                         /* Same as last line. */
457                         f->line_info[i].type = f->line_info[i-1].type;
458                         /* Update in_comment. */
459                         remove_comments(f->lines[i], in_comment, &in_comment);
460                         continue;
461                 }
462
463                 /* Preprocessor directive? */
464                 if (!in_comment
465                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
466                         f->line_info[i].type = PREPROC_LINE;
467                         cond = analyze_directive(f, f->lines[i], cond);
468                         continue;
469                 }
470
471                 still_doc_line = (in_comment
472                                   && f->line_info[i-1].type == DOC_LINE);
473
474                 p = remove_comments(f->lines[i], in_comment, &in_comment);
475                 if (is_empty(p)) {
476                         if (strstarts(f->lines[i], "/**") || still_doc_line)
477                                 f->line_info[i].type = DOC_LINE;
478                         else
479                                 f->line_info[i].type = COMMENT_LINE;
480                 } else
481                         f->line_info[i].type = CODE_LINE;
482                 talloc_free(p);
483         }
484         return f->line_info;
485 }
486
/* One preprocessor symbol whose state is known when evaluating conditions. */
struct symbol {
        /* Linkage in the list searched by find_symbol(). */
        struct list_node list;
        /* Symbol name, compared against pp_conditions->symbol. */
        const char *name;
        /* Points to the symbol's value; NULL means the symbol is undefined. */
        const unsigned int *value;
};
492
493 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
494 {
495         struct symbol *i;
496
497         list_for_each(syms, i, list)
498                 if (streq(sym, i->name))
499                         return i;
500         return NULL;
501 }
502
503 static enum line_compiled get_pp(struct pp_conditions *cond,
504                                  struct list_head *syms)
505 {
506         struct symbol *sym;
507         unsigned int val;
508         enum line_compiled parent, ret;
509
510         /* No conditions?  Easy. */
511         if (!cond)
512                 return COMPILED;
513
514         /* Check we get here at all. */
515         parent = get_pp(cond->parent, syms);
516         if (parent == NOT_COMPILED)
517                 return NOT_COMPILED;
518
519         if (cond->type == PP_COND_UNKNOWN)
520                 return MAYBE_COMPILED;
521
522         sym = find_symbol(syms, cond->symbol);
523         if (!sym)
524                 return MAYBE_COMPILED;
525
526         switch (cond->type) {
527         case PP_COND_IF:
528                 /* Undefined is 0. */
529                 val = sym->value ? *sym->value : 0;
530                 if (!val == cond->inverse)
531                         ret = COMPILED;
532                 else
533                         ret = NOT_COMPILED;
534                 break;
535
536         case PP_COND_IFDEF:
537                 if (cond->inverse == !sym->value)
538                         ret = COMPILED;
539                 else
540                         ret = NOT_COMPILED;
541                 break;
542
543         default:
544                 abort();
545         }
546
547         /* If parent didn't know, NO == NO, but YES == MAYBE. */
548         if (parent == MAYBE_COMPILED && ret == COMPILED)
549                 ret = MAYBE_COMPILED;
550         return ret;
551 }
552
553 static void add_symbol(struct list_head *head,
554                        const char *symbol, const unsigned int *value)
555 {
556         struct symbol *sym = talloc(head, struct symbol);
557         sym->name = symbol;
558         sym->value = value;
559         list_add(head, &sym->list);
560 }
561         
562 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
563                                     const char *symbol,
564                                     const unsigned int *value,
565                                     ...)
566 {
567         enum line_compiled ret;
568         struct list_head *head;
569         va_list ap;
570
571         head = talloc(NULL, struct list_head);
572         list_head_init(head);
573
574         va_start(ap, value);
575         add_symbol(head, symbol, value);
576
577         while ((symbol = va_arg(ap, const char *)) != NULL) {
578                 value = va_arg(ap, const unsigned int *);
579                 add_symbol(head, symbol, value);
580         }
581         ret = get_pp(cond, head);
582         talloc_free(head);
583         return ret;
584 }
585
586 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
587                       const char *error)
588 {
589         struct file_error *fe = talloc(score, struct file_error);
590         fe->file = f;
591         fe->line = line;
592         fe->error = error;
593         list_add_tail(&score->per_file_errors, &fe->list);
594 }