ccanlint: use familiar names for temporary files, show them with -vv.
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17 #include <assert.h>
18
19 const char *ccan_dir;
20
21 char **get_ccan_file_lines(struct ccan_file *f)
22 {
23         if (!f->lines)
24                 f->lines = strsplit(f, f->contents, "\n", &f->num_lines);
25
26         return f->lines;
27 }
28
29 struct list_head *get_ccan_file_docs(struct ccan_file *f)
30 {
31         if (!f->doc_sections) {
32                 get_ccan_file_lines(f);
33                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
34         }
35         return f->doc_sections;
36 }
37
38 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
39 {
40         struct ccan_file *f;
41
42         assert(dir[0] == '/');
43
44         f = talloc(ctx, struct ccan_file);
45         f->lines = NULL;
46         f->line_info = NULL;
47         f->doc_sections = NULL;
48         f->compiled = NULL;
49         f->name = talloc_steal(f, name);
50         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
51         return f;
52 }
53
54 static void add_files(struct manifest *m, const char *dir)
55 {
56         DIR *d;
57         struct dirent *ent;
58
59         if (dir[0])
60                 d = opendir(dir);
61         else
62                 d = opendir(".");
63         if (!d)
64                 err(1, "Opening directory %s", dir[0] ? dir : ".");
65
66         while ((ent = readdir(d)) != NULL) {
67                 struct stat st;
68                 struct ccan_file *f;
69                 struct list_head *dest;
70                 bool is_c_src;
71
72                 if (ent->d_name[0] == '.')
73                         continue;
74
75                 f = new_ccan_file(m, m->dir,
76                                   talloc_asprintf(m, "%s%s",
77                                                   dir, ent->d_name));
78                 if (lstat(f->name, &st) != 0)
79                         err(1, "lstat %s", f->name);
80
81                 if (S_ISDIR(st.st_mode)) {
82                         f->name = talloc_append_string(f->name, "/");
83                         add_files(m, f->name);
84                         continue;
85                 }
86                 if (!S_ISREG(st.st_mode)) {
87                         talloc_free(f);
88                         continue;
89                 }
90
91                 if (streq(f->name, "_info")) {
92                         m->info_file = f;
93                         f->contents = grab_file(f, f->name, &f->contents_size);
94                         if (!f->contents)
95                                 err(1, "Reading file %s", f->name);
96                         continue;
97                 }
98
99                 is_c_src = strends(f->name, ".c");
100                 if (!is_c_src && !strends(f->name, ".h")) {
101                         /* We don't pull in contents of non-source files */
102                         dest = &m->other_files;
103                         continue;
104                 }
105
106                 f->contents = grab_file(f, f->name, &f->contents_size);
107                 if (!f->contents)
108                         err(1, "Reading file %s", f->name);
109
110                 if (!strchr(f->name, '/')) {
111                         if (is_c_src)
112                                 dest = &m->c_files;
113                         else
114                                 dest = &m->h_files;
115                 } else if (strstarts(f->name, "test/")) {
116                         if (is_c_src) {
117                                 if (strstarts(f->name, "test/api"))
118                                         dest = &m->api_tests;
119                                 else if (strstarts(f->name, "test/run"))
120                                         dest = &m->run_tests;
121                                 else if (strstarts(f->name, "test/compile_ok"))
122                                         dest = &m->compile_ok_tests;
123                                 else if (strstarts(f->name, "test/compile_fail"))
124                                         dest = &m->compile_fail_tests;
125                                 else
126                                         dest = &m->other_test_c_files;
127                         } else
128                                 dest = &m->other_test_files;
129                 } else
130                         dest = &m->other_files;
131
132                 list_add(dest, &f->list);
133         }
134         closedir(d);
135 }
136
137 char *report_on_lines(struct list_head *files,
138                       char *(*report)(const char *),
139                       char *sofar)
140 {
141         struct ccan_file *f;
142
143         list_for_each(files, f, list) {
144                 unsigned int i;
145                 char **lines = get_ccan_file_lines(f);
146
147                 for (i = 0; i < f->num_lines; i++) {
148                         char *r = report(lines[i]);
149                         if (!r)
150                                 continue;
151
152                         sofar = talloc_asprintf_append(sofar,
153                                                        "%s:%u:%s\n",
154                                                        f->name, i+1, r);
155                         talloc_free(r);
156                 }
157         }
158         return sofar;
159 }
160
161 struct manifest *get_manifest(const void *ctx, const char *dir)
162 {
163         struct manifest *m = talloc(ctx, struct manifest);
164         char *olddir;
165         unsigned int len;
166
167         m->info_file = NULL;
168         list_head_init(&m->c_files);
169         list_head_init(&m->h_files);
170         list_head_init(&m->api_tests);
171         list_head_init(&m->run_tests);
172         list_head_init(&m->compile_ok_tests);
173         list_head_init(&m->compile_fail_tests);
174         list_head_init(&m->other_test_c_files);
175         list_head_init(&m->other_test_files);
176         list_head_init(&m->other_files);
177         list_head_init(&m->dep_dirs);
178
179         olddir = talloc_getcwd(NULL);
180         if (!olddir)
181                 err(1, "Getting current directory");
182
183         if (chdir(dir) != 0)
184                 err(1, "Failed to chdir to %s", dir);
185
186         m->dir = talloc_getcwd(m);
187         if (!m->dir)
188                 err(1, "Getting current directory");
189
190         len = strlen(m->dir);
191         while (len && m->dir[len-1] == '/')
192                 m->dir[--len] = '\0';
193
194         m->basename = strrchr(m->dir, '/');
195         if (!m->basename)
196                 errx(1, "I don't expect to be run from the root directory");
197         m->basename++;
198
199         /* We expect the ccan dir to be two levels above module dir. */
200         if (!ccan_dir) {
201                 char *p;
202                 ccan_dir = talloc_strdup(NULL, m->dir);
203                 p = strrchr(ccan_dir, '/');
204                 *p = '\0';
205                 p = strrchr(ccan_dir, '/');
206                 *p = '\0';
207         }
208
209         add_files(m, "");
210
211         if (chdir(olddir) != 0)
212                 err(1, "Returning to original directory '%s'", olddir);
213         talloc_free(olddir);
214
215         return m;
216 }
217
218
219 /**
220  * remove_comments - strip comments from a line, return copy.
221  * @line: line to copy
222  * @in_comment: are we already within a comment (from prev line).
223  * @unterminated: are we still in a comment for next line.
224  */
225 static char *remove_comments(const char *line, bool in_comment,
226                              bool *unterminated)
227 {
228         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
229
230         p = ret;
231         for (;;) {
232                 if (!in_comment) {
233                         /* Find first comment. */
234                         const char *old_comment = strstr(line, "/*");
235                         const char *new_comment = strstr(line, "//");
236                         const char *comment;
237
238                         if (new_comment && old_comment)
239                                 comment = new_comment < old_comment
240                                         ? new_comment : old_comment;
241                         else if (old_comment)
242                                 comment = old_comment;
243                         else if (new_comment)
244                                 comment = new_comment;
245                         else {
246                                 /* Nothing more. */
247                                 strcpy(p, line);
248                                 *unterminated = false;
249                                 break;
250                         }
251
252                         /* Copy up to comment. */
253                         memcpy(p, line, comment - line);
254                         p += comment - line;
255                         line += comment - line + 2;
256
257                         if (comment == new_comment) {
258                                 /* We're done: goes to EOL. */
259                                 p[0] = '\0';
260                                 *unterminated = false;
261                                 break;
262                         }
263                         in_comment = true;
264                 }
265
266                 if (in_comment) {
267                         const char *end = strstr(line, "*/");
268                         if (!end) {
269                                 *unterminated = true;
270                                 p[0] = '\0';
271                                 break;
272                         }
273                         line = end+2;
274                         in_comment = false;
275                 }
276         }
277         return ret;
278 }
279
/* True if the line consists solely of spaces and tabs (or is empty). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; the line is empty iff that reaches
         * the terminator. */
        return line[strspn(line, " \t")] == '\0';
}
284
/* Does this line carry on to the next one, i.e. end in a backslash? */
static bool continues(const char *line)
{
        /* Strictly speaking only an odd number of trailing backslashes
         * continues the line, but a single check is good enough here. */
        bool ends_in_backslash = strends(line, "\\");

        return ends_in_backslash;
}
290
291 /* Get token if it's equal to token. */
292 bool get_token(const char **line, const char *token)
293 {
294         unsigned int toklen;
295
296         *line += strspn(*line, " \t");
297         if (isalnum(token[0]) || token[0] == '_')
298                 toklen = strspn(*line, IDENT_CHARS);
299         else {
300                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
301                 toklen = strlen(token);
302         }
303
304         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
305                 *line += toklen;
306                 return true;
307         }
308         return false;
309 }
310
311 char *get_symbol_token(void *ctx, const char **line)
312 {
313         unsigned int toklen;
314         char *ret;
315
316         *line += strspn(*line, " \t");
317         toklen = strspn(*line, IDENT_CHARS);
318         if (!toklen)
319                 return NULL;
320         ret = talloc_strndup(ctx, *line, toklen);
321         *line += toklen;
322         return ret;
323 }
324
325 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
326 {
327         bool brackets, defined;
328
329         cond->inverse = get_token(line, "!");
330         defined = get_token(line, "defined");
331         brackets = get_token(line, "(");
332         cond->symbol = get_symbol_token(cond, line);
333         if (!cond->symbol)
334                 return false;
335         if (brackets && !get_token(line, ")"))
336                 return false;
337         if (!defined)
338                 cond->type = PP_COND_IF;
339         return true;
340 }
341
342 /* FIXME: Get serious! */
343 static struct pp_conditions *analyze_directive(struct ccan_file *f,
344                                                const char *line,
345                                                struct pp_conditions *parent)
346 {
347         struct pp_conditions *cond = talloc(f, struct pp_conditions);
348         bool unused;
349
350         line = remove_comments(line, false, &unused);
351
352         cond->parent = parent;
353         cond->type = PP_COND_IFDEF;
354
355         if (!get_token(&line, "#"))
356                 abort();
357
358         if (get_token(&line, "if")) {
359                 if (!parse_hash_if(cond, &line))
360                         goto unknown;
361         } else if (get_token(&line, "elif")) {
362                 /* Malformed? */
363                 if (!parent)
364                         return NULL;
365                 cond->parent = parent->parent;
366                 /* FIXME: Not quite true.  This implies !parent, but we don't
367                  * do multiple conditionals yet. */
368                 if (!parse_hash_if(cond, &line))
369                         goto unknown;
370         } else if (get_token(&line, "ifdef")) {
371                 bool brackets;
372                 cond->inverse = false;
373                 brackets = get_token(&line, "(");
374                 cond->symbol = get_symbol_token(cond, &line);
375                 if (!cond->symbol)
376                         goto unknown;
377                 if (brackets && !get_token(&line, ")"))
378                         goto unknown;
379         } else if (get_token(&line, "ifndef")) {
380                 bool brackets;
381                 cond->inverse = true;
382                 brackets = get_token(&line, "(");
383                 cond->symbol = get_symbol_token(cond, &line);
384                 if (!cond->symbol)
385                         goto unknown;
386                 if (brackets && !get_token(&line, ")"))
387                         goto unknown;
388         } else if (get_token(&line, "else")) {
389                 /* Malformed? */
390                 if (!parent)
391                         return NULL;
392
393                 *cond = *parent;
394                 cond->inverse = !cond->inverse;
395                 return cond;
396         } else if (get_token(&line, "endif")) {
397                 talloc_free(cond);
398                 /* Malformed? */
399                 if (!parent)
400                         return NULL;
401                 /* Back up one! */
402                 return parent->parent;
403         } else {
404                 /* Not a conditional. */
405                 talloc_free(cond);
406                 return parent;
407         }
408
409         if (!is_empty(line))
410                 goto unknown;
411         return cond;
412
413 unknown:
414         cond->type = PP_COND_UNKNOWN;
415         return cond;
416 }
417
418 /* This parser is rough, but OK if code is reasonably neat. */
419 struct line_info *get_ccan_line_info(struct ccan_file *f)
420 {
421         bool continued = false, in_comment = false;
422         struct pp_conditions *cond = NULL;
423         unsigned int i;
424
425         if (f->line_info)
426                 return f->line_info;
427
428         get_ccan_file_lines(f);
429         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
430
431         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
432                 char *p;
433                 bool still_doc_line;
434
435                 /* Current conditions apply to this line. */
436                 f->line_info[i].cond = cond;
437                 f->line_info[i].continued = continued;
438
439                 if (continued) {
440                         /* Same as last line. */
441                         f->line_info[i].type = f->line_info[i-1].type;
442                         /* Update in_comment. */
443                         remove_comments(f->lines[i], in_comment, &in_comment);
444                         continue;
445                 }
446
447                 /* Preprocessor directive? */
448                 if (!in_comment
449                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
450                         f->line_info[i].type = PREPROC_LINE;
451                         cond = analyze_directive(f, f->lines[i], cond);
452                         continue;
453                 }
454
455                 still_doc_line = (in_comment
456                                   && f->line_info[i-1].type == DOC_LINE);
457
458                 p = remove_comments(f->lines[i], in_comment, &in_comment);
459                 if (is_empty(p)) {
460                         if (strstarts(f->lines[i], "/**") || still_doc_line)
461                                 f->line_info[i].type = DOC_LINE;
462                         else
463                                 f->line_info[i].type = COMMENT_LINE;
464                 } else
465                         f->line_info[i].type = CODE_LINE;
466                 talloc_free(p);
467         }
468         return f->line_info;
469 }
470
471 struct symbol {
472         struct list_node list;
473         const char *name;
474         const unsigned int *value;
475 };
476
477 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
478 {
479         struct symbol *i;
480
481         list_for_each(syms, i, list)
482                 if (streq(sym, i->name))
483                         return i;
484         return NULL;
485 }
486
487 static enum line_compiled get_pp(struct pp_conditions *cond,
488                                  struct list_head *syms)
489 {
490         struct symbol *sym;
491         unsigned int val;
492         enum line_compiled parent, ret;
493
494         /* No conditions?  Easy. */
495         if (!cond)
496                 return COMPILED;
497
498         /* Check we get here at all. */
499         parent = get_pp(cond->parent, syms);
500         if (parent == NOT_COMPILED)
501                 return NOT_COMPILED;
502
503         if (cond->type == PP_COND_UNKNOWN)
504                 return MAYBE_COMPILED;
505
506         sym = find_symbol(syms, cond->symbol);
507         if (!sym)
508                 return MAYBE_COMPILED;
509
510         switch (cond->type) {
511         case PP_COND_IF:
512                 /* Undefined is 0. */
513                 val = sym->value ? *sym->value : 0;
514                 if (!val == cond->inverse)
515                         ret = COMPILED;
516                 else
517                         ret = NOT_COMPILED;
518                 break;
519
520         case PP_COND_IFDEF:
521                 if (cond->inverse == !sym->value)
522                         ret = COMPILED;
523                 else
524                         ret = NOT_COMPILED;
525                 break;
526
527         default:
528                 abort();
529         }
530
531         /* If parent didn't know, NO == NO, but YES == MAYBE. */
532         if (parent == MAYBE_COMPILED && ret == COMPILED)
533                 ret = MAYBE_COMPILED;
534         return ret;
535 }
536
537 static void add_symbol(struct list_head *head,
538                        const char *symbol, const unsigned int *value)
539 {
540         struct symbol *sym = talloc(head, struct symbol);
541         sym->name = symbol;
542         sym->value = value;
543         list_add(head, &sym->list);
544 }
545         
546 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
547                                     const char *symbol,
548                                     const unsigned int *value,
549                                     ...)
550 {
551         enum line_compiled ret;
552         struct list_head *head;
553         va_list ap;
554
555         head = talloc(NULL, struct list_head);
556         list_head_init(head);
557
558         va_start(ap, value);
559         add_symbol(head, symbol, value);
560
561         while ((symbol = va_arg(ap, const char *)) != NULL) {
562                 value = va_arg(ap, const unsigned int *);
563                 add_symbol(head, symbol, value);
564         }
565         ret = get_pp(cond, head);
566         talloc_free(head);
567         return ret;
568 }
569