c03931d8b45874cd97366647adbe50bf269bae80
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17
/* Top of the ccan tree.  If still NULL when get_manifest() runs, it is
 * derived from the module directory by dropping the last two path
 * components. */
const char *ccan_dir;
19
20 char **get_ccan_file_lines(struct ccan_file *f)
21 {
22         if (!f->lines)
23                 f->lines = strsplit(f, f->contents, "\n", &f->num_lines);
24
25         return f->lines;
26 }
27
28 struct list_head *get_ccan_file_docs(struct ccan_file *f)
29 {
30         if (!f->doc_sections) {
31                 get_ccan_file_lines(f);
32                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
33         }
34         return f->doc_sections;
35 }
36
37 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
38 {
39         struct ccan_file *f;
40
41         f = talloc(ctx, struct ccan_file);
42         f->lines = NULL;
43         f->line_info = NULL;
44         f->doc_sections = NULL;
45         f->compiled = NULL;
46         f->name = talloc_steal(f, name);
47         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
48         return f;
49 }
50
51 static void add_files(struct manifest *m, const char *dir)
52 {
53         DIR *d;
54         struct dirent *ent;
55
56         if (dir[0])
57                 d = opendir(dir);
58         else
59                 d = opendir(".");
60         if (!d)
61                 err(1, "Opening directory %s", dir[0] ? dir : ".");
62
63         while ((ent = readdir(d)) != NULL) {
64                 struct stat st;
65                 struct ccan_file *f;
66                 struct list_head *dest;
67                 bool is_c_src;
68
69                 if (ent->d_name[0] == '.')
70                         continue;
71
72                 f = new_ccan_file(m, m->dir,
73                                   talloc_asprintf(m, "%s%s",
74                                                   dir, ent->d_name));
75                 if (lstat(f->name, &st) != 0)
76                         err(1, "lstat %s", f->name);
77
78                 if (S_ISDIR(st.st_mode)) {
79                         f->name = talloc_append_string(f->name, "/");
80                         add_files(m, f->name);
81                         continue;
82                 }
83                 if (!S_ISREG(st.st_mode)) {
84                         talloc_free(f);
85                         continue;
86                 }
87
88                 if (streq(f->name, "_info")) {
89                         m->info_file = f;
90                         f->contents = grab_file(f, f->name, &f->contents_size);
91                         if (!f->contents)
92                                 err(1, "Reading file %s", f->name);
93                         continue;
94                 }
95
96                 is_c_src = strends(f->name, ".c");
97                 if (!is_c_src && !strends(f->name, ".h")) {
98                         /* We don't pull in contents of non-source files */
99                         dest = &m->other_files;
100                         continue;
101                 }
102
103                 f->contents = grab_file(f, f->name, &f->contents_size);
104                 if (!f->contents)
105                         err(1, "Reading file %s", f->name);
106
107                 if (!strchr(f->name, '/')) {
108                         if (is_c_src)
109                                 dest = &m->c_files;
110                         else
111                                 dest = &m->h_files;
112                 } else if (strstarts(f->name, "test/")) {
113                         if (is_c_src) {
114                                 if (strstarts(f->name, "test/api"))
115                                         dest = &m->api_tests;
116                                 else if (strstarts(f->name, "test/run"))
117                                         dest = &m->run_tests;
118                                 else if (strstarts(f->name, "test/compile_ok"))
119                                         dest = &m->compile_ok_tests;
120                                 else if (strstarts(f->name, "test/compile_fail"))
121                                         dest = &m->compile_fail_tests;
122                                 else
123                                         dest = &m->other_test_c_files;
124                         } else
125                                 dest = &m->other_test_files;
126                 } else
127                         dest = &m->other_files;
128
129                 list_add(dest, &f->list);
130         }
131         closedir(d);
132 }
133
134 char *report_on_lines(struct list_head *files,
135                       char *(*report)(const char *),
136                       char *sofar)
137 {
138         struct ccan_file *f;
139
140         list_for_each(files, f, list) {
141                 unsigned int i;
142                 char **lines = get_ccan_file_lines(f);
143
144                 for (i = 0; i < f->num_lines; i++) {
145                         char *r = report(lines[i]);
146                         if (!r)
147                                 continue;
148
149                         sofar = talloc_asprintf_append(sofar,
150                                                        "%s:%u:%s\n",
151                                                        f->name, i+1, r);
152                         talloc_free(r);
153                 }
154         }
155         return sofar;
156 }
157
158 struct manifest *get_manifest(const void *ctx, const char *dir)
159 {
160         struct manifest *m = talloc(ctx, struct manifest);
161         char *olddir;
162         unsigned int len;
163
164         m->info_file = NULL;
165         list_head_init(&m->c_files);
166         list_head_init(&m->h_files);
167         list_head_init(&m->api_tests);
168         list_head_init(&m->run_tests);
169         list_head_init(&m->compile_ok_tests);
170         list_head_init(&m->compile_fail_tests);
171         list_head_init(&m->other_test_c_files);
172         list_head_init(&m->other_test_files);
173         list_head_init(&m->other_files);
174         list_head_init(&m->dep_dirs);
175         list_head_init(&m->dep_objs);
176
177         olddir = talloc_getcwd(NULL);
178         if (!olddir)
179                 err(1, "Getting current directory");
180
181         if (chdir(dir) != 0)
182                 err(1, "Failed to chdir to %s", dir);
183
184         m->dir = talloc_getcwd(m);
185         if (!m->dir)
186                 err(1, "Getting current directory");
187
188         len = strlen(m->dir);
189         while (len && m->dir[len-1] == '/')
190                 m->dir[--len] = '\0';
191
192         m->basename = strrchr(m->dir, '/');
193         if (!m->basename)
194                 errx(1, "I don't expect to be run from the root directory");
195         m->basename++;
196
197         /* We expect the ccan dir to be two levels above module dir. */
198         if (!ccan_dir) {
199                 char *p;
200                 ccan_dir = talloc_strdup(NULL, m->dir);
201                 p = strrchr(ccan_dir, '/');
202                 *p = '\0';
203                 p = strrchr(ccan_dir, '/');
204                 *p = '\0';
205         }
206
207         add_files(m, "");
208
209         if (chdir(olddir) != 0)
210                 err(1, "Returning to original directory '%s'", olddir);
211         talloc_free(olddir);
212
213         return m;
214 }
215
216
217 /**
218  * remove_comments - strip comments from a line, return copy.
219  * @line: line to copy
220  * @in_comment: are we already within a comment (from prev line).
221  * @unterminated: are we still in a comment for next line.
222  */
223 static char *remove_comments(const char *line, bool in_comment,
224                              bool *unterminated)
225 {
226         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
227
228         p = ret;
229         for (;;) {
230                 if (!in_comment) {
231                         /* Find first comment. */
232                         const char *old_comment = strstr(line, "/*");
233                         const char *new_comment = strstr(line, "//");
234                         const char *comment;
235
236                         if (new_comment && old_comment)
237                                 comment = new_comment < old_comment
238                                         ? new_comment : old_comment;
239                         else if (old_comment)
240                                 comment = old_comment;
241                         else if (new_comment)
242                                 comment = new_comment;
243                         else {
244                                 /* Nothing more. */
245                                 strcpy(p, line);
246                                 *unterminated = false;
247                                 break;
248                         }
249
250                         /* Copy up to comment. */
251                         memcpy(p, line, comment - line);
252                         p += comment - line;
253                         line += comment - line + 2;
254
255                         if (comment == new_comment) {
256                                 /* We're done: goes to EOL. */
257                                 p[0] = '\0';
258                                 *unterminated = false;
259                                 break;
260                         }
261                         in_comment = true;
262                 }
263
264                 if (in_comment) {
265                         const char *end = strstr(line, "*/");
266                         if (!end) {
267                                 *unterminated = true;
268                                 p[0] = '\0';
269                                 break;
270                         }
271                         line = end+2;
272                         in_comment = false;
273                 }
274         }
275         return ret;
276 }
277
/* True if the line holds nothing but spaces and tabs (or nothing at all). */
static bool is_empty(const char *line)
{
        const char *rest = line + strspn(line, " \t");

        return *rest == '\0';
}
282
/* Does this line continue onto the next one?  Only a trailing backslash
 * is looked for; runs of backslashes are not counted. */
static bool continues(const char *line)
{
        bool trailing_backslash = strends(line, "\\");

        return trailing_backslash;
}
288
289 /* Get token if it's equal to token. */
290 bool get_token(const char **line, const char *token)
291 {
292         unsigned int toklen;
293
294         *line += strspn(*line, " \t");
295         if (isalnum(token[0]) || token[0] == '_')
296                 toklen = strspn(*line, IDENT_CHARS);
297         else {
298                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
299                 toklen = strlen(token);
300         }
301
302         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
303                 *line += toklen;
304                 return true;
305         }
306         return false;
307 }
308
309 char *get_symbol_token(void *ctx, const char **line)
310 {
311         unsigned int toklen;
312         char *ret;
313
314         *line += strspn(*line, " \t");
315         toklen = strspn(*line, IDENT_CHARS);
316         if (!toklen)
317                 return NULL;
318         ret = talloc_strndup(ctx, *line, toklen);
319         *line += toklen;
320         return ret;
321 }
322
323 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
324 {
325         bool brackets, defined;
326
327         cond->inverse = get_token(line, "!");
328         defined = get_token(line, "defined");
329         brackets = get_token(line, "(");
330         cond->symbol = get_symbol_token(cond, line);
331         if (!cond->symbol)
332                 return false;
333         if (brackets && !get_token(line, ")"))
334                 return false;
335         if (!defined)
336                 cond->type = PP_COND_IF;
337         return true;
338 }
339
340 /* FIXME: Get serious! */
341 static struct pp_conditions *analyze_directive(struct ccan_file *f,
342                                                const char *line,
343                                                struct pp_conditions *parent)
344 {
345         struct pp_conditions *cond = talloc(f, struct pp_conditions);
346         bool unused;
347
348         line = remove_comments(line, false, &unused);
349
350         cond->parent = parent;
351         cond->type = PP_COND_IFDEF;
352
353         if (!get_token(&line, "#"))
354                 abort();
355
356         if (get_token(&line, "if")) {
357                 if (!parse_hash_if(cond, &line))
358                         goto unknown;
359         } else if (get_token(&line, "elif")) {
360                 /* Malformed? */
361                 if (!parent)
362                         return NULL;
363                 cond->parent = parent->parent;
364                 /* FIXME: Not quite true.  This implies !parent, but we don't
365                  * do multiple conditionals yet. */
366                 if (!parse_hash_if(cond, &line))
367                         goto unknown;
368         } else if (get_token(&line, "ifdef")) {
369                 bool brackets;
370                 cond->inverse = false;
371                 brackets = get_token(&line, "(");
372                 cond->symbol = get_symbol_token(cond, &line);
373                 if (!cond->symbol)
374                         goto unknown;
375                 if (brackets && !get_token(&line, ")"))
376                         goto unknown;
377         } else if (get_token(&line, "ifndef")) {
378                 bool brackets;
379                 cond->inverse = true;
380                 brackets = get_token(&line, "(");
381                 cond->symbol = get_symbol_token(cond, &line);
382                 if (!cond->symbol)
383                         goto unknown;
384                 if (brackets && !get_token(&line, ")"))
385                         goto unknown;
386         } else if (get_token(&line, "else")) {
387                 /* Malformed? */
388                 if (!parent)
389                         return NULL;
390
391                 *cond = *parent;
392                 cond->inverse = !cond->inverse;
393                 return cond;
394         } else if (get_token(&line, "endif")) {
395                 talloc_free(cond);
396                 /* Malformed? */
397                 if (!parent)
398                         return NULL;
399                 /* Back up one! */
400                 return parent->parent;
401         } else {
402                 /* Not a conditional. */
403                 talloc_free(cond);
404                 return parent;
405         }
406
407         if (!is_empty(line))
408                 goto unknown;
409         return cond;
410
411 unknown:
412         cond->type = PP_COND_UNKNOWN;
413         return cond;
414 }
415
416 /* This parser is rough, but OK if code is reasonably neat. */
417 struct line_info *get_ccan_line_info(struct ccan_file *f)
418 {
419         bool continued = false, in_comment = false;
420         struct pp_conditions *cond = NULL;
421         unsigned int i;
422
423         if (f->line_info)
424                 return f->line_info;
425
426         get_ccan_file_lines(f);
427         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
428
429         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
430                 char *p;
431                 bool still_doc_line;
432
433                 /* Current conditions apply to this line. */
434                 f->line_info[i].cond = cond;
435                 f->line_info[i].continued = continued;
436
437                 if (continued) {
438                         /* Same as last line. */
439                         f->line_info[i].type = f->line_info[i-1].type;
440                         /* Update in_comment. */
441                         remove_comments(f->lines[i], in_comment, &in_comment);
442                         continue;
443                 }
444
445                 /* Preprocessor directive? */
446                 if (!in_comment
447                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
448                         f->line_info[i].type = PREPROC_LINE;
449                         cond = analyze_directive(f, f->lines[i], cond);
450                         continue;
451                 }
452
453                 still_doc_line = (in_comment
454                                   && f->line_info[i-1].type == DOC_LINE);
455
456                 p = remove_comments(f->lines[i], in_comment, &in_comment);
457                 if (is_empty(p)) {
458                         if (strstarts(f->lines[i], "/**") || still_doc_line)
459                                 f->line_info[i].type = DOC_LINE;
460                         else
461                                 f->line_info[i].type = COMMENT_LINE;
462                 } else
463                         f->line_info[i].type = CODE_LINE;
464                 talloc_free(p);
465         }
466         return f->line_info;
467 }
468
/* One preprocessor symbol whose state is known when evaluating
 * conditions (see get_pp). */
struct symbol {
        /* Link in the list of known symbols. */
        struct list_node list;
        /* Symbol name, compared against pp_conditions symbols. */
        const char *name;
        /* Points to the symbol's value; NULL means the symbol is undefined. */
        const unsigned int *value;
};
474
475 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
476 {
477         struct symbol *i;
478
479         list_for_each(syms, i, list)
480                 if (streq(sym, i->name))
481                         return i;
482         return NULL;
483 }
484
485 static enum line_compiled get_pp(struct pp_conditions *cond,
486                                  struct list_head *syms)
487 {
488         struct symbol *sym;
489         unsigned int val;
490         enum line_compiled parent, ret;
491
492         /* No conditions?  Easy. */
493         if (!cond)
494                 return COMPILED;
495
496         /* Check we get here at all. */
497         parent = get_pp(cond->parent, syms);
498         if (parent == NOT_COMPILED)
499                 return NOT_COMPILED;
500
501         if (cond->type == PP_COND_UNKNOWN)
502                 return MAYBE_COMPILED;
503
504         sym = find_symbol(syms, cond->symbol);
505         if (!sym)
506                 return MAYBE_COMPILED;
507
508         switch (cond->type) {
509         case PP_COND_IF:
510                 /* Undefined is 0. */
511                 val = sym->value ? *sym->value : 0;
512                 if (!val == cond->inverse)
513                         ret = COMPILED;
514                 else
515                         ret = NOT_COMPILED;
516                 break;
517
518         case PP_COND_IFDEF:
519                 if (cond->inverse == !sym->value)
520                         ret = COMPILED;
521                 else
522                         ret = NOT_COMPILED;
523                 break;
524
525         default:
526                 abort();
527         }
528
529         /* If parent didn't know, NO == NO, but YES == MAYBE. */
530         if (parent == MAYBE_COMPILED && ret == COMPILED)
531                 ret = MAYBE_COMPILED;
532         return ret;
533 }
534
535 static void add_symbol(struct list_head *head,
536                        const char *symbol, const unsigned int *value)
537 {
538         struct symbol *sym = talloc(head, struct symbol);
539         sym->name = symbol;
540         sym->value = value;
541         list_add(head, &sym->list);
542 }
543         
544 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
545                                     const char *symbol,
546                                     const unsigned int *value,
547                                     ...)
548 {
549         enum line_compiled ret;
550         struct list_head *head;
551         va_list ap;
552
553         head = talloc(NULL, struct list_head);
554         list_head_init(head);
555
556         va_start(ap, value);
557         add_symbol(head, symbol, value);
558
559         while ((symbol = va_arg(ap, const char *)) != NULL) {
560                 value = va_arg(ap, const unsigned int *);
561                 add_symbol(head, symbol, value);
562         }
563         ret = get_pp(cond, head);
564         talloc_free(head);
565         return ret;
566 }
567