4908a895423b98eddcd70fc5d973dc60e871d4b9
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17 #include <assert.h>
18
/* Top of the ccan tree.  While this is still NULL, get_manifest() sets it to
 * the module's absolute directory with the last two path components
 * removed. */
const char *ccan_dir;
20
21 char **get_ccan_file_lines(struct ccan_file *f)
22 {
23         if (!f->lines)
24                 f->lines = strsplit(f, f->contents, "\n", &f->num_lines);
25
26         return f->lines;
27 }
28
29 struct list_head *get_ccan_file_docs(struct ccan_file *f)
30 {
31         if (!f->doc_sections) {
32                 get_ccan_file_lines(f);
33                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
34         }
35         return f->doc_sections;
36 }
37
38 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
39 {
40         struct ccan_file *f;
41
42         assert(dir[0] == '/');
43
44         f = talloc(ctx, struct ccan_file);
45         f->lines = NULL;
46         f->line_info = NULL;
47         f->doc_sections = NULL;
48         f->compiled = NULL;
49         f->name = talloc_steal(f, name);
50         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
51         return f;
52 }
53
54 static void add_files(struct manifest *m, const char *dir)
55 {
56         DIR *d;
57         struct dirent *ent;
58
59         if (dir[0])
60                 d = opendir(dir);
61         else
62                 d = opendir(".");
63         if (!d)
64                 err(1, "Opening directory %s", dir[0] ? dir : ".");
65
66         while ((ent = readdir(d)) != NULL) {
67                 struct stat st;
68                 struct ccan_file *f;
69                 struct list_head *dest;
70                 bool is_c_src;
71
72                 if (ent->d_name[0] == '.')
73                         continue;
74
75                 f = new_ccan_file(m, m->dir,
76                                   talloc_asprintf(m, "%s%s",
77                                                   dir, ent->d_name));
78                 if (lstat(f->name, &st) != 0)
79                         err(1, "lstat %s", f->name);
80
81                 if (S_ISDIR(st.st_mode)) {
82                         f->name = talloc_append_string(f->name, "/");
83                         add_files(m, f->name);
84                         continue;
85                 }
86                 if (!S_ISREG(st.st_mode)) {
87                         talloc_free(f);
88                         continue;
89                 }
90
91                 if (streq(f->name, "_info")) {
92                         m->info_file = f;
93                         f->contents = grab_file(f, f->name, &f->contents_size);
94                         if (!f->contents)
95                                 err(1, "Reading file %s", f->name);
96                         continue;
97                 }
98
99                 is_c_src = strends(f->name, ".c");
100                 if (!is_c_src && !strends(f->name, ".h")) {
101                         /* We don't pull in contents of non-source files */
102                         dest = &m->other_files;
103                         continue;
104                 }
105
106                 f->contents = grab_file(f, f->name, &f->contents_size);
107                 if (!f->contents)
108                         err(1, "Reading file %s", f->name);
109
110                 if (!strchr(f->name, '/')) {
111                         if (is_c_src)
112                                 dest = &m->c_files;
113                         else
114                                 dest = &m->h_files;
115                 } else if (strstarts(f->name, "test/")) {
116                         if (is_c_src) {
117                                 if (strstarts(f->name, "test/api"))
118                                         dest = &m->api_tests;
119                                 else if (strstarts(f->name, "test/run"))
120                                         dest = &m->run_tests;
121                                 else if (strstarts(f->name, "test/compile_ok"))
122                                         dest = &m->compile_ok_tests;
123                                 else if (strstarts(f->name, "test/compile_fail"))
124                                         dest = &m->compile_fail_tests;
125                                 else
126                                         dest = &m->other_test_c_files;
127                         } else
128                                 dest = &m->other_test_files;
129                 } else
130                         dest = &m->other_files;
131
132                 list_add(dest, &f->list);
133         }
134         closedir(d);
135 }
136
137 char *report_on_lines(struct list_head *files,
138                       char *(*report)(const char *),
139                       char *sofar)
140 {
141         struct ccan_file *f;
142
143         list_for_each(files, f, list) {
144                 unsigned int i;
145                 char **lines = get_ccan_file_lines(f);
146
147                 for (i = 0; i < f->num_lines; i++) {
148                         char *r = report(lines[i]);
149                         if (!r)
150                                 continue;
151
152                         sofar = talloc_asprintf_append(sofar,
153                                                        "%s:%u:%s\n",
154                                                        f->name, i+1, r);
155                         talloc_free(r);
156                 }
157         }
158         return sofar;
159 }
160
161 struct manifest *get_manifest(const void *ctx, const char *dir)
162 {
163         struct manifest *m = talloc(ctx, struct manifest);
164         char *olddir;
165         unsigned int len;
166
167         m->info_file = NULL;
168         list_head_init(&m->c_files);
169         list_head_init(&m->h_files);
170         list_head_init(&m->api_tests);
171         list_head_init(&m->run_tests);
172         list_head_init(&m->compile_ok_tests);
173         list_head_init(&m->compile_fail_tests);
174         list_head_init(&m->other_test_c_files);
175         list_head_init(&m->other_test_files);
176         list_head_init(&m->other_files);
177         list_head_init(&m->examples);
178         list_head_init(&m->dep_dirs);
179
180         olddir = talloc_getcwd(NULL);
181         if (!olddir)
182                 err(1, "Getting current directory");
183
184         if (chdir(dir) != 0)
185                 err(1, "Failed to chdir to %s", dir);
186
187         m->dir = talloc_getcwd(m);
188         if (!m->dir)
189                 err(1, "Getting current directory");
190
191         len = strlen(m->dir);
192         while (len && m->dir[len-1] == '/')
193                 m->dir[--len] = '\0';
194
195         m->basename = strrchr(m->dir, '/');
196         if (!m->basename)
197                 errx(1, "I don't expect to be run from the root directory");
198         m->basename++;
199
200         /* We expect the ccan dir to be two levels above module dir. */
201         if (!ccan_dir) {
202                 char *p;
203                 ccan_dir = talloc_strdup(NULL, m->dir);
204                 p = strrchr(ccan_dir, '/');
205                 *p = '\0';
206                 p = strrchr(ccan_dir, '/');
207                 *p = '\0';
208         }
209
210         add_files(m, "");
211
212         if (chdir(olddir) != 0)
213                 err(1, "Returning to original directory '%s'", olddir);
214         talloc_free(olddir);
215
216         return m;
217 }
218
219
220 /**
221  * remove_comments - strip comments from a line, return copy.
222  * @line: line to copy
223  * @in_comment: are we already within a comment (from prev line).
224  * @unterminated: are we still in a comment for next line.
225  */
226 static char *remove_comments(const char *line, bool in_comment,
227                              bool *unterminated)
228 {
229         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
230
231         p = ret;
232         for (;;) {
233                 if (!in_comment) {
234                         /* Find first comment. */
235                         const char *old_comment = strstr(line, "/*");
236                         const char *new_comment = strstr(line, "//");
237                         const char *comment;
238
239                         if (new_comment && old_comment)
240                                 comment = new_comment < old_comment
241                                         ? new_comment : old_comment;
242                         else if (old_comment)
243                                 comment = old_comment;
244                         else if (new_comment)
245                                 comment = new_comment;
246                         else {
247                                 /* Nothing more. */
248                                 strcpy(p, line);
249                                 *unterminated = false;
250                                 break;
251                         }
252
253                         /* Copy up to comment. */
254                         memcpy(p, line, comment - line);
255                         p += comment - line;
256                         line += comment - line + 2;
257
258                         if (comment == new_comment) {
259                                 /* We're done: goes to EOL. */
260                                 p[0] = '\0';
261                                 *unterminated = false;
262                                 break;
263                         }
264                         in_comment = true;
265                 }
266
267                 if (in_comment) {
268                         const char *end = strstr(line, "*/");
269                         if (!end) {
270                                 *unterminated = true;
271                                 p[0] = '\0';
272                                 break;
273                         }
274                         line = end+2;
275                         in_comment = false;
276                 }
277         }
278         return ret;
279 }
280
/* True if @line holds nothing except spaces and tabs (or nothing at all). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; an empty line ends right there. */
        return line[strspn(line, " \t")] == '\0';
}
285
/* Does @line end in a backslash, i.e. carry on onto the following line?
 * Only the final character is looked at. */
static bool continues(const char *line)
{
        static const char splice[] = "\\";

        return strends(line, splice);
}
291
292 /* Get token if it's equal to token. */
293 bool get_token(const char **line, const char *token)
294 {
295         unsigned int toklen;
296
297         *line += strspn(*line, " \t");
298         if (isalnum(token[0]) || token[0] == '_')
299                 toklen = strspn(*line, IDENT_CHARS);
300         else {
301                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
302                 toklen = strlen(token);
303         }
304
305         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
306                 *line += toklen;
307                 return true;
308         }
309         return false;
310 }
311
312 char *get_symbol_token(void *ctx, const char **line)
313 {
314         unsigned int toklen;
315         char *ret;
316
317         *line += strspn(*line, " \t");
318         toklen = strspn(*line, IDENT_CHARS);
319         if (!toklen)
320                 return NULL;
321         ret = talloc_strndup(ctx, *line, toklen);
322         *line += toklen;
323         return ret;
324 }
325
326 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
327 {
328         bool brackets, defined;
329
330         cond->inverse = get_token(line, "!");
331         defined = get_token(line, "defined");
332         brackets = get_token(line, "(");
333         cond->symbol = get_symbol_token(cond, line);
334         if (!cond->symbol)
335                 return false;
336         if (brackets && !get_token(line, ")"))
337                 return false;
338         if (!defined)
339                 cond->type = PP_COND_IF;
340         return true;
341 }
342
343 /* FIXME: Get serious! */
344 static struct pp_conditions *analyze_directive(struct ccan_file *f,
345                                                const char *line,
346                                                struct pp_conditions *parent)
347 {
348         struct pp_conditions *cond = talloc(f, struct pp_conditions);
349         bool unused;
350
351         line = remove_comments(line, false, &unused);
352
353         cond->parent = parent;
354         cond->type = PP_COND_IFDEF;
355
356         if (!get_token(&line, "#"))
357                 abort();
358
359         if (get_token(&line, "if")) {
360                 if (!parse_hash_if(cond, &line))
361                         goto unknown;
362         } else if (get_token(&line, "elif")) {
363                 /* Malformed? */
364                 if (!parent)
365                         return NULL;
366                 cond->parent = parent->parent;
367                 /* FIXME: Not quite true.  This implies !parent, but we don't
368                  * do multiple conditionals yet. */
369                 if (!parse_hash_if(cond, &line))
370                         goto unknown;
371         } else if (get_token(&line, "ifdef")) {
372                 bool brackets;
373                 cond->inverse = false;
374                 brackets = get_token(&line, "(");
375                 cond->symbol = get_symbol_token(cond, &line);
376                 if (!cond->symbol)
377                         goto unknown;
378                 if (brackets && !get_token(&line, ")"))
379                         goto unknown;
380         } else if (get_token(&line, "ifndef")) {
381                 bool brackets;
382                 cond->inverse = true;
383                 brackets = get_token(&line, "(");
384                 cond->symbol = get_symbol_token(cond, &line);
385                 if (!cond->symbol)
386                         goto unknown;
387                 if (brackets && !get_token(&line, ")"))
388                         goto unknown;
389         } else if (get_token(&line, "else")) {
390                 /* Malformed? */
391                 if (!parent)
392                         return NULL;
393
394                 *cond = *parent;
395                 cond->inverse = !cond->inverse;
396                 return cond;
397         } else if (get_token(&line, "endif")) {
398                 talloc_free(cond);
399                 /* Malformed? */
400                 if (!parent)
401                         return NULL;
402                 /* Back up one! */
403                 return parent->parent;
404         } else {
405                 /* Not a conditional. */
406                 talloc_free(cond);
407                 return parent;
408         }
409
410         if (!is_empty(line))
411                 goto unknown;
412         return cond;
413
414 unknown:
415         cond->type = PP_COND_UNKNOWN;
416         return cond;
417 }
418
419 /* This parser is rough, but OK if code is reasonably neat. */
420 struct line_info *get_ccan_line_info(struct ccan_file *f)
421 {
422         bool continued = false, in_comment = false;
423         struct pp_conditions *cond = NULL;
424         unsigned int i;
425
426         if (f->line_info)
427                 return f->line_info;
428
429         get_ccan_file_lines(f);
430         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
431
432         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
433                 char *p;
434                 bool still_doc_line;
435
436                 /* Current conditions apply to this line. */
437                 f->line_info[i].cond = cond;
438                 f->line_info[i].continued = continued;
439
440                 if (continued) {
441                         /* Same as last line. */
442                         f->line_info[i].type = f->line_info[i-1].type;
443                         /* Update in_comment. */
444                         remove_comments(f->lines[i], in_comment, &in_comment);
445                         continue;
446                 }
447
448                 /* Preprocessor directive? */
449                 if (!in_comment
450                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
451                         f->line_info[i].type = PREPROC_LINE;
452                         cond = analyze_directive(f, f->lines[i], cond);
453                         continue;
454                 }
455
456                 still_doc_line = (in_comment
457                                   && f->line_info[i-1].type == DOC_LINE);
458
459                 p = remove_comments(f->lines[i], in_comment, &in_comment);
460                 if (is_empty(p)) {
461                         if (strstarts(f->lines[i], "/**") || still_doc_line)
462                                 f->line_info[i].type = DOC_LINE;
463                         else
464                                 f->line_info[i].type = COMMENT_LINE;
465                 } else
466                         f->line_info[i].type = CODE_LINE;
467                 talloc_free(p);
468         }
469         return f->line_info;
470 }
471
472 struct symbol {
473         struct list_node list;
474         const char *name;
475         const unsigned int *value;
476 };
477
478 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
479 {
480         struct symbol *i;
481
482         list_for_each(syms, i, list)
483                 if (streq(sym, i->name))
484                         return i;
485         return NULL;
486 }
487
488 static enum line_compiled get_pp(struct pp_conditions *cond,
489                                  struct list_head *syms)
490 {
491         struct symbol *sym;
492         unsigned int val;
493         enum line_compiled parent, ret;
494
495         /* No conditions?  Easy. */
496         if (!cond)
497                 return COMPILED;
498
499         /* Check we get here at all. */
500         parent = get_pp(cond->parent, syms);
501         if (parent == NOT_COMPILED)
502                 return NOT_COMPILED;
503
504         if (cond->type == PP_COND_UNKNOWN)
505                 return MAYBE_COMPILED;
506
507         sym = find_symbol(syms, cond->symbol);
508         if (!sym)
509                 return MAYBE_COMPILED;
510
511         switch (cond->type) {
512         case PP_COND_IF:
513                 /* Undefined is 0. */
514                 val = sym->value ? *sym->value : 0;
515                 if (!val == cond->inverse)
516                         ret = COMPILED;
517                 else
518                         ret = NOT_COMPILED;
519                 break;
520
521         case PP_COND_IFDEF:
522                 if (cond->inverse == !sym->value)
523                         ret = COMPILED;
524                 else
525                         ret = NOT_COMPILED;
526                 break;
527
528         default:
529                 abort();
530         }
531
532         /* If parent didn't know, NO == NO, but YES == MAYBE. */
533         if (parent == MAYBE_COMPILED && ret == COMPILED)
534                 ret = MAYBE_COMPILED;
535         return ret;
536 }
537
538 static void add_symbol(struct list_head *head,
539                        const char *symbol, const unsigned int *value)
540 {
541         struct symbol *sym = talloc(head, struct symbol);
542         sym->name = symbol;
543         sym->value = value;
544         list_add(head, &sym->list);
545 }
546         
547 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
548                                     const char *symbol,
549                                     const unsigned int *value,
550                                     ...)
551 {
552         enum line_compiled ret;
553         struct list_head *head;
554         va_list ap;
555
556         head = talloc(NULL, struct list_head);
557         list_head_init(head);
558
559         va_start(ap, value);
560         add_symbol(head, symbol, value);
561
562         while ((symbol = va_arg(ap, const char *)) != NULL) {
563                 value = va_arg(ap, const unsigned int *);
564                 add_symbol(head, symbol, value);
565         }
566         ret = get_pp(cond, head);
567         talloc_free(head);
568         return ret;
569 }
570