Fix tracing for tdb_chainunlock.
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17
18 char **get_ccan_file_lines(struct ccan_file *f)
19 {
20         if (!f->lines)
21                 f->lines = strsplit(f, f->contents, "\n", &f->num_lines);
22
23         return f->lines;
24 }
25
26 struct list_head *get_ccan_file_docs(struct ccan_file *f)
27 {
28         if (!f->doc_sections) {
29                 get_ccan_file_lines(f);
30                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
31         }
32         return f->doc_sections;
33 }
34
35 static void add_files(struct manifest *m, const char *dir)
36 {
37         DIR *d;
38         struct dirent *ent;
39
40         if (dir[0])
41                 d = opendir(dir);
42         else
43                 d = opendir(".");
44         if (!d)
45                 err(1, "Opening directory %s", dir[0] ? dir : ".");
46
47         while ((ent = readdir(d)) != NULL) {
48                 struct stat st;
49                 struct ccan_file *f;
50                 struct list_head *dest;
51                 bool is_c_src;
52
53                 if (ent->d_name[0] == '.')
54                         continue;
55
56                 f = talloc(m, struct ccan_file);
57                 f->lines = NULL;
58                 f->line_info = NULL;
59                 f->doc_sections = NULL;
60                 f->name = talloc_asprintf(f, "%s%s", dir, ent->d_name);
61                 if (lstat(f->name, &st) != 0)
62                         err(1, "lstat %s", f->name);
63
64                 if (S_ISDIR(st.st_mode)) {
65                         f->name = talloc_append_string(f->name, "/");
66                         add_files(m, f->name);
67                         continue;
68                 }
69                 if (!S_ISREG(st.st_mode)) {
70                         talloc_free(f);
71                         continue;
72                 }
73
74                 if (streq(f->name, "_info")) {
75                         m->info_file = f;
76                         f->contents = grab_file(f, f->name, &f->contents_size);
77                         if (!f->contents)
78                                 err(1, "Reading file %s", f->name);
79                         continue;
80                 }
81
82                 is_c_src = strends(f->name, ".c");
83                 if (!is_c_src && !strends(f->name, ".h")) {
84                         /* We don't pull in contents of non-source files */
85                         dest = &m->other_files;
86                         continue;
87                 }
88
89                 f->contents = grab_file(f, f->name, &f->contents_size);
90                 if (!f->contents)
91                         err(1, "Reading file %s", f->name);
92
93                 if (!strchr(f->name, '/')) {
94                         if (is_c_src)
95                                 dest = &m->c_files;
96                         else
97                                 dest = &m->h_files;
98                 } else if (strstarts(f->name, "test/")) {
99                         if (is_c_src) {
100                                 if (strstarts(f->name, "test/api"))
101                                         dest = &m->api_tests;
102                                 else if (strstarts(f->name, "test/run"))
103                                         dest = &m->run_tests;
104                                 else if (strstarts(f->name, "test/compile_ok"))
105                                         dest = &m->compile_ok_tests;
106                                 else if (strstarts(f->name, "test/compile_fail"))
107                                         dest = &m->compile_fail_tests;
108                                 else
109                                         dest = &m->other_test_files;
110                         } else
111                                 dest = &m->other_test_files;
112                 } else
113                         dest = &m->other_files;
114
115                 list_add(dest, &f->list);
116         }
117         closedir(d);
118 }
119
120 char *report_on_lines(struct list_head *files,
121                       char *(*report)(const char *),
122                       char *sofar)
123 {
124         struct ccan_file *f;
125
126         list_for_each(files, f, list) {
127                 unsigned int i;
128                 char **lines = get_ccan_file_lines(f);
129
130                 for (i = 0; i < f->num_lines; i++) {
131                         char *r = report(lines[i]);
132                         if (!r)
133                                 continue;
134
135                         sofar = talloc_asprintf_append(sofar,
136                                                        "%s:%u:%s\n",
137                                                        f->name, i+1, r);
138                         talloc_free(r);
139                 }
140         }
141         return sofar;
142 }
143
144 struct manifest *get_manifest(void)
145 {
146         struct manifest *m = talloc(NULL, struct manifest);
147         unsigned int len;
148
149         m->info_file = NULL;
150         list_head_init(&m->c_files);
151         list_head_init(&m->h_files);
152         list_head_init(&m->api_tests);
153         list_head_init(&m->run_tests);
154         list_head_init(&m->compile_ok_tests);
155         list_head_init(&m->compile_fail_tests);
156         list_head_init(&m->other_test_files);
157         list_head_init(&m->other_files);
158
159         /* *This* is why people hate C. */
160         len = 32;
161         m->basename = talloc_array(m, char, len);
162         while (!getcwd(m->basename, len)) {
163                 if (errno != ERANGE)
164                         err(1, "Getting current directory");
165                 m->basename = talloc_realloc(m, m->basename, char, len *= 2);
166         }
167
168         len = strlen(m->basename);
169         while (len && m->basename[len-1] == '/')
170                 m->basename[--len] = '\0';
171
172         m->basename = strrchr(m->basename, '/');
173         if (!m->basename)
174                 errx(1, "I don't expect to be run from the root directory");
175         m->basename++;
176
177         add_files(m, "");
178         return m;
179 }
180
181
182 /**
183  * remove_comments - strip comments from a line, return copy.
184  * @line: line to copy
185  * @in_comment: are we already within a comment (from prev line).
186  * @unterminated: are we still in a comment for next line.
187  */
188 static char *remove_comments(const char *line, bool in_comment,
189                              bool *unterminated)
190 {
191         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
192
193         p = ret;
194         for (;;) {
195                 if (!in_comment) {
196                         /* Find first comment. */
197                         const char *old_comment = strstr(line, "/*");
198                         const char *new_comment = strstr(line, "//");
199                         const char *comment;
200
201                         if (new_comment && old_comment)
202                                 comment = new_comment < old_comment
203                                         ? new_comment : old_comment;
204                         else if (old_comment)
205                                 comment = old_comment;
206                         else if (new_comment)
207                                 comment = new_comment;
208                         else {
209                                 /* Nothing more. */
210                                 strcpy(p, line);
211                                 *unterminated = false;
212                                 break;
213                         }
214
215                         /* Copy up to comment. */
216                         memcpy(p, line, comment - line);
217                         p += comment - line;
218                         line += comment - line + 2;
219
220                         if (comment == new_comment) {
221                                 /* We're done: goes to EOL. */
222                                 p[0] = '\0';
223                                 *unterminated = false;
224                                 break;
225                         }
226                         in_comment = true;
227                 }
228
229                 if (in_comment) {
230                         const char *end = strstr(line, "*/");
231                         if (!end) {
232                                 *unterminated = true;
233                                 p[0] = '\0';
234                                 break;
235                         }
236                         line = end+2;
237                         in_comment = false;
238                 }
239         }
240         return ret;
241 }
242
/* True if @line consists solely of spaces and tabs (or is zero-length). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; the line is empty if nothing follows. */
        const char *rest = line + strspn(line, " \t");

        return *rest == '\0';
}
247
/* Does this line carry on to the next, i.e. end with a backslash? */
static bool continues(const char *line)
{
        /* Strictly only an odd number of trailing backslashes continues the
         * line, but we do not bother counting them. */
        const bool trailing_backslash = strends(line, "\\");

        return trailing_backslash;
}
253
254 /* Get token if it's equal to token. */
255 bool get_token(const char **line, const char *token)
256 {
257         unsigned int toklen;
258
259         *line += strspn(*line, " \t");
260         if (isalnum(token[0]) || token[0] == '_')
261                 toklen = strspn(*line, IDENT_CHARS);
262         else {
263                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
264                 toklen = strlen(token);
265         }
266
267         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
268                 *line += toklen;
269                 return true;
270         }
271         return false;
272 }
273
274 char *get_symbol_token(void *ctx, const char **line)
275 {
276         unsigned int toklen;
277         char *ret;
278
279         *line += strspn(*line, " \t");
280         toklen = strspn(*line, IDENT_CHARS);
281         if (!toklen)
282                 return NULL;
283         ret = talloc_strndup(ctx, *line, toklen);
284         *line += toklen;
285         return ret;
286 }
287
288 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
289 {
290         bool brackets, defined;
291
292         cond->inverse = get_token(line, "!");
293         defined = get_token(line, "defined");
294         brackets = get_token(line, "(");
295         cond->symbol = get_symbol_token(cond, line);
296         if (!cond->symbol)
297                 return false;
298         if (brackets && !get_token(line, ")"))
299                 return false;
300         if (!defined)
301                 cond->type = PP_COND_IF;
302         return true;
303 }
304
305 /* FIXME: Get serious! */
306 static struct pp_conditions *analyze_directive(struct ccan_file *f,
307                                                const char *line,
308                                                struct pp_conditions *parent)
309 {
310         struct pp_conditions *cond = talloc(f, struct pp_conditions);
311         bool unused;
312
313         line = remove_comments(line, false, &unused);
314
315         cond->parent = parent;
316         cond->type = PP_COND_IFDEF;
317
318         if (!get_token(&line, "#"))
319                 abort();
320
321         if (get_token(&line, "if")) {
322                 if (!parse_hash_if(cond, &line))
323                         goto unknown;
324         } else if (get_token(&line, "elif")) {
325                 /* Malformed? */
326                 if (!parent)
327                         return NULL;
328                 cond->parent = parent->parent;
329                 /* FIXME: Not quite true.  This implies !parent, but we don't
330                  * do multiple conditionals yet. */
331                 if (!parse_hash_if(cond, &line))
332                         goto unknown;
333         } else if (get_token(&line, "ifdef")) {
334                 bool brackets;
335                 cond->inverse = false;
336                 brackets = get_token(&line, "(");
337                 cond->symbol = get_symbol_token(cond, &line);
338                 if (!cond->symbol)
339                         goto unknown;
340                 if (brackets && !get_token(&line, ")"))
341                         goto unknown;
342         } else if (get_token(&line, "ifndef")) {
343                 bool brackets;
344                 cond->inverse = true;
345                 brackets = get_token(&line, "(");
346                 cond->symbol = get_symbol_token(cond, &line);
347                 if (!cond->symbol)
348                         goto unknown;
349                 if (brackets && !get_token(&line, ")"))
350                         goto unknown;
351         } else if (get_token(&line, "else")) {
352                 /* Malformed? */
353                 if (!parent)
354                         return NULL;
355
356                 *cond = *parent;
357                 cond->inverse = !cond->inverse;
358                 return cond;
359         } else if (get_token(&line, "endif")) {
360                 talloc_free(cond);
361                 /* Malformed? */
362                 if (!parent)
363                         return NULL;
364                 /* Back up one! */
365                 return parent->parent;
366         } else {
367                 /* Not a conditional. */
368                 talloc_free(cond);
369                 return parent;
370         }
371
372         if (!is_empty(line))
373                 goto unknown;
374         return cond;
375
376 unknown:
377         cond->type = PP_COND_UNKNOWN;
378         return cond;
379 }
380
381 /* This parser is rough, but OK if code is reasonably neat. */
382 struct line_info *get_ccan_line_info(struct ccan_file *f)
383 {
384         bool continued = false, in_comment = false;
385         struct pp_conditions *cond = NULL;
386         unsigned int i;
387
388         if (f->line_info)
389                 return f->line_info;
390
391         get_ccan_file_lines(f);
392         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
393
394         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
395                 char *p;
396                 bool still_doc_line;
397
398                 /* Current conditions apply to this line. */
399                 f->line_info[i].cond = cond;
400                 f->line_info[i].continued = continued;
401
402                 if (continued) {
403                         /* Same as last line. */
404                         f->line_info[i].type = f->line_info[i-1].type;
405                         /* Update in_comment. */
406                         remove_comments(f->lines[i], in_comment, &in_comment);
407                         continue;
408                 }
409
410                 /* Preprocessor directive? */
411                 if (!in_comment
412                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
413                         f->line_info[i].type = PREPROC_LINE;
414                         cond = analyze_directive(f, f->lines[i], cond);
415                         continue;
416                 }
417
418                 still_doc_line = (in_comment
419                                   && f->line_info[i-1].type == DOC_LINE);
420
421                 p = remove_comments(f->lines[i], in_comment, &in_comment);
422                 if (is_empty(p)) {
423                         if (strstarts(f->lines[i], "/**") || still_doc_line)
424                                 f->line_info[i].type = DOC_LINE;
425                         else
426                                 f->line_info[i].type = COMMENT_LINE;
427                 } else
428                         f->line_info[i].type = CODE_LINE;
429                 talloc_free(p);
430         }
431         return f->line_info;
432 }
433
434 struct symbol {
435         struct list_node list;
436         const char *name;
437         const unsigned int *value;
438 };
439
440 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
441 {
442         struct symbol *i;
443
444         list_for_each(syms, i, list)
445                 if (streq(sym, i->name))
446                         return i;
447         return NULL;
448 }
449
450 static enum line_compiled get_pp(struct pp_conditions *cond,
451                                  struct list_head *syms)
452 {
453         struct symbol *sym;
454         unsigned int val;
455         enum line_compiled parent, ret;
456
457         /* No conditions?  Easy. */
458         if (!cond)
459                 return COMPILED;
460
461         /* Check we get here at all. */
462         parent = get_pp(cond->parent, syms);
463         if (parent == NOT_COMPILED)
464                 return NOT_COMPILED;
465
466         if (cond->type == PP_COND_UNKNOWN)
467                 return MAYBE_COMPILED;
468
469         sym = find_symbol(syms, cond->symbol);
470         if (!sym)
471                 return MAYBE_COMPILED;
472
473         switch (cond->type) {
474         case PP_COND_IF:
475                 /* Undefined is 0. */
476                 val = sym->value ? *sym->value : 0;
477                 if (!val == cond->inverse)
478                         ret = COMPILED;
479                 else
480                         ret = NOT_COMPILED;
481                 break;
482
483         case PP_COND_IFDEF:
484                 if (cond->inverse == !sym->value)
485                         ret = COMPILED;
486                 else
487                         ret = NOT_COMPILED;
488                 break;
489
490         default:
491                 abort();
492         }
493
494         /* If parent didn't know, NO == NO, but YES == MAYBE. */
495         if (parent == MAYBE_COMPILED && ret == COMPILED)
496                 ret = MAYBE_COMPILED;
497         return ret;
498 }
499
500 static void add_symbol(struct list_head *head,
501                        const char *symbol, const unsigned int *value)
502 {
503         struct symbol *sym = talloc(head, struct symbol);
504         sym->name = symbol;
505         sym->value = value;
506         list_add(head, &sym->list);
507 }
508         
509 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
510                                     const char *symbol,
511                                     const unsigned int *value,
512                                     ...)
513 {
514         enum line_compiled ret;
515         struct list_head *head;
516         va_list ap;
517
518         head = talloc(NULL, struct list_head);
519         list_head_init(head);
520
521         va_start(ap, value);
522         add_symbol(head, symbol, value);
523
524         while ((symbol = va_arg(ap, const char *)) != NULL) {
525                 value = va_arg(ap, const unsigned int *);
526                 add_symbol(head, symbol, value);
527         }
528         ret = get_pp(cond, head);
529         talloc_free(head);
530         return ret;
531 }
532