/*
 * Source: git.ozlabs.org, ccan.git,
 *         blob 3152cd1e16a555e8b19ebca25c2b1836df11aed2
 * Path:   tools/ccanlint/file_analysis.c
 */
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17
18 char **get_ccan_file_lines(struct ccan_file *f)
19 {
20         if (!f->lines)
21                 f->lines = strsplit(f, f->contents, "\n", &f->num_lines);
22
23         return f->lines;
24 }
25
26 struct list_head *get_ccan_file_docs(struct ccan_file *f)
27 {
28         if (!f->doc_sections) {
29                 get_ccan_file_lines(f);
30                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
31         }
32         return f->doc_sections;
33 }
34
35 struct ccan_file *new_ccan_file(const void *ctx, char *name)
36 {
37         struct ccan_file *f;
38
39         f = talloc(ctx, struct ccan_file);
40         f->lines = NULL;
41         f->line_info = NULL;
42         f->doc_sections = NULL;
43         f->compiled = NULL;
44         f->name = talloc_steal(f, name);
45         return f;
46 }
47
48 static void add_files(struct manifest *m, const char *dir)
49 {
50         DIR *d;
51         struct dirent *ent;
52
53         if (dir[0])
54                 d = opendir(dir);
55         else
56                 d = opendir(".");
57         if (!d)
58                 err(1, "Opening directory %s", dir[0] ? dir : ".");
59
60         while ((ent = readdir(d)) != NULL) {
61                 struct stat st;
62                 struct ccan_file *f;
63                 struct list_head *dest;
64                 bool is_c_src;
65
66                 if (ent->d_name[0] == '.')
67                         continue;
68
69                 f = new_ccan_file(m, talloc_asprintf(m, "%s%s",
70                                                      dir, ent->d_name));
71                 if (lstat(f->name, &st) != 0)
72                         err(1, "lstat %s", f->name);
73
74                 if (S_ISDIR(st.st_mode)) {
75                         f->name = talloc_append_string(f->name, "/");
76                         add_files(m, f->name);
77                         continue;
78                 }
79                 if (!S_ISREG(st.st_mode)) {
80                         talloc_free(f);
81                         continue;
82                 }
83
84                 if (streq(f->name, "_info")) {
85                         m->info_file = f;
86                         f->contents = grab_file(f, f->name, &f->contents_size);
87                         if (!f->contents)
88                                 err(1, "Reading file %s", f->name);
89                         continue;
90                 }
91
92                 is_c_src = strends(f->name, ".c");
93                 if (!is_c_src && !strends(f->name, ".h")) {
94                         /* We don't pull in contents of non-source files */
95                         dest = &m->other_files;
96                         continue;
97                 }
98
99                 f->contents = grab_file(f, f->name, &f->contents_size);
100                 if (!f->contents)
101                         err(1, "Reading file %s", f->name);
102
103                 if (!strchr(f->name, '/')) {
104                         if (is_c_src)
105                                 dest = &m->c_files;
106                         else
107                                 dest = &m->h_files;
108                 } else if (strstarts(f->name, "test/")) {
109                         if (is_c_src) {
110                                 if (strstarts(f->name, "test/api"))
111                                         dest = &m->api_tests;
112                                 else if (strstarts(f->name, "test/run"))
113                                         dest = &m->run_tests;
114                                 else if (strstarts(f->name, "test/compile_ok"))
115                                         dest = &m->compile_ok_tests;
116                                 else if (strstarts(f->name, "test/compile_fail"))
117                                         dest = &m->compile_fail_tests;
118                                 else
119                                         dest = &m->other_test_c_files;
120                         } else
121                                 dest = &m->other_test_files;
122                 } else
123                         dest = &m->other_files;
124
125                 list_add(dest, &f->list);
126         }
127         closedir(d);
128 }
129
130 char *report_on_lines(struct list_head *files,
131                       char *(*report)(const char *),
132                       char *sofar)
133 {
134         struct ccan_file *f;
135
136         list_for_each(files, f, list) {
137                 unsigned int i;
138                 char **lines = get_ccan_file_lines(f);
139
140                 for (i = 0; i < f->num_lines; i++) {
141                         char *r = report(lines[i]);
142                         if (!r)
143                                 continue;
144
145                         sofar = talloc_asprintf_append(sofar,
146                                                        "%s:%u:%s\n",
147                                                        f->name, i+1, r);
148                         talloc_free(r);
149                 }
150         }
151         return sofar;
152 }
153
154 struct manifest *get_manifest(const void *ctx)
155 {
156         struct manifest *m = talloc(ctx, struct manifest);
157         unsigned int len;
158
159         m->info_file = NULL;
160         list_head_init(&m->c_files);
161         list_head_init(&m->h_files);
162         list_head_init(&m->api_tests);
163         list_head_init(&m->run_tests);
164         list_head_init(&m->compile_ok_tests);
165         list_head_init(&m->compile_fail_tests);
166         list_head_init(&m->other_test_c_files);
167         list_head_init(&m->other_test_files);
168         list_head_init(&m->other_files);
169         list_head_init(&m->dep_dirs);
170         list_head_init(&m->dep_objs);
171
172         m->basename = talloc_getcwd(m);
173         if (!m->basename)
174                 err(1, "Getting current directory");
175         len = strlen(m->basename);
176         while (len && m->basename[len-1] == '/')
177                 m->basename[--len] = '\0';
178
179         m->basename = strrchr(m->basename, '/');
180         if (!m->basename)
181                 errx(1, "I don't expect to be run from the root directory");
182         m->basename++;
183
184         add_files(m, "");
185         return m;
186 }
187
188
189 /**
190  * remove_comments - strip comments from a line, return copy.
191  * @line: line to copy
192  * @in_comment: are we already within a comment (from prev line).
193  * @unterminated: are we still in a comment for next line.
194  */
195 static char *remove_comments(const char *line, bool in_comment,
196                              bool *unterminated)
197 {
198         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
199
200         p = ret;
201         for (;;) {
202                 if (!in_comment) {
203                         /* Find first comment. */
204                         const char *old_comment = strstr(line, "/*");
205                         const char *new_comment = strstr(line, "//");
206                         const char *comment;
207
208                         if (new_comment && old_comment)
209                                 comment = new_comment < old_comment
210                                         ? new_comment : old_comment;
211                         else if (old_comment)
212                                 comment = old_comment;
213                         else if (new_comment)
214                                 comment = new_comment;
215                         else {
216                                 /* Nothing more. */
217                                 strcpy(p, line);
218                                 *unterminated = false;
219                                 break;
220                         }
221
222                         /* Copy up to comment. */
223                         memcpy(p, line, comment - line);
224                         p += comment - line;
225                         line += comment - line + 2;
226
227                         if (comment == new_comment) {
228                                 /* We're done: goes to EOL. */
229                                 p[0] = '\0';
230                                 *unterminated = false;
231                                 break;
232                         }
233                         in_comment = true;
234                 }
235
236                 if (in_comment) {
237                         const char *end = strstr(line, "*/");
238                         if (!end) {
239                                 *unterminated = true;
240                                 p[0] = '\0';
241                                 break;
242                         }
243                         line = end+2;
244                         in_comment = false;
245                 }
246         }
247         return ret;
248 }
249
/* True if the line holds nothing but spaces and tabs (or is empty). */
static bool is_empty(const char *line)
{
        const char *rest = line + strspn(line, " \t");

        return *rest == '\0';
}
254
/* Does this line end in a backslash, i.e. continue onto the next line?
 * Technically that needs an odd number of backslashes, but we don't check. */
static bool continues(const char *line)
{
        size_t len = strlen(line);

        if (len == 0)
                return false;
        return line[len - 1] == '\\';
}
260
261 /* Get token if it's equal to token. */
262 bool get_token(const char **line, const char *token)
263 {
264         unsigned int toklen;
265
266         *line += strspn(*line, " \t");
267         if (isalnum(token[0]) || token[0] == '_')
268                 toklen = strspn(*line, IDENT_CHARS);
269         else {
270                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
271                 toklen = strlen(token);
272         }
273
274         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
275                 *line += toklen;
276                 return true;
277         }
278         return false;
279 }
280
281 char *get_symbol_token(void *ctx, const char **line)
282 {
283         unsigned int toklen;
284         char *ret;
285
286         *line += strspn(*line, " \t");
287         toklen = strspn(*line, IDENT_CHARS);
288         if (!toklen)
289                 return NULL;
290         ret = talloc_strndup(ctx, *line, toklen);
291         *line += toklen;
292         return ret;
293 }
294
295 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
296 {
297         bool brackets, defined;
298
299         cond->inverse = get_token(line, "!");
300         defined = get_token(line, "defined");
301         brackets = get_token(line, "(");
302         cond->symbol = get_symbol_token(cond, line);
303         if (!cond->symbol)
304                 return false;
305         if (brackets && !get_token(line, ")"))
306                 return false;
307         if (!defined)
308                 cond->type = PP_COND_IF;
309         return true;
310 }
311
312 /* FIXME: Get serious! */
313 static struct pp_conditions *analyze_directive(struct ccan_file *f,
314                                                const char *line,
315                                                struct pp_conditions *parent)
316 {
317         struct pp_conditions *cond = talloc(f, struct pp_conditions);
318         bool unused;
319
320         line = remove_comments(line, false, &unused);
321
322         cond->parent = parent;
323         cond->type = PP_COND_IFDEF;
324
325         if (!get_token(&line, "#"))
326                 abort();
327
328         if (get_token(&line, "if")) {
329                 if (!parse_hash_if(cond, &line))
330                         goto unknown;
331         } else if (get_token(&line, "elif")) {
332                 /* Malformed? */
333                 if (!parent)
334                         return NULL;
335                 cond->parent = parent->parent;
336                 /* FIXME: Not quite true.  This implies !parent, but we don't
337                  * do multiple conditionals yet. */
338                 if (!parse_hash_if(cond, &line))
339                         goto unknown;
340         } else if (get_token(&line, "ifdef")) {
341                 bool brackets;
342                 cond->inverse = false;
343                 brackets = get_token(&line, "(");
344                 cond->symbol = get_symbol_token(cond, &line);
345                 if (!cond->symbol)
346                         goto unknown;
347                 if (brackets && !get_token(&line, ")"))
348                         goto unknown;
349         } else if (get_token(&line, "ifndef")) {
350                 bool brackets;
351                 cond->inverse = true;
352                 brackets = get_token(&line, "(");
353                 cond->symbol = get_symbol_token(cond, &line);
354                 if (!cond->symbol)
355                         goto unknown;
356                 if (brackets && !get_token(&line, ")"))
357                         goto unknown;
358         } else if (get_token(&line, "else")) {
359                 /* Malformed? */
360                 if (!parent)
361                         return NULL;
362
363                 *cond = *parent;
364                 cond->inverse = !cond->inverse;
365                 return cond;
366         } else if (get_token(&line, "endif")) {
367                 talloc_free(cond);
368                 /* Malformed? */
369                 if (!parent)
370                         return NULL;
371                 /* Back up one! */
372                 return parent->parent;
373         } else {
374                 /* Not a conditional. */
375                 talloc_free(cond);
376                 return parent;
377         }
378
379         if (!is_empty(line))
380                 goto unknown;
381         return cond;
382
383 unknown:
384         cond->type = PP_COND_UNKNOWN;
385         return cond;
386 }
387
388 /* This parser is rough, but OK if code is reasonably neat. */
389 struct line_info *get_ccan_line_info(struct ccan_file *f)
390 {
391         bool continued = false, in_comment = false;
392         struct pp_conditions *cond = NULL;
393         unsigned int i;
394
395         if (f->line_info)
396                 return f->line_info;
397
398         get_ccan_file_lines(f);
399         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
400
401         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
402                 char *p;
403                 bool still_doc_line;
404
405                 /* Current conditions apply to this line. */
406                 f->line_info[i].cond = cond;
407                 f->line_info[i].continued = continued;
408
409                 if (continued) {
410                         /* Same as last line. */
411                         f->line_info[i].type = f->line_info[i-1].type;
412                         /* Update in_comment. */
413                         remove_comments(f->lines[i], in_comment, &in_comment);
414                         continue;
415                 }
416
417                 /* Preprocessor directive? */
418                 if (!in_comment
419                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
420                         f->line_info[i].type = PREPROC_LINE;
421                         cond = analyze_directive(f, f->lines[i], cond);
422                         continue;
423                 }
424
425                 still_doc_line = (in_comment
426                                   && f->line_info[i-1].type == DOC_LINE);
427
428                 p = remove_comments(f->lines[i], in_comment, &in_comment);
429                 if (is_empty(p)) {
430                         if (strstarts(f->lines[i], "/**") || still_doc_line)
431                                 f->line_info[i].type = DOC_LINE;
432                         else
433                                 f->line_info[i].type = COMMENT_LINE;
434                 } else
435                         f->line_info[i].type = CODE_LINE;
436                 talloc_free(p);
437         }
438         return f->line_info;
439 }
440
441 struct symbol {
442         struct list_node list;
443         const char *name;
444         const unsigned int *value;
445 };
446
447 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
448 {
449         struct symbol *i;
450
451         list_for_each(syms, i, list)
452                 if (streq(sym, i->name))
453                         return i;
454         return NULL;
455 }
456
457 static enum line_compiled get_pp(struct pp_conditions *cond,
458                                  struct list_head *syms)
459 {
460         struct symbol *sym;
461         unsigned int val;
462         enum line_compiled parent, ret;
463
464         /* No conditions?  Easy. */
465         if (!cond)
466                 return COMPILED;
467
468         /* Check we get here at all. */
469         parent = get_pp(cond->parent, syms);
470         if (parent == NOT_COMPILED)
471                 return NOT_COMPILED;
472
473         if (cond->type == PP_COND_UNKNOWN)
474                 return MAYBE_COMPILED;
475
476         sym = find_symbol(syms, cond->symbol);
477         if (!sym)
478                 return MAYBE_COMPILED;
479
480         switch (cond->type) {
481         case PP_COND_IF:
482                 /* Undefined is 0. */
483                 val = sym->value ? *sym->value : 0;
484                 if (!val == cond->inverse)
485                         ret = COMPILED;
486                 else
487                         ret = NOT_COMPILED;
488                 break;
489
490         case PP_COND_IFDEF:
491                 if (cond->inverse == !sym->value)
492                         ret = COMPILED;
493                 else
494                         ret = NOT_COMPILED;
495                 break;
496
497         default:
498                 abort();
499         }
500
501         /* If parent didn't know, NO == NO, but YES == MAYBE. */
502         if (parent == MAYBE_COMPILED && ret == COMPILED)
503                 ret = MAYBE_COMPILED;
504         return ret;
505 }
506
507 static void add_symbol(struct list_head *head,
508                        const char *symbol, const unsigned int *value)
509 {
510         struct symbol *sym = talloc(head, struct symbol);
511         sym->name = symbol;
512         sym->value = value;
513         list_add(head, &sym->list);
514 }
515         
516 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
517                                     const char *symbol,
518                                     const unsigned int *value,
519                                     ...)
520 {
521         enum line_compiled ret;
522         struct list_head *head;
523         va_list ap;
524
525         head = talloc(NULL, struct list_head);
526         list_head_init(head);
527
528         va_start(ap, value);
529         add_symbol(head, symbol, value);
530
531         while ((symbol = va_arg(ap, const char *)) != NULL) {
532                 value = va_arg(ap, const unsigned int *);
533                 add_symbol(head, symbol, value);
534         }
535         ret = get_pp(cond, head);
536         talloc_free(head);
537         return ret;
538 }
539