]> git.ozlabs.org Git - ccan/blob - tools/ccanlint/file_analysis.c
3c8930f3d847b10b5dd6ad1ad5a4eca19af47bdf
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17
18 char **get_ccan_file_lines(struct ccan_file *f)
19 {
20         if (!f->lines)
21                 f->lines = strsplit(f, f->contents, "\n", &f->num_lines);
22
23         return f->lines;
24 }
25
26 struct list_head *get_ccan_file_docs(struct ccan_file *f)
27 {
28         if (!f->doc_sections) {
29                 get_ccan_file_lines(f);
30                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
31         }
32         return f->doc_sections;
33 }
34
35 struct ccan_file *new_ccan_file(const void *ctx, char *name)
36 {
37         struct ccan_file *f;
38
39         f = talloc(ctx, struct ccan_file);
40         f->lines = NULL;
41         f->line_info = NULL;
42         f->doc_sections = NULL;
43         f->name = talloc_steal(f, name);
44         return f;
45 }
46
47 static void add_files(struct manifest *m, const char *dir)
48 {
49         DIR *d;
50         struct dirent *ent;
51
52         if (dir[0])
53                 d = opendir(dir);
54         else
55                 d = opendir(".");
56         if (!d)
57                 err(1, "Opening directory %s", dir[0] ? dir : ".");
58
59         while ((ent = readdir(d)) != NULL) {
60                 struct stat st;
61                 struct ccan_file *f;
62                 struct list_head *dest;
63                 bool is_c_src;
64
65                 if (ent->d_name[0] == '.')
66                         continue;
67
68                 f = new_ccan_file(m, talloc_asprintf(m, "%s%s",
69                                                      dir, ent->d_name));
70                 if (lstat(f->name, &st) != 0)
71                         err(1, "lstat %s", f->name);
72
73                 if (S_ISDIR(st.st_mode)) {
74                         f->name = talloc_append_string(f->name, "/");
75                         add_files(m, f->name);
76                         continue;
77                 }
78                 if (!S_ISREG(st.st_mode)) {
79                         talloc_free(f);
80                         continue;
81                 }
82
83                 if (streq(f->name, "_info")) {
84                         m->info_file = f;
85                         f->contents = grab_file(f, f->name, &f->contents_size);
86                         if (!f->contents)
87                                 err(1, "Reading file %s", f->name);
88                         continue;
89                 }
90
91                 is_c_src = strends(f->name, ".c");
92                 if (!is_c_src && !strends(f->name, ".h")) {
93                         /* We don't pull in contents of non-source files */
94                         dest = &m->other_files;
95                         continue;
96                 }
97
98                 f->contents = grab_file(f, f->name, &f->contents_size);
99                 if (!f->contents)
100                         err(1, "Reading file %s", f->name);
101
102                 if (!strchr(f->name, '/')) {
103                         if (is_c_src)
104                                 dest = &m->c_files;
105                         else
106                                 dest = &m->h_files;
107                 } else if (strstarts(f->name, "test/")) {
108                         if (is_c_src) {
109                                 if (strstarts(f->name, "test/api"))
110                                         dest = &m->api_tests;
111                                 else if (strstarts(f->name, "test/run"))
112                                         dest = &m->run_tests;
113                                 else if (strstarts(f->name, "test/compile_ok"))
114                                         dest = &m->compile_ok_tests;
115                                 else if (strstarts(f->name, "test/compile_fail"))
116                                         dest = &m->compile_fail_tests;
117                                 else
118                                         dest = &m->other_test_files;
119                         } else
120                                 dest = &m->other_test_files;
121                 } else
122                         dest = &m->other_files;
123
124                 list_add(dest, &f->list);
125         }
126         closedir(d);
127 }
128
129 char *report_on_lines(struct list_head *files,
130                       char *(*report)(const char *),
131                       char *sofar)
132 {
133         struct ccan_file *f;
134
135         list_for_each(files, f, list) {
136                 unsigned int i;
137                 char **lines = get_ccan_file_lines(f);
138
139                 for (i = 0; i < f->num_lines; i++) {
140                         char *r = report(lines[i]);
141                         if (!r)
142                                 continue;
143
144                         sofar = talloc_asprintf_append(sofar,
145                                                        "%s:%u:%s\n",
146                                                        f->name, i+1, r);
147                         talloc_free(r);
148                 }
149         }
150         return sofar;
151 }
152
153 struct manifest *get_manifest(const void *ctx)
154 {
155         struct manifest *m = talloc(ctx, struct manifest);
156         unsigned int len;
157
158         m->info_file = NULL;
159         list_head_init(&m->c_files);
160         list_head_init(&m->h_files);
161         list_head_init(&m->api_tests);
162         list_head_init(&m->run_tests);
163         list_head_init(&m->compile_ok_tests);
164         list_head_init(&m->compile_fail_tests);
165         list_head_init(&m->other_test_files);
166         list_head_init(&m->other_files);
167
168         m->basename = talloc_getcwd(m);
169         if (!m->basename)
170                 err(1, "Getting current directory");
171         len = strlen(m->basename);
172         while (len && m->basename[len-1] == '/')
173                 m->basename[--len] = '\0';
174
175         m->basename = strrchr(m->basename, '/');
176         if (!m->basename)
177                 errx(1, "I don't expect to be run from the root directory");
178         m->basename++;
179
180         add_files(m, "");
181         return m;
182 }
183
184
185 /**
186  * remove_comments - strip comments from a line, return copy.
187  * @line: line to copy
188  * @in_comment: are we already within a comment (from prev line).
189  * @unterminated: are we still in a comment for next line.
190  */
191 static char *remove_comments(const char *line, bool in_comment,
192                              bool *unterminated)
193 {
194         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
195
196         p = ret;
197         for (;;) {
198                 if (!in_comment) {
199                         /* Find first comment. */
200                         const char *old_comment = strstr(line, "/*");
201                         const char *new_comment = strstr(line, "//");
202                         const char *comment;
203
204                         if (new_comment && old_comment)
205                                 comment = new_comment < old_comment
206                                         ? new_comment : old_comment;
207                         else if (old_comment)
208                                 comment = old_comment;
209                         else if (new_comment)
210                                 comment = new_comment;
211                         else {
212                                 /* Nothing more. */
213                                 strcpy(p, line);
214                                 *unterminated = false;
215                                 break;
216                         }
217
218                         /* Copy up to comment. */
219                         memcpy(p, line, comment - line);
220                         p += comment - line;
221                         line += comment - line + 2;
222
223                         if (comment == new_comment) {
224                                 /* We're done: goes to EOL. */
225                                 p[0] = '\0';
226                                 *unterminated = false;
227                                 break;
228                         }
229                         in_comment = true;
230                 }
231
232                 if (in_comment) {
233                         const char *end = strstr(line, "*/");
234                         if (!end) {
235                                 *unterminated = true;
236                                 p[0] = '\0';
237                                 break;
238                         }
239                         line = end+2;
240                         in_comment = false;
241                 }
242         }
243         return ret;
244 }
245
/* True if the line holds nothing but spaces and tabs (or nothing at all). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; an empty line ends right there. */
        return line[strspn(line, " \t")] == '\0';
}
250
/* Does this line end in a backslash, i.e. carry on onto the next line? */
static bool continues(const char *line)
{
        size_t len = strlen(line);

        /* Technically, any odd number of these.  But who cares? */
        return len > 0 && line[len - 1] == '\\';
}
256
257 /* Get token if it's equal to token. */
258 bool get_token(const char **line, const char *token)
259 {
260         unsigned int toklen;
261
262         *line += strspn(*line, " \t");
263         if (isalnum(token[0]) || token[0] == '_')
264                 toklen = strspn(*line, IDENT_CHARS);
265         else {
266                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
267                 toklen = strlen(token);
268         }
269
270         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
271                 *line += toklen;
272                 return true;
273         }
274         return false;
275 }
276
277 char *get_symbol_token(void *ctx, const char **line)
278 {
279         unsigned int toklen;
280         char *ret;
281
282         *line += strspn(*line, " \t");
283         toklen = strspn(*line, IDENT_CHARS);
284         if (!toklen)
285                 return NULL;
286         ret = talloc_strndup(ctx, *line, toklen);
287         *line += toklen;
288         return ret;
289 }
290
291 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
292 {
293         bool brackets, defined;
294
295         cond->inverse = get_token(line, "!");
296         defined = get_token(line, "defined");
297         brackets = get_token(line, "(");
298         cond->symbol = get_symbol_token(cond, line);
299         if (!cond->symbol)
300                 return false;
301         if (brackets && !get_token(line, ")"))
302                 return false;
303         if (!defined)
304                 cond->type = PP_COND_IF;
305         return true;
306 }
307
308 /* FIXME: Get serious! */
309 static struct pp_conditions *analyze_directive(struct ccan_file *f,
310                                                const char *line,
311                                                struct pp_conditions *parent)
312 {
313         struct pp_conditions *cond = talloc(f, struct pp_conditions);
314         bool unused;
315
316         line = remove_comments(line, false, &unused);
317
318         cond->parent = parent;
319         cond->type = PP_COND_IFDEF;
320
321         if (!get_token(&line, "#"))
322                 abort();
323
324         if (get_token(&line, "if")) {
325                 if (!parse_hash_if(cond, &line))
326                         goto unknown;
327         } else if (get_token(&line, "elif")) {
328                 /* Malformed? */
329                 if (!parent)
330                         return NULL;
331                 cond->parent = parent->parent;
332                 /* FIXME: Not quite true.  This implies !parent, but we don't
333                  * do multiple conditionals yet. */
334                 if (!parse_hash_if(cond, &line))
335                         goto unknown;
336         } else if (get_token(&line, "ifdef")) {
337                 bool brackets;
338                 cond->inverse = false;
339                 brackets = get_token(&line, "(");
340                 cond->symbol = get_symbol_token(cond, &line);
341                 if (!cond->symbol)
342                         goto unknown;
343                 if (brackets && !get_token(&line, ")"))
344                         goto unknown;
345         } else if (get_token(&line, "ifndef")) {
346                 bool brackets;
347                 cond->inverse = true;
348                 brackets = get_token(&line, "(");
349                 cond->symbol = get_symbol_token(cond, &line);
350                 if (!cond->symbol)
351                         goto unknown;
352                 if (brackets && !get_token(&line, ")"))
353                         goto unknown;
354         } else if (get_token(&line, "else")) {
355                 /* Malformed? */
356                 if (!parent)
357                         return NULL;
358
359                 *cond = *parent;
360                 cond->inverse = !cond->inverse;
361                 return cond;
362         } else if (get_token(&line, "endif")) {
363                 talloc_free(cond);
364                 /* Malformed? */
365                 if (!parent)
366                         return NULL;
367                 /* Back up one! */
368                 return parent->parent;
369         } else {
370                 /* Not a conditional. */
371                 talloc_free(cond);
372                 return parent;
373         }
374
375         if (!is_empty(line))
376                 goto unknown;
377         return cond;
378
379 unknown:
380         cond->type = PP_COND_UNKNOWN;
381         return cond;
382 }
383
384 /* This parser is rough, but OK if code is reasonably neat. */
385 struct line_info *get_ccan_line_info(struct ccan_file *f)
386 {
387         bool continued = false, in_comment = false;
388         struct pp_conditions *cond = NULL;
389         unsigned int i;
390
391         if (f->line_info)
392                 return f->line_info;
393
394         get_ccan_file_lines(f);
395         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
396
397         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
398                 char *p;
399                 bool still_doc_line;
400
401                 /* Current conditions apply to this line. */
402                 f->line_info[i].cond = cond;
403                 f->line_info[i].continued = continued;
404
405                 if (continued) {
406                         /* Same as last line. */
407                         f->line_info[i].type = f->line_info[i-1].type;
408                         /* Update in_comment. */
409                         remove_comments(f->lines[i], in_comment, &in_comment);
410                         continue;
411                 }
412
413                 /* Preprocessor directive? */
414                 if (!in_comment
415                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
416                         f->line_info[i].type = PREPROC_LINE;
417                         cond = analyze_directive(f, f->lines[i], cond);
418                         continue;
419                 }
420
421                 still_doc_line = (in_comment
422                                   && f->line_info[i-1].type == DOC_LINE);
423
424                 p = remove_comments(f->lines[i], in_comment, &in_comment);
425                 if (is_empty(p)) {
426                         if (strstarts(f->lines[i], "/**") || still_doc_line)
427                                 f->line_info[i].type = DOC_LINE;
428                         else
429                                 f->line_info[i].type = COMMENT_LINE;
430                 } else
431                         f->line_info[i].type = CODE_LINE;
432                 talloc_free(p);
433         }
434         return f->line_info;
435 }
436
437 struct symbol {
438         struct list_node list;
439         const char *name;
440         const unsigned int *value;
441 };
442
443 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
444 {
445         struct symbol *i;
446
447         list_for_each(syms, i, list)
448                 if (streq(sym, i->name))
449                         return i;
450         return NULL;
451 }
452
453 static enum line_compiled get_pp(struct pp_conditions *cond,
454                                  struct list_head *syms)
455 {
456         struct symbol *sym;
457         unsigned int val;
458         enum line_compiled parent, ret;
459
460         /* No conditions?  Easy. */
461         if (!cond)
462                 return COMPILED;
463
464         /* Check we get here at all. */
465         parent = get_pp(cond->parent, syms);
466         if (parent == NOT_COMPILED)
467                 return NOT_COMPILED;
468
469         if (cond->type == PP_COND_UNKNOWN)
470                 return MAYBE_COMPILED;
471
472         sym = find_symbol(syms, cond->symbol);
473         if (!sym)
474                 return MAYBE_COMPILED;
475
476         switch (cond->type) {
477         case PP_COND_IF:
478                 /* Undefined is 0. */
479                 val = sym->value ? *sym->value : 0;
480                 if (!val == cond->inverse)
481                         ret = COMPILED;
482                 else
483                         ret = NOT_COMPILED;
484                 break;
485
486         case PP_COND_IFDEF:
487                 if (cond->inverse == !sym->value)
488                         ret = COMPILED;
489                 else
490                         ret = NOT_COMPILED;
491                 break;
492
493         default:
494                 abort();
495         }
496
497         /* If parent didn't know, NO == NO, but YES == MAYBE. */
498         if (parent == MAYBE_COMPILED && ret == COMPILED)
499                 ret = MAYBE_COMPILED;
500         return ret;
501 }
502
503 static void add_symbol(struct list_head *head,
504                        const char *symbol, const unsigned int *value)
505 {
506         struct symbol *sym = talloc(head, struct symbol);
507         sym->name = symbol;
508         sym->value = value;
509         list_add(head, &sym->list);
510 }
511         
512 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
513                                     const char *symbol,
514                                     const unsigned int *value,
515                                     ...)
516 {
517         enum line_compiled ret;
518         struct list_head *head;
519         va_list ap;
520
521         head = talloc(NULL, struct list_head);
522         list_head_init(head);
523
524         va_start(ap, value);
525         add_symbol(head, symbol, value);
526
527         while ((symbol = va_arg(ap, const char *)) != NULL) {
528                 value = va_arg(ap, const unsigned int *);
529                 add_symbol(head, symbol, value);
530         }
531         ret = get_pp(cond, head);
532         talloc_free(head);
533         return ret;
534 }
535