]> git.ozlabs.org Git - ccan/blob - tools/ccanlint/file_analysis.c
dc23eb964b2f5ba3c49bf03a3e52dd3de1311d1b
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17
18 char **get_ccan_file_lines(struct ccan_file *f)
19 {
20         if (!f->lines) {
21                 char *buffer = grab_file(f, f->name, NULL);
22                 if (!buffer)
23                         err(1, "Getting file %s", f->name);
24                 f->lines = strsplit(f, buffer, "\n", &f->num_lines);
25         }
26         return f->lines;
27 }
28
29 struct list_head *get_ccan_file_docs(struct ccan_file *f)
30 {
31         if (!f->doc_sections) {
32                 get_ccan_file_lines(f);
33                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
34         }
35         return f->doc_sections;
36 }
37
38 static void add_files(struct manifest *m, const char *dir)
39 {
40         DIR *d;
41         struct dirent *ent;
42
43         if (dir[0])
44                 d = opendir(dir);
45         else
46                 d = opendir(".");
47         if (!d)
48                 err(1, "Opening directory %s", dir[0] ? dir : ".");
49
50         while ((ent = readdir(d)) != NULL) {
51                 struct stat st;
52                 struct ccan_file *f;
53                 struct list_head *dest;
54                 bool is_c_src;
55
56                 if (ent->d_name[0] == '.')
57                         continue;
58
59                 f = talloc(m, struct ccan_file);
60                 f->lines = NULL;
61                 f->line_info = NULL;
62                 f->doc_sections = NULL;
63                 f->name = talloc_asprintf(f, "%s%s", dir, ent->d_name);
64                 if (lstat(f->name, &st) != 0)
65                         err(1, "lstat %s", f->name);
66
67                 if (S_ISDIR(st.st_mode)) {
68                         f->name = talloc_append_string(f->name, "/");
69                         add_files(m, f->name);
70                         continue;
71                 }
72                 if (!S_ISREG(st.st_mode)) {
73                         talloc_free(f);
74                         continue;
75                 }
76
77                 if (streq(f->name, "_info.c")) {
78                         m->info_file = f;
79                         continue;
80                 }
81
82                 is_c_src = strends(f->name, ".c");
83                 if (!is_c_src && !strends(f->name, ".h"))
84                         dest = &m->other_files;
85                 else if (!strchr(f->name, '/')) {
86                         if (is_c_src)
87                                 dest = &m->c_files;
88                         else
89                                 dest = &m->h_files;
90                 } else if (strstarts(f->name, "test/")) {
91                         if (is_c_src) {
92                                 if (strstarts(f->name, "test/api"))
93                                         dest = &m->api_tests;
94                                 else if (strstarts(f->name, "test/run"))
95                                         dest = &m->run_tests;
96                                 else if (strstarts(f->name, "test/compile_ok"))
97                                         dest = &m->compile_ok_tests;
98                                 else if (strstarts(f->name, "test/compile_fail"))
99                                         dest = &m->compile_fail_tests;
100                                 else
101                                         dest = &m->other_test_files;
102                         } else
103                                 dest = &m->other_test_files;
104                 } else
105                         dest = &m->other_files;
106
107                 list_add(dest, &f->list);
108         }
109         closedir(d);
110 }
111
112 char *report_on_lines(struct list_head *files,
113                       char *(*report)(const char *),
114                       char *sofar)
115 {
116         struct ccan_file *f;
117
118         list_for_each(files, f, list) {
119                 unsigned int i;
120                 char **lines = get_ccan_file_lines(f);
121
122                 for (i = 0; i < f->num_lines; i++) {
123                         char *r = report(lines[i]);
124                         if (!r)
125                                 continue;
126
127                         sofar = talloc_asprintf_append(sofar,
128                                                        "%s:%u:%s\n",
129                                                        f->name, i+1, r);
130                         talloc_free(r);
131                 }
132         }
133         return sofar;
134 }
135
136 struct manifest *get_manifest(void)
137 {
138         struct manifest *m = talloc(NULL, struct manifest);
139         unsigned int len;
140
141         m->info_file = NULL;
142         list_head_init(&m->c_files);
143         list_head_init(&m->h_files);
144         list_head_init(&m->api_tests);
145         list_head_init(&m->run_tests);
146         list_head_init(&m->compile_ok_tests);
147         list_head_init(&m->compile_fail_tests);
148         list_head_init(&m->other_test_files);
149         list_head_init(&m->other_files);
150
151         /* *This* is why people hate C. */
152         len = 32;
153         m->basename = talloc_array(m, char, len);
154         while (!getcwd(m->basename, len)) {
155                 if (errno != ERANGE)
156                         err(1, "Getting current directory");
157                 m->basename = talloc_realloc(m, m->basename, char, len *= 2);
158         }
159
160         len = strlen(m->basename);
161         while (len && m->basename[len-1] == '/')
162                 m->basename[--len] = '\0';
163
164         m->basename = strrchr(m->basename, '/');
165         if (!m->basename)
166                 errx(1, "I don't expect to be run from the root directory");
167         m->basename++;
168
169         add_files(m, "");
170         return m;
171 }
172
173
174 /**
175  * remove_comments - strip comments from a line, return copy.
176  * @line: line to copy
177  * @in_comment: are we already within a comment (from prev line).
178  * @unterminated: are we still in a comment for next line.
179  */
180 static char *remove_comments(const char *line, bool in_comment,
181                              bool *unterminated)
182 {
183         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
184
185         p = ret;
186         for (;;) {
187                 if (!in_comment) {
188                         /* Find first comment. */
189                         const char *old_comment = strstr(line, "/*");
190                         const char *new_comment = strstr(line, "//");
191                         const char *comment;
192
193                         if (new_comment && old_comment)
194                                 comment = new_comment < old_comment
195                                         ? new_comment : old_comment;
196                         else if (old_comment)
197                                 comment = old_comment;
198                         else if (new_comment)
199                                 comment = new_comment;
200                         else {
201                                 /* Nothing more. */
202                                 strcpy(p, line);
203                                 *unterminated = false;
204                                 break;
205                         }
206
207                         /* Copy up to comment. */
208                         memcpy(p, line, comment - line);
209                         p += comment - line;
210                         line += comment - line + 2;
211
212                         if (comment == new_comment) {
213                                 /* We're done: goes to EOL. */
214                                 p[0] = '\0';
215                                 *unterminated = false;
216                                 break;
217                         }
218                         in_comment = true;
219                 }
220
221                 if (in_comment) {
222                         const char *end = strstr(line, "*/");
223                         if (!end) {
224                                 *unterminated = true;
225                                 p[0] = '\0';
226                                 break;
227                         }
228                         line = end+2;
229                         in_comment = false;
230                 }
231         }
232         return ret;
233 }
234
/* True if @line contains nothing but spaces and tabs (or nothing at all). */
static bool is_empty(const char *line)
{
        const char *rest = line + strspn(line, " \t");

        return *rest == '\0';
}
239
/* Does @line end in a backslash, i.e. is the next line a continuation? */
static bool continues(const char *line)
{
        /* Strictly only an odd number of trailing backslashes counts,
         * but nobody cares about that case. */
        bool trailing_backslash = strends(line, "\\");

        return trailing_backslash;
}
245
246 /* Get token if it's equal to token. */
247 bool get_token(const char **line, const char *token)
248 {
249         unsigned int toklen;
250
251         *line += strspn(*line, " \t");
252         if (isalnum(token[0]) || token[0] == '_')
253                 toklen = strspn(*line, IDENT_CHARS);
254         else {
255                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
256                 toklen = strlen(token);
257         }
258
259         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
260                 *line += toklen;
261                 return true;
262         }
263         return false;
264 }
265
266 char *get_symbol_token(void *ctx, const char **line)
267 {
268         unsigned int toklen;
269         char *ret;
270
271         *line += strspn(*line, " \t");
272         toklen = strspn(*line, IDENT_CHARS);
273         if (!toklen)
274                 return NULL;
275         ret = talloc_strndup(ctx, *line, toklen);
276         *line += toklen;
277         return ret;
278 }
279
280 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
281 {
282         bool brackets, defined;
283
284         cond->inverse = get_token(line, "!");
285         defined = get_token(line, "defined");
286         brackets = get_token(line, "(");
287         cond->symbol = get_symbol_token(cond, line);
288         if (!cond->symbol)
289                 return false;
290         if (brackets && !get_token(line, ")"))
291                 return false;
292         if (!defined)
293                 cond->type = PP_COND_IF;
294         return true;
295 }
296
297 /* FIXME: Get serious! */
298 static struct pp_conditions *analyze_directive(struct ccan_file *f,
299                                                const char *line,
300                                                struct pp_conditions *parent)
301 {
302         struct pp_conditions *cond = talloc(f, struct pp_conditions);
303         bool unused;
304
305         line = remove_comments(line, false, &unused);
306
307         cond->parent = parent;
308         cond->type = PP_COND_IFDEF;
309
310         if (!get_token(&line, "#"))
311                 abort();
312
313         if (get_token(&line, "if")) {
314                 if (!parse_hash_if(cond, &line))
315                         goto unknown;
316         } else if (get_token(&line, "elif")) {
317                 /* Malformed? */
318                 if (!parent)
319                         return NULL;
320                 cond->parent = parent->parent;
321                 /* FIXME: Not quite true.  This implies !parent, but we don't
322                  * do multiple conditionals yet. */
323                 if (!parse_hash_if(cond, &line))
324                         goto unknown;
325         } else if (get_token(&line, "ifdef")) {
326                 bool brackets;
327                 cond->inverse = false;
328                 brackets = get_token(&line, "(");
329                 cond->symbol = get_symbol_token(cond, &line);
330                 if (!cond->symbol)
331                         goto unknown;
332                 if (brackets && !get_token(&line, ")"))
333                         goto unknown;
334         } else if (get_token(&line, "ifndef")) {
335                 bool brackets;
336                 cond->inverse = true;
337                 brackets = get_token(&line, "(");
338                 cond->symbol = get_symbol_token(cond, &line);
339                 if (!cond->symbol)
340                         goto unknown;
341                 if (brackets && !get_token(&line, ")"))
342                         goto unknown;
343         } else if (get_token(&line, "else")) {
344                 /* Malformed? */
345                 if (!parent)
346                         return NULL;
347
348                 *cond = *parent;
349                 cond->inverse = !cond->inverse;
350                 return cond;
351         } else if (get_token(&line, "endif")) {
352                 talloc_free(cond);
353                 /* Malformed? */
354                 if (!parent)
355                         return NULL;
356                 /* Back up one! */
357                 return parent->parent;
358         } else {
359                 /* Not a conditional. */
360                 talloc_free(cond);
361                 return parent;
362         }
363
364         if (!is_empty(line))
365                 goto unknown;
366         return cond;
367
368 unknown:
369         cond->type = PP_COND_UNKNOWN;
370         return cond;
371 }
372
373 /* This parser is rough, but OK if code is reasonably neat. */
374 struct line_info *get_ccan_line_info(struct ccan_file *f)
375 {
376         bool continued = false, in_comment = false;
377         struct pp_conditions *cond = NULL;
378         unsigned int i;
379
380         if (f->line_info)
381                 return f->line_info;
382
383         get_ccan_file_lines(f);
384         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
385
386         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
387                 char *p;
388                 bool still_doc_line;
389
390                 /* Current conditions apply to this line. */
391                 f->line_info[i].cond = cond;
392                 f->line_info[i].continued = continued;
393
394                 if (continued) {
395                         /* Same as last line. */
396                         f->line_info[i].type = f->line_info[i-1].type;
397                         /* Update in_comment. */
398                         remove_comments(f->lines[i], in_comment, &in_comment);
399                         continue;
400                 }
401
402                 /* Preprocessor directive? */
403                 if (!in_comment
404                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
405                         f->line_info[i].type = PREPROC_LINE;
406                         cond = analyze_directive(f, f->lines[i], cond);
407                         continue;
408                 }
409
410                 still_doc_line = (in_comment
411                                   && f->line_info[i-1].type == DOC_LINE);
412
413                 p = remove_comments(f->lines[i], in_comment, &in_comment);
414                 if (is_empty(p)) {
415                         if (strstarts(f->lines[i], "/**") || still_doc_line)
416                                 f->line_info[i].type = DOC_LINE;
417                         else
418                                 f->line_info[i].type = COMMENT_LINE;
419                 } else
420                         f->line_info[i].type = CODE_LINE;
421                 talloc_free(p);
422         }
423         return f->line_info;
424 }
425
426 struct symbol {
427         struct list_node list;
428         const char *name;
429         const unsigned int *value;
430 };
431
432 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
433 {
434         struct symbol *i;
435
436         list_for_each(syms, i, list)
437                 if (streq(sym, i->name))
438                         return i;
439         return NULL;
440 }
441
442 static enum line_compiled get_pp(struct pp_conditions *cond,
443                                  struct list_head *syms)
444 {
445         struct symbol *sym;
446         unsigned int val;
447         enum line_compiled parent, ret;
448
449         /* No conditions?  Easy. */
450         if (!cond)
451                 return COMPILED;
452
453         /* Check we get here at all. */
454         parent = get_pp(cond->parent, syms);
455         if (parent == NOT_COMPILED)
456                 return NOT_COMPILED;
457
458         if (cond->type == PP_COND_UNKNOWN)
459                 return MAYBE_COMPILED;
460
461         sym = find_symbol(syms, cond->symbol);
462         if (!sym)
463                 return MAYBE_COMPILED;
464
465         switch (cond->type) {
466         case PP_COND_IF:
467                 /* Undefined is 0. */
468                 val = sym->value ? *sym->value : 0;
469                 if (!val == cond->inverse)
470                         ret = COMPILED;
471                 else
472                         ret = NOT_COMPILED;
473                 break;
474
475         case PP_COND_IFDEF:
476                 if (cond->inverse == !sym->value)
477                         ret = COMPILED;
478                 else
479                         ret = NOT_COMPILED;
480                 break;
481
482         default:
483                 abort();
484         }
485
486         /* If parent didn't know, NO == NO, but YES == MAYBE. */
487         if (parent == MAYBE_COMPILED && ret == COMPILED)
488                 ret = MAYBE_COMPILED;
489         return ret;
490 }
491
492 static void add_symbol(struct list_head *head,
493                        const char *symbol, const unsigned int *value)
494 {
495         struct symbol *sym = talloc(head, struct symbol);
496         sym->name = symbol;
497         sym->value = value;
498         list_add(head, &sym->list);
499 }
500         
501 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
502                                     const char *symbol,
503                                     const unsigned int *value,
504                                     ...)
505 {
506         enum line_compiled ret;
507         struct list_head *head;
508         va_list ap;
509
510         head = talloc(NULL, struct list_head);
511         list_head_init(head);
512
513         va_start(ap, value);
514         add_symbol(head, symbol, value);
515
516         while ((symbol = va_arg(ap, const char *)) != NULL) {
517                 value = va_arg(ap, const unsigned int *);
518                 add_symbol(head, symbol, value);
519         }
520         ret = get_pp(cond, head);
521         talloc_free(head);
522         return ret;
523 }
524