]> git.ozlabs.org Git - ccan/blob - tools/ccanlint/file_analysis.c
44ea5c2dab9fd8a2760bc96a25f50a033cae22d9
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17
18 char **get_ccan_file_lines(struct ccan_file *f)
19 {
20         if (!f->lines)
21                 f->lines = strsplit(f, f->contents, "\n", &f->num_lines);
22
23         return f->lines;
24 }
25
26 struct list_head *get_ccan_file_docs(struct ccan_file *f)
27 {
28         if (!f->doc_sections) {
29                 get_ccan_file_lines(f);
30                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
31         }
32         return f->doc_sections;
33 }
34
35 struct ccan_file *new_ccan_file(const void *ctx, char *name)
36 {
37         struct ccan_file *f;
38
39         f = talloc(ctx, struct ccan_file);
40         f->lines = NULL;
41         f->line_info = NULL;
42         f->doc_sections = NULL;
43         f->name = talloc_steal(f, name);
44         return f;
45 }
46
47 static void add_files(struct manifest *m, const char *dir)
48 {
49         DIR *d;
50         struct dirent *ent;
51
52         if (dir[0])
53                 d = opendir(dir);
54         else
55                 d = opendir(".");
56         if (!d)
57                 err(1, "Opening directory %s", dir[0] ? dir : ".");
58
59         while ((ent = readdir(d)) != NULL) {
60                 struct stat st;
61                 struct ccan_file *f;
62                 struct list_head *dest;
63                 bool is_c_src;
64
65                 if (ent->d_name[0] == '.')
66                         continue;
67
68                 f = new_ccan_file(m, talloc_asprintf(m, "%s%s",
69                                                      dir, ent->d_name));
70                 if (lstat(f->name, &st) != 0)
71                         err(1, "lstat %s", f->name);
72
73                 if (S_ISDIR(st.st_mode)) {
74                         f->name = talloc_append_string(f->name, "/");
75                         add_files(m, f->name);
76                         continue;
77                 }
78                 if (!S_ISREG(st.st_mode)) {
79                         talloc_free(f);
80                         continue;
81                 }
82
83                 if (streq(f->name, "_info")) {
84                         m->info_file = f;
85                         f->contents = grab_file(f, f->name, &f->contents_size);
86                         if (!f->contents)
87                                 err(1, "Reading file %s", f->name);
88                         continue;
89                 }
90
91                 is_c_src = strends(f->name, ".c");
92                 if (!is_c_src && !strends(f->name, ".h")) {
93                         /* We don't pull in contents of non-source files */
94                         dest = &m->other_files;
95                         continue;
96                 }
97
98                 f->contents = grab_file(f, f->name, &f->contents_size);
99                 if (!f->contents)
100                         err(1, "Reading file %s", f->name);
101
102                 if (!strchr(f->name, '/')) {
103                         if (is_c_src)
104                                 dest = &m->c_files;
105                         else
106                                 dest = &m->h_files;
107                 } else if (strstarts(f->name, "test/")) {
108                         if (is_c_src) {
109                                 if (strstarts(f->name, "test/api"))
110                                         dest = &m->api_tests;
111                                 else if (strstarts(f->name, "test/run"))
112                                         dest = &m->run_tests;
113                                 else if (strstarts(f->name, "test/compile_ok"))
114                                         dest = &m->compile_ok_tests;
115                                 else if (strstarts(f->name, "test/compile_fail"))
116                                         dest = &m->compile_fail_tests;
117                                 else
118                                         dest = &m->other_test_files;
119                         } else
120                                 dest = &m->other_test_files;
121                 } else
122                         dest = &m->other_files;
123
124                 list_add(dest, &f->list);
125         }
126         closedir(d);
127 }
128
129 char *report_on_lines(struct list_head *files,
130                       char *(*report)(const char *),
131                       char *sofar)
132 {
133         struct ccan_file *f;
134
135         list_for_each(files, f, list) {
136                 unsigned int i;
137                 char **lines = get_ccan_file_lines(f);
138
139                 for (i = 0; i < f->num_lines; i++) {
140                         char *r = report(lines[i]);
141                         if (!r)
142                                 continue;
143
144                         sofar = talloc_asprintf_append(sofar,
145                                                        "%s:%u:%s\n",
146                                                        f->name, i+1, r);
147                         talloc_free(r);
148                 }
149         }
150         return sofar;
151 }
152
153 struct manifest *get_manifest(const void *ctx)
154 {
155         struct manifest *m = talloc(ctx, struct manifest);
156         unsigned int len;
157
158         m->info_file = NULL;
159         list_head_init(&m->c_files);
160         list_head_init(&m->h_files);
161         list_head_init(&m->api_tests);
162         list_head_init(&m->run_tests);
163         list_head_init(&m->compile_ok_tests);
164         list_head_init(&m->compile_fail_tests);
165         list_head_init(&m->other_test_files);
166         list_head_init(&m->other_files);
167         list_head_init(&m->dep_dirs);
168         list_head_init(&m->dep_objs);
169
170         m->basename = talloc_getcwd(m);
171         if (!m->basename)
172                 err(1, "Getting current directory");
173         len = strlen(m->basename);
174         while (len && m->basename[len-1] == '/')
175                 m->basename[--len] = '\0';
176
177         m->basename = strrchr(m->basename, '/');
178         if (!m->basename)
179                 errx(1, "I don't expect to be run from the root directory");
180         m->basename++;
181
182         add_files(m, "");
183         return m;
184 }
185
186
187 /**
188  * remove_comments - strip comments from a line, return copy.
189  * @line: line to copy
190  * @in_comment: are we already within a comment (from prev line).
191  * @unterminated: are we still in a comment for next line.
192  */
193 static char *remove_comments(const char *line, bool in_comment,
194                              bool *unterminated)
195 {
196         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
197
198         p = ret;
199         for (;;) {
200                 if (!in_comment) {
201                         /* Find first comment. */
202                         const char *old_comment = strstr(line, "/*");
203                         const char *new_comment = strstr(line, "//");
204                         const char *comment;
205
206                         if (new_comment && old_comment)
207                                 comment = new_comment < old_comment
208                                         ? new_comment : old_comment;
209                         else if (old_comment)
210                                 comment = old_comment;
211                         else if (new_comment)
212                                 comment = new_comment;
213                         else {
214                                 /* Nothing more. */
215                                 strcpy(p, line);
216                                 *unterminated = false;
217                                 break;
218                         }
219
220                         /* Copy up to comment. */
221                         memcpy(p, line, comment - line);
222                         p += comment - line;
223                         line += comment - line + 2;
224
225                         if (comment == new_comment) {
226                                 /* We're done: goes to EOL. */
227                                 p[0] = '\0';
228                                 *unterminated = false;
229                                 break;
230                         }
231                         in_comment = true;
232                 }
233
234                 if (in_comment) {
235                         const char *end = strstr(line, "*/");
236                         if (!end) {
237                                 *unterminated = true;
238                                 p[0] = '\0';
239                                 break;
240                         }
241                         line = end+2;
242                         in_comment = false;
243                 }
244         }
245         return ret;
246 }
247
/* True if @line holds nothing but spaces and tabs (or nothing at all). */
static bool is_empty(const char *line)
{
        const char *c;

        for (c = line; *c != '\0'; c++) {
                if (*c != ' ' && *c != '\t')
                        return false;
        }
        return true;
}
252
/* Does @line end in a backslash, i.e. carry on into the next line? */
static bool continues(const char *line)
{
        /* Strictly only an odd number of trailing backslashes continues
         * the line; that distinction is deliberately ignored. */
        static const char continuation[] = "\\";

        return strends(line, continuation);
}
258
259 /* Get token if it's equal to token. */
260 bool get_token(const char **line, const char *token)
261 {
262         unsigned int toklen;
263
264         *line += strspn(*line, " \t");
265         if (isalnum(token[0]) || token[0] == '_')
266                 toklen = strspn(*line, IDENT_CHARS);
267         else {
268                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
269                 toklen = strlen(token);
270         }
271
272         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
273                 *line += toklen;
274                 return true;
275         }
276         return false;
277 }
278
279 char *get_symbol_token(void *ctx, const char **line)
280 {
281         unsigned int toklen;
282         char *ret;
283
284         *line += strspn(*line, " \t");
285         toklen = strspn(*line, IDENT_CHARS);
286         if (!toklen)
287                 return NULL;
288         ret = talloc_strndup(ctx, *line, toklen);
289         *line += toklen;
290         return ret;
291 }
292
293 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
294 {
295         bool brackets, defined;
296
297         cond->inverse = get_token(line, "!");
298         defined = get_token(line, "defined");
299         brackets = get_token(line, "(");
300         cond->symbol = get_symbol_token(cond, line);
301         if (!cond->symbol)
302                 return false;
303         if (brackets && !get_token(line, ")"))
304                 return false;
305         if (!defined)
306                 cond->type = PP_COND_IF;
307         return true;
308 }
309
310 /* FIXME: Get serious! */
311 static struct pp_conditions *analyze_directive(struct ccan_file *f,
312                                                const char *line,
313                                                struct pp_conditions *parent)
314 {
315         struct pp_conditions *cond = talloc(f, struct pp_conditions);
316         bool unused;
317
318         line = remove_comments(line, false, &unused);
319
320         cond->parent = parent;
321         cond->type = PP_COND_IFDEF;
322
323         if (!get_token(&line, "#"))
324                 abort();
325
326         if (get_token(&line, "if")) {
327                 if (!parse_hash_if(cond, &line))
328                         goto unknown;
329         } else if (get_token(&line, "elif")) {
330                 /* Malformed? */
331                 if (!parent)
332                         return NULL;
333                 cond->parent = parent->parent;
334                 /* FIXME: Not quite true.  This implies !parent, but we don't
335                  * do multiple conditionals yet. */
336                 if (!parse_hash_if(cond, &line))
337                         goto unknown;
338         } else if (get_token(&line, "ifdef")) {
339                 bool brackets;
340                 cond->inverse = false;
341                 brackets = get_token(&line, "(");
342                 cond->symbol = get_symbol_token(cond, &line);
343                 if (!cond->symbol)
344                         goto unknown;
345                 if (brackets && !get_token(&line, ")"))
346                         goto unknown;
347         } else if (get_token(&line, "ifndef")) {
348                 bool brackets;
349                 cond->inverse = true;
350                 brackets = get_token(&line, "(");
351                 cond->symbol = get_symbol_token(cond, &line);
352                 if (!cond->symbol)
353                         goto unknown;
354                 if (brackets && !get_token(&line, ")"))
355                         goto unknown;
356         } else if (get_token(&line, "else")) {
357                 /* Malformed? */
358                 if (!parent)
359                         return NULL;
360
361                 *cond = *parent;
362                 cond->inverse = !cond->inverse;
363                 return cond;
364         } else if (get_token(&line, "endif")) {
365                 talloc_free(cond);
366                 /* Malformed? */
367                 if (!parent)
368                         return NULL;
369                 /* Back up one! */
370                 return parent->parent;
371         } else {
372                 /* Not a conditional. */
373                 talloc_free(cond);
374                 return parent;
375         }
376
377         if (!is_empty(line))
378                 goto unknown;
379         return cond;
380
381 unknown:
382         cond->type = PP_COND_UNKNOWN;
383         return cond;
384 }
385
386 /* This parser is rough, but OK if code is reasonably neat. */
387 struct line_info *get_ccan_line_info(struct ccan_file *f)
388 {
389         bool continued = false, in_comment = false;
390         struct pp_conditions *cond = NULL;
391         unsigned int i;
392
393         if (f->line_info)
394                 return f->line_info;
395
396         get_ccan_file_lines(f);
397         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
398
399         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
400                 char *p;
401                 bool still_doc_line;
402
403                 /* Current conditions apply to this line. */
404                 f->line_info[i].cond = cond;
405                 f->line_info[i].continued = continued;
406
407                 if (continued) {
408                         /* Same as last line. */
409                         f->line_info[i].type = f->line_info[i-1].type;
410                         /* Update in_comment. */
411                         remove_comments(f->lines[i], in_comment, &in_comment);
412                         continue;
413                 }
414
415                 /* Preprocessor directive? */
416                 if (!in_comment
417                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
418                         f->line_info[i].type = PREPROC_LINE;
419                         cond = analyze_directive(f, f->lines[i], cond);
420                         continue;
421                 }
422
423                 still_doc_line = (in_comment
424                                   && f->line_info[i-1].type == DOC_LINE);
425
426                 p = remove_comments(f->lines[i], in_comment, &in_comment);
427                 if (is_empty(p)) {
428                         if (strstarts(f->lines[i], "/**") || still_doc_line)
429                                 f->line_info[i].type = DOC_LINE;
430                         else
431                                 f->line_info[i].type = COMMENT_LINE;
432                 } else
433                         f->line_info[i].type = CODE_LINE;
434                 talloc_free(p);
435         }
436         return f->line_info;
437 }
438
439 struct symbol {
440         struct list_node list;
441         const char *name;
442         const unsigned int *value;
443 };
444
445 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
446 {
447         struct symbol *i;
448
449         list_for_each(syms, i, list)
450                 if (streq(sym, i->name))
451                         return i;
452         return NULL;
453 }
454
455 static enum line_compiled get_pp(struct pp_conditions *cond,
456                                  struct list_head *syms)
457 {
458         struct symbol *sym;
459         unsigned int val;
460         enum line_compiled parent, ret;
461
462         /* No conditions?  Easy. */
463         if (!cond)
464                 return COMPILED;
465
466         /* Check we get here at all. */
467         parent = get_pp(cond->parent, syms);
468         if (parent == NOT_COMPILED)
469                 return NOT_COMPILED;
470
471         if (cond->type == PP_COND_UNKNOWN)
472                 return MAYBE_COMPILED;
473
474         sym = find_symbol(syms, cond->symbol);
475         if (!sym)
476                 return MAYBE_COMPILED;
477
478         switch (cond->type) {
479         case PP_COND_IF:
480                 /* Undefined is 0. */
481                 val = sym->value ? *sym->value : 0;
482                 if (!val == cond->inverse)
483                         ret = COMPILED;
484                 else
485                         ret = NOT_COMPILED;
486                 break;
487
488         case PP_COND_IFDEF:
489                 if (cond->inverse == !sym->value)
490                         ret = COMPILED;
491                 else
492                         ret = NOT_COMPILED;
493                 break;
494
495         default:
496                 abort();
497         }
498
499         /* If parent didn't know, NO == NO, but YES == MAYBE. */
500         if (parent == MAYBE_COMPILED && ret == COMPILED)
501                 ret = MAYBE_COMPILED;
502         return ret;
503 }
504
505 static void add_symbol(struct list_head *head,
506                        const char *symbol, const unsigned int *value)
507 {
508         struct symbol *sym = talloc(head, struct symbol);
509         sym->name = symbol;
510         sym->value = value;
511         list_add(head, &sym->list);
512 }
513         
514 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
515                                     const char *symbol,
516                                     const unsigned int *value,
517                                     ...)
518 {
519         enum line_compiled ret;
520         struct list_head *head;
521         va_list ap;
522
523         head = talloc(NULL, struct list_head);
524         list_head_init(head);
525
526         va_start(ap, value);
527         add_symbol(head, symbol, value);
528
529         while ((symbol = va_arg(ap, const char *)) != NULL) {
530                 value = va_arg(ap, const unsigned int *);
531                 add_symbol(head, symbol, value);
532         }
533         ret = get_pp(cond, head);
534         talloc_free(head);
535         return ret;
536 }
537