d915b7ed8f59b131851621e9078410af93784fc1
[ccan] / tools / ccanlint / file_analysis.c
1 #include "config.h"
2 #include "ccanlint.h"
3 #include <ccan/talloc/talloc.h>
4 #include <ccan/str/str.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/talloc_link/talloc_link.h>
7 #include <ccan/hash/hash.h>
8 #include <ccan/htable/htable_type.h>
9 #include <ccan/grab_file/grab_file.h>
10 #include <ccan/noerr/noerr.h>
11 #include <ccan/foreach/foreach.h>
12 #include <ccan/asort/asort.h>
13 #include <ccan/array_size/array_size.h>
14 #include "../tools.h"
15 #include <unistd.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <fcntl.h>
19 #include <err.h>
20 #include <errno.h>
21 #include <dirent.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <assert.h>
25
26 struct list_head *get_ccan_file_docs(struct ccan_file *f)
27 {
28         if (!f->doc_sections) {
29                 get_ccan_file_lines(f);
30                 f->doc_sections = extract_doc_sections(f->lines);
31         }
32         return f->doc_sections;
33 }
34
35
36 /**
37  * remove_comments - strip comments from a line, return copy.
38  * @line: line to copy
39  * @in_comment: are we already within a comment (from prev line).
40  * @unterminated: are we still in a comment for next line.
41  */
42 static char *remove_comments(const char *line, bool in_comment,
43                              bool *unterminated)
44 {
45         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
46
47         p = ret;
48         for (;;) {
49                 if (!in_comment) {
50                         /* Find first comment. */
51                         const char *old_comment = strstr(line, "/*");
52                         const char *new_comment = strstr(line, "//");
53                         const char *comment;
54
55                         if (new_comment && old_comment)
56                                 comment = new_comment < old_comment
57                                         ? new_comment : old_comment;
58                         else if (old_comment)
59                                 comment = old_comment;
60                         else if (new_comment)
61                                 comment = new_comment;
62                         else {
63                                 /* Nothing more. */
64                                 strcpy(p, line);
65                                 *unterminated = false;
66                                 break;
67                         }
68
69                         /* Copy up to comment. */
70                         memcpy(p, line, comment - line);
71                         p += comment - line;
72                         line += comment - line + 2;
73
74                         if (comment == new_comment) {
75                                 /* We're done: goes to EOL. */
76                                 p[0] = '\0';
77                                 *unterminated = false;
78                                 break;
79                         }
80                         in_comment = true;
81                 }
82
83                 if (in_comment) {
84                         const char *end = strstr(line, "*/");
85                         if (!end) {
86                                 *unterminated = true;
87                                 p[0] = '\0';
88                                 break;
89                         }
90                         line = end+2;
91                         in_comment = false;
92                 }
93         }
94         return ret;
95 }
96
/* True if @line consists solely of spaces, tabs and carriage returns
 * (an empty string counts as empty too). */
static bool is_empty(const char *line)
{
        /* Skip all the blank characters; nothing must remain after them. */
        return line[strspn(line, " \r\t")] == '\0';
}
101
/* Does @line end in a backslash, i.e. is the next line a continuation? */
static bool continues(const char *line)
{
        size_t len = strlen(line);

        /* Technically, any odd number of these.  But who cares? */
        return len != 0 && line[len - 1] == '\\';
}
107
108 /* Get token if it's equal to token. */
109 bool get_token(const char **line, const char *token)
110 {
111         unsigned int toklen;
112
113         *line += strspn(*line, " \t");
114         if (cisalnum(token[0]) || token[0] == '_')
115                 toklen = strspn(*line, IDENT_CHARS);
116         else {
117                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
118                 toklen = strlen(token);
119         }
120
121         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
122                 *line += toklen;
123                 return true;
124         }
125         return false;
126 }
127
128 char *get_symbol_token(void *ctx, const char **line)
129 {
130         unsigned int toklen;
131         char *ret;
132
133         *line += strspn(*line, " \t");
134         toklen = strspn(*line, IDENT_CHARS);
135         if (!toklen)
136                 return NULL;
137         ret = talloc_strndup(ctx, *line, toklen);
138         *line += toklen;
139         return ret;
140 }
141
142 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
143 {
144         bool brackets, defined;
145
146         cond->inverse = get_token(line, "!");
147         defined = get_token(line, "defined");
148         brackets = get_token(line, "(");
149         cond->symbol = get_symbol_token(cond, line);
150         if (!cond->symbol)
151                 return false;
152         if (brackets && !get_token(line, ")"))
153                 return false;
154         if (!defined)
155                 cond->type = PP_COND_IF;
156
157         /* FIXME: We just chain them, ignoring operators. */
158         if (get_token(line, "||") || get_token(line, "&&")) {
159                 struct pp_conditions *sub = talloc(cond, struct pp_conditions);
160
161                 sub->parent = cond->parent;
162                 sub->type = PP_COND_IFDEF;
163                 if (parse_hash_if(sub, line))
164                         cond->parent = sub;
165         }
166
167         return true;
168 }
169
170 /* FIXME: Get serious! */
171 static struct pp_conditions *analyze_directive(struct ccan_file *f,
172                                                const char *line,
173                                                struct pp_conditions *parent)
174 {
175         struct pp_conditions *cond = talloc(f, struct pp_conditions);
176         bool unused;
177
178         line = remove_comments(line, false, &unused);
179
180         cond->parent = parent;
181         cond->type = PP_COND_IFDEF;
182
183         if (!get_token(&line, "#"))
184                 abort();
185
186         if (get_token(&line, "if")) {
187                 if (!parse_hash_if(cond, &line))
188                         goto unknown;
189         } else if (get_token(&line, "elif")) {
190                 /* Malformed? */
191                 if (!parent)
192                         return NULL;
193                 cond->parent = parent->parent;
194                 /* FIXME: Not quite true.  This implies !parent, but we don't
195                  * do multiple conditionals yet. */
196                 if (!parse_hash_if(cond, &line))
197                         goto unknown;
198         } else if (get_token(&line, "ifdef")) {
199                 bool brackets;
200                 cond->inverse = false;
201                 brackets = get_token(&line, "(");
202                 cond->symbol = get_symbol_token(cond, &line);
203                 if (!cond->symbol)
204                         goto unknown;
205                 if (brackets && !get_token(&line, ")"))
206                         goto unknown;
207         } else if (get_token(&line, "ifndef")) {
208                 bool brackets;
209                 cond->inverse = true;
210                 brackets = get_token(&line, "(");
211                 cond->symbol = get_symbol_token(cond, &line);
212                 if (!cond->symbol)
213                         goto unknown;
214                 if (brackets && !get_token(&line, ")"))
215                         goto unknown;
216         } else if (get_token(&line, "else")) {
217                 /* Malformed? */
218                 if (!parent)
219                         return NULL;
220
221                 *cond = *parent;
222                 cond->inverse = !cond->inverse;
223                 return cond;
224         } else if (get_token(&line, "endif")) {
225                 talloc_free(cond);
226                 /* Malformed? */
227                 if (!parent)
228                         return NULL;
229                 /* Back up one! */
230                 return parent->parent;
231         } else {
232                 /* Not a conditional. */
233                 talloc_free(cond);
234                 return parent;
235         }
236
237         if (!is_empty(line))
238                 goto unknown;
239         return cond;
240
241 unknown:
242         cond->type = PP_COND_UNKNOWN;
243         return cond;
244 }
245
246 /* This parser is rough, but OK if code is reasonably neat. */
247 struct line_info *get_ccan_line_info(struct ccan_file *f)
248 {
249         bool continued = false, in_comment = false;
250         struct pp_conditions *cond = NULL;
251         unsigned int i;
252
253         if (f->line_info)
254                 return f->line_info;
255
256         get_ccan_file_lines(f);
257         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
258
259         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
260                 char *p;
261                 bool still_doc_line;
262
263                 /* Current conditions apply to this line. */
264                 f->line_info[i].cond = cond;
265                 f->line_info[i].continued = continued;
266
267                 if (continued) {
268                         /* Same as last line. */
269                         f->line_info[i].type = f->line_info[i-1].type;
270                         /* Update in_comment. */
271                         remove_comments(f->lines[i], in_comment, &in_comment);
272                         continue;
273                 }
274
275                 /* Preprocessor directive? */
276                 if (!in_comment
277                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
278                         f->line_info[i].type = PREPROC_LINE;
279                         cond = analyze_directive(f, f->lines[i], cond);
280                         continue;
281                 }
282
283                 still_doc_line = (in_comment
284                                   && f->line_info[i-1].type == DOC_LINE);
285
286                 p = remove_comments(f->lines[i], in_comment, &in_comment);
287                 if (is_empty(p)) {
288                         if (strstarts(f->lines[i], "/**") || still_doc_line)
289                                 f->line_info[i].type = DOC_LINE;
290                         else
291                                 f->line_info[i].type = COMMENT_LINE;
292                 } else
293                         f->line_info[i].type = CODE_LINE;
294                 talloc_free(p);
295         }
296         return f->line_info;
297 }
298
299 struct symbol {
300         struct list_node list;
301         const char *name;
302         const unsigned int *value;
303 };
304
305 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
306 {
307         struct symbol *i;
308
309         list_for_each(syms, i, list)
310                 if (streq(sym, i->name))
311                         return i;
312         return NULL;
313 }
314
315 static enum line_compiled get_pp(struct pp_conditions *cond,
316                                  struct list_head *syms)
317 {
318         struct symbol *sym;
319         unsigned int val;
320         enum line_compiled parent, ret;
321
322         /* No conditions?  Easy. */
323         if (!cond)
324                 return COMPILED;
325
326         /* Check we get here at all. */
327         parent = get_pp(cond->parent, syms);
328         if (parent == NOT_COMPILED)
329                 return NOT_COMPILED;
330
331         if (cond->type == PP_COND_UNKNOWN)
332                 return MAYBE_COMPILED;
333
334         sym = find_symbol(syms, cond->symbol);
335         if (!sym)
336                 return MAYBE_COMPILED;
337
338         switch (cond->type) {
339         case PP_COND_IF:
340                 /* Undefined is 0. */
341                 val = sym->value ? *sym->value : 0;
342                 if (!val == cond->inverse)
343                         ret = COMPILED;
344                 else
345                         ret = NOT_COMPILED;
346                 break;
347
348         case PP_COND_IFDEF:
349                 if (cond->inverse == !sym->value)
350                         ret = COMPILED;
351                 else
352                         ret = NOT_COMPILED;
353                 break;
354
355         default:
356                 abort();
357         }
358
359         /* If parent didn't know, NO == NO, but YES == MAYBE. */
360         if (parent == MAYBE_COMPILED && ret == COMPILED)
361                 ret = MAYBE_COMPILED;
362         return ret;
363 }
364
365 static void add_symbol(struct list_head *head,
366                        const char *symbol, const unsigned int *value)
367 {
368         struct symbol *sym = talloc(head, struct symbol);
369         sym->name = symbol;
370         sym->value = value;
371         list_add(head, &sym->list);
372 }
373         
374 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
375                                     const char *symbol,
376                                     const unsigned int *value,
377                                     ...)
378 {
379         enum line_compiled ret;
380         struct list_head *head;
381         va_list ap;
382
383         head = talloc(NULL, struct list_head);
384         list_head_init(head);
385
386         va_start(ap, value);
387         add_symbol(head, symbol, value);
388
389         while ((symbol = va_arg(ap, const char *)) != NULL) {
390                 value = va_arg(ap, const unsigned int *);
391                 add_symbol(head, symbol, value);
392         }
393         ret = get_pp(cond, head);
394         talloc_free(head);
395         return ret;
396 }
397
398 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
399                       const char *errorfmt, ...)
400 {
401         va_list ap;
402
403         struct file_error *fe = talloc(score, struct file_error);
404         fe->file = f;
405         fe->line = line;
406         list_add_tail(&score->per_file_errors, &fe->list);
407
408         if (!score->error)
409                 score->error = talloc_strdup(score, "");
410         
411         if (verbose < 2 && strcount(score->error, "\n") > 5)
412                 return;
413
414         if (line)
415                 score->error = talloc_asprintf_append(score->error,
416                                                       "%s:%u:",
417                                                       f->fullname, line);
418         else
419                 score->error = talloc_asprintf_append(score->error,
420                                                       "%s:", f->fullname);
421
422         va_start(ap, errorfmt);
423         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
424         va_end(ap);
425         score->error = talloc_append_string(score->error, "\n");
426
427         if (verbose < 2 && strcount(score->error, "\n") > 5)
428                 score->error = talloc_append_string(score->error,
429                                     "... more (use -vv to see them all)\n");
430 }