tools: don't abort on malformed documentation lines.
[ccan] / tools / ccanlint / file_analysis.c
1 #include "config.h"
2 #include "ccanlint.h"
3 #include <ccan/talloc/talloc.h>
4 #include <ccan/str/str.h>
5 #include <ccan/str_talloc/str_talloc.h>
6 #include <ccan/talloc_link/talloc_link.h>
7 #include <ccan/hash/hash.h>
8 #include <ccan/htable/htable_type.h>
9 #include <ccan/grab_file/grab_file.h>
10 #include <ccan/noerr/noerr.h>
11 #include <ccan/foreach/foreach.h>
12 #include <ccan/asort/asort.h>
13 #include <ccan/array_size/array_size.h>
14 #include "../tools.h"
15 #include <unistd.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <fcntl.h>
19 #include <err.h>
20 #include <errno.h>
21 #include <dirent.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <assert.h>
25
26 struct list_head *get_ccan_file_docs(struct ccan_file *f)
27 {
28         if (!f->doc_sections) {
29                 get_ccan_file_lines(f);
30                 f->doc_sections = extract_doc_sections(f->lines, f->name);
31         }
32         return f->doc_sections;
33 }
34
35
36 /**
37  * remove_comments - strip comments from a line, return copy.
38  * @line: line to copy
39  * @in_comment: are we already within a comment (from prev line).
40  * @unterminated: are we still in a comment for next line.
41  */
42 static char *remove_comments(const char *line, bool in_comment,
43                              bool *unterminated)
44 {
45         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
46
47         p = ret;
48         for (;;) {
49                 if (!in_comment) {
50                         /* Find first comment. */
51                         const char *old_comment = strstr(line, "/*");
52                         const char *new_comment = strstr(line, "//");
53                         const char *comment;
54
55                         if (new_comment && old_comment)
56                                 comment = new_comment < old_comment
57                                         ? new_comment : old_comment;
58                         else if (old_comment)
59                                 comment = old_comment;
60                         else if (new_comment)
61                                 comment = new_comment;
62                         else {
63                                 /* Nothing more. */
64                                 strcpy(p, line);
65                                 *unterminated = false;
66                                 break;
67                         }
68
69                         /* Copy up to comment. */
70                         memcpy(p, line, comment - line);
71                         p += comment - line;
72                         line += comment - line + 2;
73
74                         if (comment == new_comment) {
75                                 /* We're done: goes to EOL. */
76                                 p[0] = '\0';
77                                 *unterminated = false;
78                                 break;
79                         }
80                         in_comment = true;
81                 }
82
83                 if (in_comment) {
84                         const char *end = strstr(line, "*/");
85                         if (!end) {
86                                 *unterminated = true;
87                                 p[0] = '\0';
88                                 break;
89                         }
90                         line = end+2;
91                         in_comment = false;
92                 }
93         }
94         return ret;
95 }
96
/* True if the line holds nothing but spaces, tabs and carriage returns. */
static bool is_empty(const char *line)
{
        const char *first_other = line + strspn(line, " \r\t");

        return *first_other == '\0';
}
101
/*
 * True if the line ends in a backslash, i.e. the next line is joined
 * onto this one.  Only the very last character is examined, so trailing
 * whitespace after the backslash defeats the check.
 */
static bool continues(const char *line)
{
        size_t len = strlen(line);

        if (len == 0)
                return false;
        return line[len - 1] == '\\';
}
107
108 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
109 {
110         bool brackets, defined;
111
112         cond->inverse = get_token(line, "!");
113         defined = get_token(line, "defined");
114         brackets = get_token(line, "(");
115         cond->symbol = get_symbol_token(cond, line);
116         if (!cond->symbol)
117                 return false;
118         if (brackets && !get_token(line, ")"))
119                 return false;
120         if (!defined)
121                 cond->type = PP_COND_IF;
122
123         /* FIXME: We just chain them, ignoring operators. */
124         if (get_token(line, "||") || get_token(line, "&&")) {
125                 struct pp_conditions *sub = talloc(cond, struct pp_conditions);
126
127                 sub->parent = cond->parent;
128                 sub->type = PP_COND_IFDEF;
129                 if (parse_hash_if(sub, line))
130                         cond->parent = sub;
131         }
132
133         return true;
134 }
135
136 /* FIXME: Get serious! */
137 static struct pp_conditions *analyze_directive(struct ccan_file *f,
138                                                const char *line,
139                                                struct pp_conditions *parent)
140 {
141         struct pp_conditions *cond = talloc(f, struct pp_conditions);
142         bool unused;
143
144         line = remove_comments(line, false, &unused);
145
146         cond->parent = parent;
147         cond->type = PP_COND_IFDEF;
148
149         if (!get_token(&line, "#"))
150                 abort();
151
152         if (get_token(&line, "if")) {
153                 if (!parse_hash_if(cond, &line))
154                         goto unknown;
155         } else if (get_token(&line, "elif")) {
156                 /* Malformed? */
157                 if (!parent)
158                         return NULL;
159                 cond->parent = parent->parent;
160                 /* FIXME: Not quite true.  This implies !parent, but we don't
161                  * do multiple conditionals yet. */
162                 if (!parse_hash_if(cond, &line))
163                         goto unknown;
164         } else if (get_token(&line, "ifdef")) {
165                 bool brackets;
166                 cond->inverse = false;
167                 brackets = get_token(&line, "(");
168                 cond->symbol = get_symbol_token(cond, &line);
169                 if (!cond->symbol)
170                         goto unknown;
171                 if (brackets && !get_token(&line, ")"))
172                         goto unknown;
173         } else if (get_token(&line, "ifndef")) {
174                 bool brackets;
175                 cond->inverse = true;
176                 brackets = get_token(&line, "(");
177                 cond->symbol = get_symbol_token(cond, &line);
178                 if (!cond->symbol)
179                         goto unknown;
180                 if (brackets && !get_token(&line, ")"))
181                         goto unknown;
182         } else if (get_token(&line, "else")) {
183                 /* Malformed? */
184                 if (!parent)
185                         return NULL;
186
187                 *cond = *parent;
188                 cond->inverse = !cond->inverse;
189                 return cond;
190         } else if (get_token(&line, "endif")) {
191                 talloc_free(cond);
192                 /* Malformed? */
193                 if (!parent)
194                         return NULL;
195                 /* Back up one! */
196                 return parent->parent;
197         } else {
198                 /* Not a conditional. */
199                 talloc_free(cond);
200                 return parent;
201         }
202
203         if (!is_empty(line))
204                 goto unknown;
205         return cond;
206
207 unknown:
208         cond->type = PP_COND_UNKNOWN;
209         return cond;
210 }
211
212 /* This parser is rough, but OK if code is reasonably neat. */
213 struct line_info *get_ccan_line_info(struct ccan_file *f)
214 {
215         bool continued = false, in_comment = false;
216         struct pp_conditions *cond = NULL;
217         unsigned int i;
218
219         if (f->line_info)
220                 return f->line_info;
221
222         get_ccan_file_lines(f);
223         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
224
225         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
226                 char *p;
227                 bool still_doc_line;
228
229                 /* Current conditions apply to this line. */
230                 f->line_info[i].cond = cond;
231                 f->line_info[i].continued = continued;
232
233                 if (continued) {
234                         /* Same as last line. */
235                         f->line_info[i].type = f->line_info[i-1].type;
236                         /* Update in_comment. */
237                         remove_comments(f->lines[i], in_comment, &in_comment);
238                         continue;
239                 }
240
241                 /* Preprocessor directive? */
242                 if (!in_comment
243                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
244                         f->line_info[i].type = PREPROC_LINE;
245                         cond = analyze_directive(f, f->lines[i], cond);
246                         continue;
247                 }
248
249                 still_doc_line = (in_comment
250                                   && f->line_info[i-1].type == DOC_LINE);
251
252                 p = remove_comments(f->lines[i], in_comment, &in_comment);
253                 if (is_empty(p)) {
254                         if (strstarts(f->lines[i], "/**") || still_doc_line)
255                                 f->line_info[i].type = DOC_LINE;
256                         else
257                                 f->line_info[i].type = COMMENT_LINE;
258                 } else
259                         f->line_info[i].type = CODE_LINE;
260                 talloc_free(p);
261         }
262         return f->line_info;
263 }
264
265 struct symbol {
266         struct list_node list;
267         const char *name;
268         const unsigned int *value;
269 };
270
271 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
272 {
273         struct symbol *i;
274
275         list_for_each(syms, i, list)
276                 if (streq(sym, i->name))
277                         return i;
278         return NULL;
279 }
280
281 static enum line_compiled get_pp(struct pp_conditions *cond,
282                                  struct list_head *syms)
283 {
284         struct symbol *sym;
285         unsigned int val;
286         enum line_compiled parent, ret;
287
288         /* No conditions?  Easy. */
289         if (!cond)
290                 return COMPILED;
291
292         /* Check we get here at all. */
293         parent = get_pp(cond->parent, syms);
294         if (parent == NOT_COMPILED)
295                 return NOT_COMPILED;
296
297         if (cond->type == PP_COND_UNKNOWN)
298                 return MAYBE_COMPILED;
299
300         sym = find_symbol(syms, cond->symbol);
301         if (!sym)
302                 return MAYBE_COMPILED;
303
304         switch (cond->type) {
305         case PP_COND_IF:
306                 /* Undefined is 0. */
307                 val = sym->value ? *sym->value : 0;
308                 if (!val == cond->inverse)
309                         ret = COMPILED;
310                 else
311                         ret = NOT_COMPILED;
312                 break;
313
314         case PP_COND_IFDEF:
315                 if (cond->inverse == !sym->value)
316                         ret = COMPILED;
317                 else
318                         ret = NOT_COMPILED;
319                 break;
320
321         default:
322                 abort();
323         }
324
325         /* If parent didn't know, NO == NO, but YES == MAYBE. */
326         if (parent == MAYBE_COMPILED && ret == COMPILED)
327                 ret = MAYBE_COMPILED;
328         return ret;
329 }
330
331 static void add_symbol(struct list_head *head,
332                        const char *symbol, const unsigned int *value)
333 {
334         struct symbol *sym = talloc(head, struct symbol);
335         sym->name = symbol;
336         sym->value = value;
337         list_add(head, &sym->list);
338 }
339         
340 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
341                                     const char *symbol,
342                                     const unsigned int *value,
343                                     ...)
344 {
345         enum line_compiled ret;
346         struct list_head *head;
347         va_list ap;
348
349         head = talloc(NULL, struct list_head);
350         list_head_init(head);
351
352         va_start(ap, value);
353         add_symbol(head, symbol, value);
354
355         while ((symbol = va_arg(ap, const char *)) != NULL) {
356                 value = va_arg(ap, const unsigned int *);
357                 add_symbol(head, symbol, value);
358         }
359         ret = get_pp(cond, head);
360         talloc_free(head);
361         return ret;
362 }
363
364 void score_file_error(struct score *score, struct ccan_file *f, unsigned line,
365                       const char *errorfmt, ...)
366 {
367         va_list ap;
368
369         struct file_error *fe = talloc(score, struct file_error);
370         fe->file = f;
371         fe->line = line;
372         list_add_tail(&score->per_file_errors, &fe->list);
373
374         if (!score->error)
375                 score->error = talloc_strdup(score, "");
376         
377         if (verbose < 2 && strcount(score->error, "\n") > 5)
378                 return;
379
380         if (line)
381                 score->error = talloc_asprintf_append(score->error,
382                                                       "%s:%u:",
383                                                       f->fullname, line);
384         else
385                 score->error = talloc_asprintf_append(score->error,
386                                                       "%s:", f->fullname);
387
388         va_start(ap, errorfmt);
389         score->error = talloc_vasprintf_append(score->error, errorfmt, ap);
390         va_end(ap);
391         score->error = talloc_append_string(score->error, "\n");
392
393         if (verbose < 2 && strcount(score->error, "\n") > 5)
394                 score->error = talloc_append_string(score->error,
395                                     "... more (use -vv to see them all)\n");
396 }
397
398 char *get_or_compile_info(const void *ctx, const char *dir)
399 {
400         struct manifest *m = get_manifest(NULL, dir);
401
402         if (!m->info_file->compiled[COMPILE_NORMAL])
403                 m->info_file->compiled[COMPILE_NORMAL] = compile_info(m, dir);
404
405         return m->info_file->compiled[COMPILE_NORMAL];
406 }