]> git.ozlabs.org Git - ccan/blob - tools/ccanlint/file_analysis.c
2ede1f5b8cea7a1d59216e54266c74b2f1eb9fbc
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16
17 char **get_ccan_file_lines(struct ccan_file *f)
18 {
19         if (!f->lines) {
20                 char *buffer = grab_file(f, f->name, NULL);
21                 if (!buffer)
22                         err(1, "Getting file %s", f->name);
23                 f->lines = strsplit(f, buffer, "\n", &f->num_lines);
24         }
25         return f->lines;
26 }
27
28 struct list_head *get_ccan_file_docs(struct ccan_file *f)
29 {
30         if (!f->doc_sections) {
31                 get_ccan_file_lines(f);
32                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
33         }
34         return f->doc_sections;
35 }
36
37 static void add_files(struct manifest *m, const char *dir)
38 {
39         DIR *d;
40         struct dirent *ent;
41
42         if (dir[0])
43                 d = opendir(dir);
44         else
45                 d = opendir(".");
46         if (!d)
47                 err(1, "Opening directory %s", dir[0] ? dir : ".");
48
49         while ((ent = readdir(d)) != NULL) {
50                 struct stat st;
51                 struct ccan_file *f;
52                 struct list_head *dest;
53                 bool is_c_src;
54
55                 if (ent->d_name[0] == '.')
56                         continue;
57
58                 f = talloc(m, struct ccan_file);
59                 f->lines = NULL;
60                 f->doc_sections = NULL;
61                 f->name = talloc_asprintf(f, "%s%s", dir, ent->d_name);
62                 if (lstat(f->name, &st) != 0)
63                         err(1, "lstat %s", f->name);
64
65                 if (S_ISDIR(st.st_mode)) {
66                         f->name = talloc_append_string(f->name, "/");
67                         add_files(m, f->name);
68                         continue;
69                 }
70                 if (!S_ISREG(st.st_mode)) {
71                         talloc_free(f);
72                         continue;
73                 }
74
75                 if (streq(f->name, "_info.c")) {
76                         m->info_file = f;
77                         continue;
78                 }
79
80                 is_c_src = strends(f->name, ".c");
81                 if (!is_c_src && !strends(f->name, ".h"))
82                         dest = &m->other_files;
83                 else if (!strchr(f->name, '/')) {
84                         if (is_c_src)
85                                 dest = &m->c_files;
86                         else
87                                 dest = &m->h_files;
88                 } else if (strstarts(f->name, "test/")) {
89                         if (is_c_src) {
90                                 if (strstarts(f->name, "test/api"))
91                                         dest = &m->api_tests;
92                                 else if (strstarts(f->name, "test/run"))
93                                         dest = &m->run_tests;
94                                 else if (strstarts(f->name, "test/compile_ok"))
95                                         dest = &m->compile_ok_tests;
96                                 else if (strstarts(f->name, "test/compile_fail"))
97                                         dest = &m->compile_fail_tests;
98                                 else
99                                         dest = &m->other_test_files;
100                         } else
101                                 dest = &m->other_test_files;
102                 } else
103                         dest = &m->other_files;
104
105                 list_add(dest, &f->list);
106         }
107         closedir(d);
108 }
109
110 char *report_on_lines(struct list_head *files,
111                       char *(*report)(const char *),
112                       char *sofar)
113 {
114         struct ccan_file *f;
115
116         list_for_each(files, f, list) {
117                 unsigned int i;
118                 char **lines = get_ccan_file_lines(f);
119
120                 for (i = 0; i < f->num_lines; i++) {
121                         char *r = report(lines[i]);
122                         if (!r)
123                                 continue;
124
125                         sofar = talloc_asprintf_append(sofar,
126                                                        "%s:%u:%s\n",
127                                                        f->name, i+1, r);
128                         talloc_free(r);
129                 }
130         }
131         return sofar;
132 }
133
134 struct manifest *get_manifest(void)
135 {
136         struct manifest *m = talloc(NULL, struct manifest);
137         unsigned int len;
138
139         m->info_file = NULL;
140         list_head_init(&m->c_files);
141         list_head_init(&m->h_files);
142         list_head_init(&m->api_tests);
143         list_head_init(&m->run_tests);
144         list_head_init(&m->compile_ok_tests);
145         list_head_init(&m->compile_fail_tests);
146         list_head_init(&m->other_test_files);
147         list_head_init(&m->other_files);
148
149         /* *This* is why people hate C. */
150         len = 32;
151         m->basename = talloc_array(m, char, len);
152         while (!getcwd(m->basename, len)) {
153                 if (errno != ERANGE)
154                         err(1, "Getting current directory");
155                 m->basename = talloc_realloc(m, m->basename, char, len *= 2);
156         }
157
158         len = strlen(m->basename);
159         while (len && m->basename[len-1] == '/')
160                 m->basename[--len] = '\0';
161
162         m->basename = strrchr(m->basename, '/');
163         if (!m->basename)
164                 errx(1, "I don't expect to be run from the root directory");
165         m->basename++;
166
167         add_files(m, "");
168         return m;
169 }
170
171
172 /**
173  * remove_comments - strip comments from a line, return copy.
174  * @line: line to copy
175  * @in_comment: are we already within a comment (from prev line).
176  * @unterminated: are we still in a comment for next line.
177  */
178 static char *remove_comments(const char *line, bool in_comment,
179                              bool *unterminated)
180 {
181         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
182
183         p = ret;
184         for (;;) {
185                 if (!in_comment) {
186                         /* Find first comment. */
187                         const char *old_comment = strstr(line, "/*");
188                         const char *new_comment = strstr(line, "//");
189                         const char *comment;
190
191                         if (new_comment && old_comment)
192                                 comment = new_comment < old_comment
193                                         ? new_comment : old_comment;
194                         else if (old_comment)
195                                 comment = old_comment;
196                         else if (new_comment)
197                                 comment = new_comment;
198                         else {
199                                 /* Nothing more. */
200                                 strcpy(p, line);
201                                 *unterminated = false;
202                                 break;
203                         }
204
205                         /* Copy up to comment. */
206                         memcpy(p, line, comment - line);
207                         p += comment - line;
208                         line += comment - line + 2;
209
210                         if (comment == new_comment) {
211                                 /* We're done: goes to EOL. */
212                                 p[0] = '\0';
213                                 *unterminated = false;
214                                 break;
215                         }
216                         in_comment = true;
217                 }
218
219                 if (in_comment) {
220                         const char *end = strstr(line, "*/");
221                         if (!end) {
222                                 *unterminated = true;
223                                 p[0] = '\0';
224                                 break;
225                         }
226                         line = end+2;
227                         in_comment = false;
228                 }
229         }
230         return ret;
231 }
232
/* True if the line consists solely of spaces and tabs (or is empty). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks; an empty line has nothing after them. */
        return line[strspn(line, " \t")] == '\0';
}
237
/* Does this line end in a backslash, i.e. continue onto the next line? */
static bool continues(const char *line)
{
        /* Strictly only an odd number of trailing backslashes continues the
         * line, but that distinction is deliberately ignored. */
        if (strends(line, "\\"))
                return true;
        return false;
}
243
244 /* Get token if it's equal to token. */
245 static bool get_token(const char **line, const char *token)
246 {
247         unsigned int toklen;
248
249         *line += strspn(*line, " \t");
250         if (isalnum(token[0]) || token[0] == '_')
251                 toklen = strspn(*line, IDENT_CHARS);
252         else {
253                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
254                 toklen = strlen(token);
255         }
256
257         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
258                 *line += toklen;
259                 return true;
260         }
261         return false;
262 }
263
264 static char *get_symbol_token(void *ctx, const char **line)
265 {
266         unsigned int toklen;
267         char *ret;
268
269         *line += strspn(*line, " \t");
270         toklen = strspn(*line, IDENT_CHARS);
271         if (!toklen)
272                 return NULL;
273         ret = talloc_strndup(ctx, *line, toklen);
274         *line += toklen;
275         return ret;
276 }
277
278 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
279 {
280         bool brackets, defined;
281
282         cond->inverse = get_token(line, "!");
283         defined = get_token(line, "defined");
284         brackets = get_token(line, "(");
285         cond->symbol = get_symbol_token(cond, line);
286         if (!cond->symbol)
287                 return false;
288         if (brackets && !get_token(line, ")"))
289                 return false;
290         if (!defined)
291                 cond->type = PP_COND_IF;
292         return true;
293 }
294
295 /* FIXME: Get serious! */
296 static struct pp_conditions *analyze_directive(struct ccan_file *f,
297                                                const char *line,
298                                                struct pp_conditions *parent)
299 {
300         struct pp_conditions *cond = talloc(f, struct pp_conditions);
301         bool unused;
302
303         line = remove_comments(line, false, &unused);
304
305         cond->parent = parent;
306         cond->type = PP_COND_IFDEF;
307
308         if (!get_token(&line, "#"))
309                 abort();
310
311         if (get_token(&line, "if")) {
312                 if (!parse_hash_if(cond, &line))
313                         goto unknown;
314         } else if (get_token(&line, "elif")) {
315                 /* Malformed? */
316                 if (!parent)
317                         return NULL;
318                 cond->parent = parent->parent;
319                 /* FIXME: Not quite true.  This implies !parent, but we don't
320                  * do multiple conditionals yet. */
321                 if (!parse_hash_if(cond, &line))
322                         goto unknown;
323         } else if (get_token(&line, "ifdef")) {
324                 bool brackets;
325                 cond->inverse = false;
326                 brackets = get_token(&line, "(");
327                 cond->symbol = get_symbol_token(cond, &line);
328                 if (!cond->symbol)
329                         goto unknown;
330                 if (brackets && !get_token(&line, ")"))
331                         goto unknown;
332         } else if (get_token(&line, "ifndef")) {
333                 bool brackets;
334                 cond->inverse = true;
335                 brackets = get_token(&line, "(");
336                 cond->symbol = get_symbol_token(cond, &line);
337                 if (!cond->symbol)
338                         goto unknown;
339                 if (brackets && !get_token(&line, ")"))
340                         goto unknown;
341         } else if (get_token(&line, "else")) {
342                 /* Malformed? */
343                 if (!parent)
344                         return NULL;
345
346                 *cond = *parent;
347                 cond->inverse = !cond->inverse;
348                 return cond;
349         } else if (get_token(&line, "endif")) {
350                 talloc_free(cond);
351                 /* Malformed? */
352                 if (!parent)
353                         return NULL;
354                 /* Back up one! */
355                 return parent->parent;
356         } else {
357                 /* Not a conditional. */
358                 talloc_free(cond);
359                 return parent;
360         }
361
362         if (!is_empty(line))
363                 goto unknown;
364         return cond;
365
366 unknown:
367         cond->type = PP_COND_UNKNOWN;
368         return cond;
369 }
370
371 /* This parser is rough, but OK if code is reasonably neat. */
372 struct line_info *get_ccan_line_info(struct ccan_file *f)
373 {
374         bool continued = false, in_comment = false;
375         struct pp_conditions *cond = NULL;
376         unsigned int i;
377
378         if (f->line_info)
379                 return f->line_info;
380
381         get_ccan_file_lines(f);
382         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
383
384         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
385                 char *p;
386                 bool still_doc_line;
387
388                 /* Current conditions apply to this line. */
389                 f->line_info[i].cond = cond;
390                 f->line_info[i].continued = continued;
391
392                 if (continued) {
393                         /* Same as last line. */
394                         f->line_info[i].type = f->line_info[i-1].type;
395                         /* Update in_comment. */
396                         remove_comments(f->lines[i], in_comment, &in_comment);
397                         continue;
398                 }
399
400                 /* Preprocessor directive? */
401                 if (!in_comment
402                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
403                         f->line_info[i].type = PREPROC_LINE;
404                         cond = analyze_directive(f, f->lines[i], cond);
405                         continue;
406                 }
407
408                 still_doc_line = (in_comment
409                                   && f->line_info[i-1].type == DOC_LINE);
410
411                 p = remove_comments(f->lines[i], in_comment, &in_comment);
412                 if (is_empty(p)) {
413                         if (strstarts(f->lines[i], "/**") || still_doc_line)
414                                 f->line_info[i].type = DOC_LINE;
415                         else
416                                 f->line_info[i].type = COMMENT_LINE;
417                 } else
418                         f->line_info[i].type = CODE_LINE;
419                 talloc_free(p);
420         }
421         return f->line_info;
422 }
423
424 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
425                                     const char *symbol,
426                                     unsigned int value)
427 {
428         enum line_compiled ret;
429
430         /* No conditions?  Easy. */
431         if (!cond)
432                 return COMPILED;
433
434         /* Check we get here at all. */
435         ret = get_ccan_line_pp(cond->parent, symbol, value);
436         if (ret != COMPILED)
437                 return ret;
438
439         switch (cond->type) {
440         case PP_COND_IF:
441                 if (streq(cond->symbol, symbol)) {
442                         if (!value == cond->inverse)
443                                 return COMPILED;
444                         else
445                                 return NOT_COMPILED;
446                 }
447                 /* Unknown symbol, will be 0. */
448                 if (cond->inverse)
449                         return COMPILED;
450                 return NOT_COMPILED;
451
452         case PP_COND_IFDEF:
453                 if (streq(cond->symbol, symbol)) {
454                         if (cond->inverse)
455                                 return NOT_COMPILED;
456                         else
457                                 return COMPILED;
458                 }
459                 /* Unknown symbol, assume undefined. */
460                 if (cond->inverse)
461                         return COMPILED;
462                 return NOT_COMPILED;
463                 
464         default: /* Unknown. */
465                 return MAYBE_COMPILED;
466         }
467 }