htable: restore perfect bit when resizing.
[ccan] / tools / ccanlint / file_analysis.c
1 #include "ccanlint.h"
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <ccan/grab_file/grab_file.h>
6 #include <ccan/noerr/noerr.h>
7 #include "../tools.h"
8 #include <unistd.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <err.h>
13 #include <errno.h>
14 #include <dirent.h>
15 #include <ctype.h>
16 #include <stdarg.h>
17 #include <assert.h>
18
19 const char *ccan_dir;
20
21 const char *get_ccan_file_contents(struct ccan_file *f)
22 {
23         if (!f->contents) {
24                 f->contents = grab_file(f, f->fullname, &f->contents_size);
25                 if (!f->contents)
26                         err(1, "Reading file %s", f->fullname);
27         }
28         return f->contents;
29 }
30
31 char **get_ccan_file_lines(struct ccan_file *f)
32 {
33         if (!f->lines)
34                 f->lines = strsplit(f, get_ccan_file_contents(f),
35                                     "\n", &f->num_lines);
36
37         return f->lines;
38 }
39
40 struct list_head *get_ccan_file_docs(struct ccan_file *f)
41 {
42         if (!f->doc_sections) {
43                 get_ccan_file_lines(f);
44                 f->doc_sections = extract_doc_sections(f->lines, f->num_lines);
45         }
46         return f->doc_sections;
47 }
48
49 struct ccan_file *new_ccan_file(const void *ctx, const char *dir, char *name)
50 {
51         struct ccan_file *f;
52
53         assert(dir[0] == '/');
54
55         f = talloc(ctx, struct ccan_file);
56         f->lines = NULL;
57         f->line_info = NULL;
58         f->doc_sections = NULL;
59         f->compiled = NULL;
60         f->name = talloc_steal(f, name);
61         f->fullname = talloc_asprintf(f, "%s/%s", dir, f->name);
62         f->contents = NULL;
63         f->cov_compiled = NULL;
64         return f;
65 }
66
67 static void add_files(struct manifest *m, const char *dir)
68 {
69         DIR *d;
70         struct dirent *ent;
71
72         if (dir[0])
73                 d = opendir(dir);
74         else
75                 d = opendir(".");
76         if (!d)
77                 err(1, "Opening directory %s", dir[0] ? dir : ".");
78
79         while ((ent = readdir(d)) != NULL) {
80                 struct stat st;
81                 struct ccan_file *f;
82                 struct list_head *dest;
83                 bool is_c_src;
84
85                 if (ent->d_name[0] == '.')
86                         continue;
87
88                 f = new_ccan_file(m, m->dir,
89                                   talloc_asprintf(m, "%s%s",
90                                                   dir, ent->d_name));
91                 if (lstat(f->name, &st) != 0)
92                         err(1, "lstat %s", f->name);
93
94                 if (S_ISDIR(st.st_mode)) {
95                         f->name = talloc_append_string(f->name, "/");
96                         add_files(m, f->name);
97                         continue;
98                 }
99                 if (!S_ISREG(st.st_mode)) {
100                         talloc_free(f);
101                         continue;
102                 }
103
104                 if (streq(f->name, "_info")) {
105                         m->info_file = f;
106                         continue;
107                 }
108
109                 is_c_src = strends(f->name, ".c");
110                 if (!is_c_src && !strends(f->name, ".h")) {
111                         dest = &m->other_files;
112                         continue;
113                 }
114
115                 if (!strchr(f->name, '/')) {
116                         if (is_c_src)
117                                 dest = &m->c_files;
118                         else
119                                 dest = &m->h_files;
120                 } else if (strstarts(f->name, "test/")) {
121                         if (is_c_src) {
122                                 if (strstarts(f->name, "test/api"))
123                                         dest = &m->api_tests;
124                                 else if (strstarts(f->name, "test/run"))
125                                         dest = &m->run_tests;
126                                 else if (strstarts(f->name, "test/compile_ok"))
127                                         dest = &m->compile_ok_tests;
128                                 else if (strstarts(f->name, "test/compile_fail"))
129                                         dest = &m->compile_fail_tests;
130                                 else
131                                         dest = &m->other_test_c_files;
132                         } else
133                                 dest = &m->other_test_files;
134                 } else
135                         dest = &m->other_files;
136
137                 list_add(dest, &f->list);
138         }
139         closedir(d);
140 }
141
142 char *report_on_lines(struct list_head *files,
143                       char *(*report)(const char *),
144                       char *sofar)
145 {
146         struct ccan_file *f;
147
148         list_for_each(files, f, list) {
149                 unsigned int i;
150                 char **lines = get_ccan_file_lines(f);
151
152                 for (i = 0; i < f->num_lines; i++) {
153                         char *r = report(lines[i]);
154                         if (!r)
155                                 continue;
156
157                         sofar = talloc_asprintf_append(sofar,
158                                                        "%s:%u:%s\n",
159                                                        f->name, i+1, r);
160                         talloc_free(r);
161                 }
162         }
163         return sofar;
164 }
165
166 struct manifest *get_manifest(const void *ctx, const char *dir)
167 {
168         struct manifest *m = talloc(ctx, struct manifest);
169         char *olddir;
170         unsigned int len;
171
172         m->info_file = NULL;
173         list_head_init(&m->c_files);
174         list_head_init(&m->h_files);
175         list_head_init(&m->api_tests);
176         list_head_init(&m->run_tests);
177         list_head_init(&m->compile_ok_tests);
178         list_head_init(&m->compile_fail_tests);
179         list_head_init(&m->other_test_c_files);
180         list_head_init(&m->other_test_files);
181         list_head_init(&m->other_files);
182         list_head_init(&m->examples);
183         list_head_init(&m->mangled_examples);
184         list_head_init(&m->dep_dirs);
185
186         olddir = talloc_getcwd(NULL);
187         if (!olddir)
188                 err(1, "Getting current directory");
189
190         if (chdir(dir) != 0)
191                 err(1, "Failed to chdir to %s", dir);
192
193         m->dir = talloc_getcwd(m);
194         if (!m->dir)
195                 err(1, "Getting current directory");
196
197         len = strlen(m->dir);
198         while (len && m->dir[len-1] == '/')
199                 m->dir[--len] = '\0';
200
201         m->basename = strrchr(m->dir, '/');
202         if (!m->basename)
203                 errx(1, "I don't expect to be run from the root directory");
204         m->basename++;
205
206         /* We expect the ccan dir to be two levels above module dir. */
207         if (!ccan_dir) {
208                 char *p;
209                 ccan_dir = talloc_strdup(NULL, m->dir);
210                 p = strrchr(ccan_dir, '/');
211                 *p = '\0';
212                 p = strrchr(ccan_dir, '/');
213                 *p = '\0';
214         }
215
216         add_files(m, "");
217
218         if (chdir(olddir) != 0)
219                 err(1, "Returning to original directory '%s'", olddir);
220         talloc_free(olddir);
221
222         return m;
223 }
224
225
226 /**
227  * remove_comments - strip comments from a line, return copy.
228  * @line: line to copy
229  * @in_comment: are we already within a comment (from prev line).
230  * @unterminated: are we still in a comment for next line.
231  */
232 static char *remove_comments(const char *line, bool in_comment,
233                              bool *unterminated)
234 {
235         char *p, *ret = talloc_array(line, char, strlen(line) + 1);
236
237         p = ret;
238         for (;;) {
239                 if (!in_comment) {
240                         /* Find first comment. */
241                         const char *old_comment = strstr(line, "/*");
242                         const char *new_comment = strstr(line, "//");
243                         const char *comment;
244
245                         if (new_comment && old_comment)
246                                 comment = new_comment < old_comment
247                                         ? new_comment : old_comment;
248                         else if (old_comment)
249                                 comment = old_comment;
250                         else if (new_comment)
251                                 comment = new_comment;
252                         else {
253                                 /* Nothing more. */
254                                 strcpy(p, line);
255                                 *unterminated = false;
256                                 break;
257                         }
258
259                         /* Copy up to comment. */
260                         memcpy(p, line, comment - line);
261                         p += comment - line;
262                         line += comment - line + 2;
263
264                         if (comment == new_comment) {
265                                 /* We're done: goes to EOL. */
266                                 p[0] = '\0';
267                                 *unterminated = false;
268                                 break;
269                         }
270                         in_comment = true;
271                 }
272
273                 if (in_comment) {
274                         const char *end = strstr(line, "*/");
275                         if (!end) {
276                                 *unterminated = true;
277                                 p[0] = '\0';
278                                 break;
279                         }
280                         line = end+2;
281                         in_comment = false;
282                 }
283         }
284         return ret;
285 }
286
/* True if @line holds nothing but spaces and tabs (or nothing at all). */
static bool is_empty(const char *line)
{
        /* Skip the leading blanks: empty iff that lands on the terminator. */
        return line[strspn(line, " \t")] == '\0';
}
291
/* True if @line ends in a backslash, i.e. carries on onto the next line. */
static bool continues(const char *line)
{
        size_t len = strlen(line);

        /* Technically, any odd number of these.  But who cares? */
        return len > 0 && line[len - 1] == '\\';
}
297
298 /* Get token if it's equal to token. */
299 bool get_token(const char **line, const char *token)
300 {
301         unsigned int toklen;
302
303         *line += strspn(*line, " \t");
304         if (isalnum(token[0]) || token[0] == '_')
305                 toklen = strspn(*line, IDENT_CHARS);
306         else {
307                 /* FIXME: real tokenizer handles ++ and other multi-chars.  */
308                 toklen = strlen(token);
309         }
310
311         if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
312                 *line += toklen;
313                 return true;
314         }
315         return false;
316 }
317
318 char *get_symbol_token(void *ctx, const char **line)
319 {
320         unsigned int toklen;
321         char *ret;
322
323         *line += strspn(*line, " \t");
324         toklen = strspn(*line, IDENT_CHARS);
325         if (!toklen)
326                 return NULL;
327         ret = talloc_strndup(ctx, *line, toklen);
328         *line += toklen;
329         return ret;
330 }
331
332 static bool parse_hash_if(struct pp_conditions *cond, const char **line)
333 {
334         bool brackets, defined;
335
336         cond->inverse = get_token(line, "!");
337         defined = get_token(line, "defined");
338         brackets = get_token(line, "(");
339         cond->symbol = get_symbol_token(cond, line);
340         if (!cond->symbol)
341                 return false;
342         if (brackets && !get_token(line, ")"))
343                 return false;
344         if (!defined)
345                 cond->type = PP_COND_IF;
346         return true;
347 }
348
349 /* FIXME: Get serious! */
350 static struct pp_conditions *analyze_directive(struct ccan_file *f,
351                                                const char *line,
352                                                struct pp_conditions *parent)
353 {
354         struct pp_conditions *cond = talloc(f, struct pp_conditions);
355         bool unused;
356
357         line = remove_comments(line, false, &unused);
358
359         cond->parent = parent;
360         cond->type = PP_COND_IFDEF;
361
362         if (!get_token(&line, "#"))
363                 abort();
364
365         if (get_token(&line, "if")) {
366                 if (!parse_hash_if(cond, &line))
367                         goto unknown;
368         } else if (get_token(&line, "elif")) {
369                 /* Malformed? */
370                 if (!parent)
371                         return NULL;
372                 cond->parent = parent->parent;
373                 /* FIXME: Not quite true.  This implies !parent, but we don't
374                  * do multiple conditionals yet. */
375                 if (!parse_hash_if(cond, &line))
376                         goto unknown;
377         } else if (get_token(&line, "ifdef")) {
378                 bool brackets;
379                 cond->inverse = false;
380                 brackets = get_token(&line, "(");
381                 cond->symbol = get_symbol_token(cond, &line);
382                 if (!cond->symbol)
383                         goto unknown;
384                 if (brackets && !get_token(&line, ")"))
385                         goto unknown;
386         } else if (get_token(&line, "ifndef")) {
387                 bool brackets;
388                 cond->inverse = true;
389                 brackets = get_token(&line, "(");
390                 cond->symbol = get_symbol_token(cond, &line);
391                 if (!cond->symbol)
392                         goto unknown;
393                 if (brackets && !get_token(&line, ")"))
394                         goto unknown;
395         } else if (get_token(&line, "else")) {
396                 /* Malformed? */
397                 if (!parent)
398                         return NULL;
399
400                 *cond = *parent;
401                 cond->inverse = !cond->inverse;
402                 return cond;
403         } else if (get_token(&line, "endif")) {
404                 talloc_free(cond);
405                 /* Malformed? */
406                 if (!parent)
407                         return NULL;
408                 /* Back up one! */
409                 return parent->parent;
410         } else {
411                 /* Not a conditional. */
412                 talloc_free(cond);
413                 return parent;
414         }
415
416         if (!is_empty(line))
417                 goto unknown;
418         return cond;
419
420 unknown:
421         cond->type = PP_COND_UNKNOWN;
422         return cond;
423 }
424
425 /* This parser is rough, but OK if code is reasonably neat. */
426 struct line_info *get_ccan_line_info(struct ccan_file *f)
427 {
428         bool continued = false, in_comment = false;
429         struct pp_conditions *cond = NULL;
430         unsigned int i;
431
432         if (f->line_info)
433                 return f->line_info;
434
435         get_ccan_file_lines(f);
436         f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
437
438         for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
439                 char *p;
440                 bool still_doc_line;
441
442                 /* Current conditions apply to this line. */
443                 f->line_info[i].cond = cond;
444                 f->line_info[i].continued = continued;
445
446                 if (continued) {
447                         /* Same as last line. */
448                         f->line_info[i].type = f->line_info[i-1].type;
449                         /* Update in_comment. */
450                         remove_comments(f->lines[i], in_comment, &in_comment);
451                         continue;
452                 }
453
454                 /* Preprocessor directive? */
455                 if (!in_comment
456                     && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
457                         f->line_info[i].type = PREPROC_LINE;
458                         cond = analyze_directive(f, f->lines[i], cond);
459                         continue;
460                 }
461
462                 still_doc_line = (in_comment
463                                   && f->line_info[i-1].type == DOC_LINE);
464
465                 p = remove_comments(f->lines[i], in_comment, &in_comment);
466                 if (is_empty(p)) {
467                         if (strstarts(f->lines[i], "/**") || still_doc_line)
468                                 f->line_info[i].type = DOC_LINE;
469                         else
470                                 f->line_info[i].type = COMMENT_LINE;
471                 } else
472                         f->line_info[i].type = CODE_LINE;
473                 talloc_free(p);
474         }
475         return f->line_info;
476 }
477
478 struct symbol {
479         struct list_node list;
480         const char *name;
481         const unsigned int *value;
482 };
483
484 static struct symbol *find_symbol(struct list_head *syms, const char *sym)
485 {
486         struct symbol *i;
487
488         list_for_each(syms, i, list)
489                 if (streq(sym, i->name))
490                         return i;
491         return NULL;
492 }
493
494 static enum line_compiled get_pp(struct pp_conditions *cond,
495                                  struct list_head *syms)
496 {
497         struct symbol *sym;
498         unsigned int val;
499         enum line_compiled parent, ret;
500
501         /* No conditions?  Easy. */
502         if (!cond)
503                 return COMPILED;
504
505         /* Check we get here at all. */
506         parent = get_pp(cond->parent, syms);
507         if (parent == NOT_COMPILED)
508                 return NOT_COMPILED;
509
510         if (cond->type == PP_COND_UNKNOWN)
511                 return MAYBE_COMPILED;
512
513         sym = find_symbol(syms, cond->symbol);
514         if (!sym)
515                 return MAYBE_COMPILED;
516
517         switch (cond->type) {
518         case PP_COND_IF:
519                 /* Undefined is 0. */
520                 val = sym->value ? *sym->value : 0;
521                 if (!val == cond->inverse)
522                         ret = COMPILED;
523                 else
524                         ret = NOT_COMPILED;
525                 break;
526
527         case PP_COND_IFDEF:
528                 if (cond->inverse == !sym->value)
529                         ret = COMPILED;
530                 else
531                         ret = NOT_COMPILED;
532                 break;
533
534         default:
535                 abort();
536         }
537
538         /* If parent didn't know, NO == NO, but YES == MAYBE. */
539         if (parent == MAYBE_COMPILED && ret == COMPILED)
540                 ret = MAYBE_COMPILED;
541         return ret;
542 }
543
544 static void add_symbol(struct list_head *head,
545                        const char *symbol, const unsigned int *value)
546 {
547         struct symbol *sym = talloc(head, struct symbol);
548         sym->name = symbol;
549         sym->value = value;
550         list_add(head, &sym->list);
551 }
552         
553 enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
554                                     const char *symbol,
555                                     const unsigned int *value,
556                                     ...)
557 {
558         enum line_compiled ret;
559         struct list_head *head;
560         va_list ap;
561
562         head = talloc(NULL, struct list_head);
563         list_head_init(head);
564
565         va_start(ap, value);
566         add_symbol(head, symbol, value);
567
568         while ((symbol = va_arg(ap, const char *)) != NULL) {
569                 value = va_arg(ap, const unsigned int *);
570                 add_symbol(head, symbol, value);
571         }
572         ret = get_pp(cond, head);
573         talloc_free(head);
574         return ret;
575 }
576