From: Rusty Russell Date: Sun, 29 Mar 2009 09:46:44 +0000 (+1030) Subject: Somewhat decent cpp analysis for ccanlint. X-Git-Url: https://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=08f8cff8f22fee31b74f8301cc5d6494e5ff7160 Somewhat decent cpp analysis for ccanlint. --- diff --git a/.bzrignore b/.bzrignore index 681d0a15..b5138511 100644 --- a/.bzrignore +++ b/.bzrignore @@ -12,3 +12,4 @@ inter-depends test-depends lib-depends tools/_infotojson/infotojson +tools/ccanlint/test/run-file_analysis diff --git a/tools/ccanlint/ccanlint.h b/tools/ccanlint/ccanlint.h index ac169fa8..804f8024 100644 --- a/tools/ccanlint/ccanlint.h +++ b/tools/ccanlint/ccanlint.h @@ -49,13 +49,49 @@ struct ccanlint { /* Ask the user a yes/no question: the answer is NO if there's an error. */ bool ask(const char *question); +enum line_info_type { + PREPROC_LINE, /* First non-blank character is # */ + CODE_LINE, /* Code (ie. not pure comment). */ + DOC_LINE, /* Line with kernel-doc-style comment. */ + COMMENT_LINE, /* (pure) comment line */ +}; + +/* So far, only do simple #ifdef/#ifndef/#if defined/#if !defined tests, + * and #if SYMBOL/#if !SYMBOL */ +struct pp_conditions { + /* We're inside another ifdef? */ + struct pp_conditions *parent; + + enum { + PP_COND_IF, /* #if SYMBOL: depends on the symbol's value. */ + PP_COND_IFDEF, /* #ifdef, #ifndef, #if defined: depends on it being defined. */ + PP_COND_UNKNOWN, /* A conditional we could not parse. */ + } type; + + bool inverse; /* Condition is negated: #ifndef, a leading !, or an #else branch. */ + const char *symbol; /* Name of the symbol being tested. */ +}; + +/* Preprocessor information about each line. */ +struct line_info { + enum line_info_type type; + + /* Is this actually a continuation of line above? (which ends in \) */ + bool continued; + + /* Conditions for this line to be compiled. */ + struct pp_conditions *cond; +}; + struct ccan_file { struct list_node list; char *name; + /* Use get_ccan_file_lines / get_ccan_line_info to fill these. */ unsigned int num_lines; char **lines; + struct line_info *line_info; struct list_head *doc_sections; }; @@ -63,9 +99,25 @@ struct ccan_file { /* Use this rather than accessing f->lines directly: loads on demand. 
*/ char **get_ccan_file_lines(struct ccan_file *f); +/* Use this rather than accessing f->lines directly: loads on demand. */ +struct line_info *get_ccan_line_info(struct ccan_file *f); + +enum line_compiled { + NOT_COMPILED, + COMPILED, + MAYBE_COMPILED, +}; + +/* Simple evaluator: if this pre-processor symbol is defined to this + * value, is this line compiled? (Other symbols assumed undefined) */ +enum line_compiled get_ccan_line_pp(struct pp_conditions *cond, + const char *symbol, + unsigned int value); + /* Similarly for ->doc_sections */ struct list_head *get_ccan_file_docs(struct ccan_file *f); + /* Call the reporting on every line in the file. sofar contains * previous results. */ char *report_on_lines(struct list_head *files, @@ -78,6 +130,4 @@ extern struct ccanlint has_main_header; /* Normal tests. */ extern struct ccanlint trailing_whitespace; - - #endif /* CCAN_LINT_H */ diff --git a/tools/ccanlint/file_analysis.c b/tools/ccanlint/file_analysis.c index 9513ab67..2ede1f5b 100644 --- a/tools/ccanlint/file_analysis.c +++ b/tools/ccanlint/file_analysis.c @@ -4,6 +4,7 @@ #include #include #include +#include "../tools.h" #include #include #include @@ -11,6 +12,7 @@ #include #include #include +#include char **get_ccan_file_lines(struct ccan_file *f) { @@ -165,3 +167,301 @@ struct manifest *get_manifest(void) add_files(m, ""); return m; } + + +/** + * remove_comments - strip comments from a line, return copy. + * @line: line to copy + * @in_comment: are we already within a comment (from prev line). + * @unterminated: are we still in a comment for next line. + */ +static char *remove_comments(const char *line, bool in_comment, + bool *unterminated) +{ + char *p, *ret = talloc_array(line, char, strlen(line) + 1); + + p = ret; + for (;;) { + if (!in_comment) { + /* Find first comment. 
*/ + const char *old_comment = strstr(line, "/*"); + const char *new_comment = strstr(line, "//"); + const char *comment; + + if (new_comment && old_comment) + comment = new_comment < old_comment + ? new_comment : old_comment; + else if (old_comment) + comment = old_comment; + else if (new_comment) + comment = new_comment; + else { + /* Nothing more. */ + strcpy(p, line); + *unterminated = false; + break; + } + + /* Copy up to comment. */ + memcpy(p, line, comment - line); + p += comment - line; + line += comment - line + 2; + + if (comment == new_comment) { + /* We're done: goes to EOL. */ + p[0] = '\0'; + *unterminated = false; + break; + } + in_comment = true; + } + + if (in_comment) { + const char *end = strstr(line, "*/"); + if (!end) { + *unterminated = true; + p[0] = '\0'; + break; + } + line = end+2; + in_comment = false; + } + } + return ret; +} + +static bool is_empty(const char *line) +{ + return strspn(line, " \t") == strlen(line); +} + +static bool continues(const char *line) +{ + /* Technically, any odd number of these. But who cares? */ + return strends(line, "\\"); +} + +/* Get token if it's equal to token. */ +static bool get_token(const char **line, const char *token) +{ + unsigned int toklen; + + *line += strspn(*line, " \t"); + if (isalnum(token[0]) || token[0] == '_') + toklen = strspn(*line, IDENT_CHARS); + else { + /* FIXME: real tokenizer handles ++ and other multi-chars. 
*/ + toklen = strlen(token); + } + + if (toklen == strlen(token) && !strncmp(*line, token, toklen)) { + *line += toklen; + return true; + } + return false; +} + +/* Skip leading blanks and return a talloc'd copy (child of @ctx) of the run + * of IDENT_CHARS found at *line, stepping *line past it. NULL if none. */ +static char *get_symbol_token(void *ctx, const char **line) +{ + unsigned int toklen; + char *ret; + + *line += strspn(*line, " \t"); + toklen = strspn(*line, IDENT_CHARS); + if (!toklen) + return NULL; + ret = talloc_strndup(ctx, *line, toklen); + *line += toklen; + return ret; +} + +/* Parse what follows #if or #elif: [!] [defined] [(] SYMBOL [)]. + * Sets cond->inverse and cond->symbol, and makes cond->type PP_COND_IF + * unless "defined" was seen. Returns false if there is no symbol, or an + * opening bracket is never closed. */ +static bool parse_hash_if(struct pp_conditions *cond, const char **line) +{ + bool brackets, defined; + + cond->inverse = get_token(line, "!"); + defined = get_token(line, "defined"); + brackets = get_token(line, "("); + cond->symbol = get_symbol_token(cond, line); + if (!cond->symbol) + return false; + if (brackets && !get_token(line, ")")) + return false; + if (!defined) + cond->type = PP_COND_IF; + return true; +} + +/* FIXME: Get serious! + * + * Work out which condition applies after the directive in @line, given that + * @parent applied before it. Returns @parent for a directive which is not + * a conditional, the enclosing condition for #endif, and a new condition + * (allocated off @f) otherwise. Returns NULL if an #elif, #else or #endif + * has no #if to match. Conditionals we cannot parse completely get type + * PP_COND_UNKNOWN. */ +static struct pp_conditions *analyze_directive(struct ccan_file *f, + const char *line, + struct pp_conditions *parent) +{ + struct pp_conditions *cond = talloc(f, struct pp_conditions); + bool unused; + + line = remove_comments(line, false, &unused); + + cond->parent = parent; + cond->type = PP_COND_IFDEF; + + if (!get_token(&line, "#")) + abort(); + + if (get_token(&line, "if")) { + if (!parse_hash_if(cond, &line)) + goto unknown; + } else if (get_token(&line, "elif")) { + /* Malformed? */ + if (!parent) { + /* Nobody will see cond: drop it, as #endif does. */ + talloc_free(cond); + return NULL; + } + cond->parent = parent->parent; + /* FIXME: Not quite true. This implies !parent, but we don't + * do multiple conditionals yet. 
*/ + if (!parse_hash_if(cond, &line)) + goto unknown; + } else if (get_token(&line, "ifdef")) { + bool brackets; + cond->inverse = false; + brackets = get_token(&line, "("); + cond->symbol = get_symbol_token(cond, &line); + if (!cond->symbol) + goto unknown; + if (brackets && !get_token(&line, ")")) + goto unknown; + } else if (get_token(&line, "ifndef")) { + bool brackets; + cond->inverse = true; + brackets = get_token(&line, "("); + cond->symbol = get_symbol_token(cond, &line); + if (!cond->symbol) + goto unknown; + if (brackets && !get_token(&line, ")")) + goto unknown; + } else if (get_token(&line, "else")) { + /* Malformed? */ + if (!parent) { + /* Nobody will see cond: drop it, as #endif does. */ + talloc_free(cond); + return NULL; + } + + /* Same test as the branch above us, with the sense flipped. */ + *cond = *parent; + cond->inverse = !cond->inverse; + return cond; + } else if (get_token(&line, "endif")) { + talloc_free(cond); + /* Malformed? */ + if (!parent) + return NULL; + /* Back up one! */ + return parent->parent; + } else { + /* Not a conditional. */ + talloc_free(cond); + return parent; + } + + /* Anything after the symbol means an expression we don't handle. */ + if (!is_empty(line)) + goto unknown; + return cond; + +unknown: + cond->type = PP_COND_UNKNOWN; + return cond; +} + +/* This parser is rough, but OK if code is reasonably neat. + * + * Classifies each line of @f and records the preprocessor condition in + * force when that line is reached. The array is cached in f->line_info + * and handed back unchanged on later calls. */ +struct line_info *get_ccan_line_info(struct ccan_file *f) +{ + bool continued = false, in_comment = false; + struct pp_conditions *cond = NULL; + unsigned int i; + + if (f->line_info) + return f->line_info; + + get_ccan_file_lines(f); + f->line_info = talloc_array(f->lines, struct line_info, f->num_lines); + + for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) { + char *p; + bool still_doc_line; + + /* Current conditions apply to this line. */ + f->line_info[i].cond = cond; + f->line_info[i].continued = continued; + + if (continued) { + /* Same as last line. */ + f->line_info[i].type = f->line_info[i-1].type; + /* Update in_comment: we don't want the copy itself. */ + p = remove_comments(f->lines[i], in_comment, &in_comment); + talloc_free(p); + continue; + } + + /* Preprocessor directive? 
*/ + if (!in_comment + && f->lines[i][strspn(f->lines[i], " \t")] == '#') { + f->line_info[i].type = PREPROC_LINE; + cond = analyze_directive(f, f->lines[i], cond); + continue; + } + + still_doc_line = (in_comment + && f->line_info[i-1].type == DOC_LINE); + + p = remove_comments(f->lines[i], in_comment, &in_comment); + if (is_empty(p)) { + if (strstarts(f->lines[i], "/**") || still_doc_line) + f->line_info[i].type = DOC_LINE; + else + f->line_info[i].type = COMMENT_LINE; + } else + f->line_info[i].type = CODE_LINE; + talloc_free(p); + } + return f->line_info; +} + +enum line_compiled get_ccan_line_pp(struct pp_conditions *cond, + const char *symbol, + unsigned int value) +{ + enum line_compiled ret; + + /* No conditions? Easy. */ + if (!cond) + return COMPILED; + + /* Check we get here at all. */ + ret = get_ccan_line_pp(cond->parent, symbol, value); + if (ret != COMPILED) + return ret; + + switch (cond->type) { + case PP_COND_IF: + if (streq(cond->symbol, symbol)) { + if (!value == cond->inverse) + return COMPILED; + else + return NOT_COMPILED; + } + /* Unknown symbol, will be 0. */ + if (cond->inverse) + return COMPILED; + return NOT_COMPILED; + + case PP_COND_IFDEF: + if (streq(cond->symbol, symbol)) { + if (cond->inverse) + return NOT_COMPILED; + else + return COMPILED; + } + /* Unknown symbol, assume undefined. */ + if (cond->inverse) + return COMPILED; + return NOT_COMPILED; + + default: /* Unknown. 
*/ + return MAYBE_COMPILED; + } +} diff --git a/tools/ccanlint/test/Makefile b/tools/ccanlint/test/Makefile new file mode 100644 index 00000000..d8423660 --- /dev/null +++ b/tools/ccanlint/test/Makefile @@ -0,0 +1,3 @@ +CFLAGS=-g -Wall -I../../../ + +run-file_analysis: run-file_analysis.o ../../doc_extract-core.o ../../../libccan.a diff --git a/tools/ccanlint/test/run-file_analysis.c b/tools/ccanlint/test/run-file_analysis.c new file mode 100644 index 00000000..b52d19df --- /dev/null +++ b/tools/ccanlint/test/run-file_analysis.c @@ -0,0 +1,203 @@ +#include "tools/ccanlint/ccanlint.h" +#include "ccan/tap/tap.h" +#include "tools/ccanlint/file_analysis.c" +#include +#include +#include +#include +#include + +/* This is our test file. */ +struct test { + enum line_info_type type; + bool continued; + const char *line; +}; + +static struct test testfile[] = { + { PREPROC_LINE, false, "#ifndef TEST_H" }, + { PREPROC_LINE, false, "#define TEST_H" }, + { DOC_LINE, false, "/**" }, + { DOC_LINE, false, " * Comment here." }, + { DOC_LINE, false, " * Comment here too." 
}, + { DOC_LINE, false, " */" }, + { COMMENT_LINE, false, "// Normal one-line comment" }, + { COMMENT_LINE, false, " // Spaced one-line comment" }, + { COMMENT_LINE, false, "/* Normal one-line comment */" }, + { COMMENT_LINE, false, " /* Spaced one-line comment */" }, + { COMMENT_LINE, false, " /* Spaced two-line comment" }, + { COMMENT_LINE, false, " continued comment */" }, + { CODE_LINE, false, "extern int x;"}, + { CODE_LINE, false, "extern int y; // With new-style comment"}, + { CODE_LINE, false, "extern int z; /* With old-style comment */"}, + { CODE_LINE, false, "extern int v; /* With two-line comment"}, + { COMMENT_LINE, false, " Second line of comment"}, + { COMMENT_LINE, false, "/* comment1 */ // comment 2"}, + { COMMENT_LINE, false, "/* comment1 */ /* comment 2 */ "}, + { CODE_LINE, false, "/* comment1 */ code; /* comment 2 */ "}, + { CODE_LINE, false, "/* comment1 */ code; // comment 2"}, + { COMMENT_LINE, false, "/* comment start \\"}, + { COMMENT_LINE, true, " comment finish */"}, + { PREPROC_LINE, false, "#define foo \\"}, + { PREPROC_LINE, true, " (bar + \\"}, + { PREPROC_LINE, true, " baz)"}, + { CODE_LINE, false, "extern int \\"}, + { CODE_LINE, true, "#x;"}, + + /* Variants of the same thing. 
*/ + { PREPROC_LINE, false, "#ifdef BAR"}, + { CODE_LINE, false, "BAR"}, + { PREPROC_LINE, false, "#else"}, + { CODE_LINE, false, "!BAR"}, + { PREPROC_LINE, false, "#endif"}, + + { PREPROC_LINE, false, "#if defined BAR"}, + { CODE_LINE, false, "BAR"}, + { PREPROC_LINE, false, "#else"}, + { CODE_LINE, false, "!BAR"}, + { PREPROC_LINE, false, "#endif"}, + + { PREPROC_LINE, false, "#if defined(BAR)"}, + { CODE_LINE, false, "BAR"}, + { PREPROC_LINE, false, "#else"}, + { CODE_LINE, false, "!BAR"}, + { PREPROC_LINE, false, "#endif"}, + + { PREPROC_LINE, false, "#if !defined(BAR)"}, + { CODE_LINE, false, "!BAR"}, + { PREPROC_LINE, false, "#else"}, + { CODE_LINE, false, "BAR"}, + { PREPROC_LINE, false, "#endif"}, + + /* The "neither" line below is classified but its condition is not checked by main(). */ { PREPROC_LINE, false, "#if HAVE_FOO"}, + { CODE_LINE, false, "HAVE_FOO"}, + { PREPROC_LINE, false, "#elif HAVE_BAR"}, + { CODE_LINE, false, "HAVE_BAR"}, + { PREPROC_LINE, false, "#else"}, + { CODE_LINE, false, "neither"}, + { PREPROC_LINE, false, "#endif /* With a comment. */"}, + + { PREPROC_LINE, false, "#endif /* TEST_H */" }, +}; + +/* Number of entries in testfile[], ie. lines in the fake file. */ #define NUM_LINES (sizeof(testfile)/sizeof(testfile[0])) + +/* Printable name for a line type, used in the test failure messages. */ static const char *line_type_name(enum line_info_type type) +{ + switch (type) { + case PREPROC_LINE: return "PREPROC_LINE"; + case CODE_LINE: return "CODE_LINE"; + case DOC_LINE: return "DOC_LINE"; + case COMMENT_LINE: return "COMMENT_LINE"; + default: return "**INVALID**"; + } +} + +/* This just tests parser for the moment. 
*/ +int main(int argc, char *argv[]) +{ + unsigned int i; + struct line_info *line_info; + struct ccan_file *f = talloc(NULL, struct ccan_file); + + /* Two per line, two pointer checks, then 6 header-guard checks, 40 checks of the condition structures and 20 through get_ccan_line_pp(). */ plan_tests(NUM_LINES * 2 + 2 + 66); + + f->num_lines = NUM_LINES; + f->line_info = NULL; /* Otherwise get_ccan_line_info() would hand it straight back. */ + f->lines = talloc_array(f, char *, f->num_lines); + for (i = 0; i < f->num_lines; i++) + f->lines[i] = talloc_strdup(f->lines, testfile[i].line); + + line_info = get_ccan_line_info(f); + ok1(line_info == f->line_info); + /* Each line must be classified, and flagged as continued, as testfile[] says. */ for (i = 0; i < f->num_lines; i++) { + ok(f->line_info[i].type == testfile[i].type, + "Line %u:'%s' type %s should be %s", + i, testfile[i].line, + line_type_name(f->line_info[i].type), + line_type_name(testfile[i].type)); + ok(f->line_info[i].continued == testfile[i].continued, + "Line %u:'%s' continued should be %s", + i, testfile[i].line, + testfile[i].continued ? "TRUE" : "FALSE"); + } + + /* Should cache. */ + ok1(get_ccan_line_info(f) == line_info); + + /* Expect line 1 condition to be NULL. */ + ok1(line_info[0].cond == NULL); + /* Line 2, should depend on TEST_H being undefined. */ + ok1(line_info[1].cond != NULL); + ok1(line_info[1].cond->type == PP_COND_IFDEF); + ok1(line_info[1].cond->inverse); + ok1(line_info[1].cond->parent == NULL); + ok1(streq(line_info[1].cond->symbol, "TEST_H")); + + /* Every line BAR should depend on BAR being defined. */ + for (i = 0; i < f->num_lines; i++) { + if (!streq(testfile[i].line, "BAR")) + continue; + ok1(line_info[i].cond->type == PP_COND_IFDEF); + ok1(!line_info[i].cond->inverse); + ok1(streq(line_info[i].cond->symbol, "BAR")); + /* Nested directly inside the TEST_H guard. */ ok1(line_info[i].cond->parent == line_info[1].cond); + } + + /* Every line !BAR should depend on BAR being undefined. 
*/ + for (i = 0; i < f->num_lines; i++) { + if (!streq(testfile[i].line, "!BAR")) + continue; + ok1(line_info[i].cond->type == PP_COND_IFDEF); + ok1(line_info[i].cond->inverse); + ok1(streq(line_info[i].cond->symbol, "BAR")); + ok1(line_info[i].cond->parent == line_info[1].cond); + } + + /* Every line HAVE_BAR should depend on HAVE_BAR being set. */ + for (i = 0; i < f->num_lines; i++) { + if (!streq(testfile[i].line, "HAVE_BAR")) + continue; + ok1(line_info[i].cond->type == PP_COND_IF); + ok1(!line_info[i].cond->inverse); + ok1(streq(line_info[i].cond->symbol, "HAVE_BAR")); + ok1(line_info[i].cond->parent == line_info[1].cond); + } + + /* Every line HAVE_FOO should depend on HAVE_FOO being set. */ + for (i = 0; i < f->num_lines; i++) { + if (!streq(testfile[i].line, "HAVE_FOO")) + continue; + ok1(line_info[i].cond->type == PP_COND_IF); + ok1(!line_info[i].cond->inverse); + ok1(streq(line_info[i].cond->symbol, "HAVE_FOO")); + ok1(line_info[i].cond->parent == line_info[1].cond); + } + + /* Now check using interface. FOO is a symbol none of the conditions mention. */ + for (i = 0; i < f->num_lines; i++) { + if (streq(testfile[i].line, "BAR")) { + ok1(get_ccan_line_pp(line_info[i].cond, "BAR", 1) + == COMPILED); + ok1(get_ccan_line_pp(line_info[i].cond, "FOO", 1) + == NOT_COMPILED); + } else if (streq(testfile[i].line, "!BAR")) { + ok1(get_ccan_line_pp(line_info[i].cond, "BAR", 1) + == NOT_COMPILED); + ok1(get_ccan_line_pp(line_info[i].cond, "FOO", 1) + == COMPILED); + } else if (streq(testfile[i].line, "HAVE_BAR")) { + ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_BAR", 1) + == COMPILED); + ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_BAR", 0) + == NOT_COMPILED); + } else if (streq(testfile[i].line, "HAVE_FOO")) { + ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_FOO", 1) + == COMPILED); + ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_FOO", 0) + == NOT_COMPILED); + } + } + + return exit_status(); +}