Somewhat decent cpp analysis for ccanlint.
author Rusty Russell <rusty@rustcorp.com.au>
Sun, 29 Mar 2009 09:46:44 +0000 (20:16 +1030)
committer Rusty Russell <rusty@rustcorp.com.au>
Sun, 29 Mar 2009 09:46:44 +0000 (20:16 +1030)
.bzrignore
tools/ccanlint/ccanlint.h
tools/ccanlint/file_analysis.c
tools/ccanlint/test/Makefile [new file with mode: 0644]
tools/ccanlint/test/run-file_analysis.c [new file with mode: 0644]

index 681d0a15df084b9853117847660251a179812469..b513851143d3f69e025e4816c7548df0b63d3c38 100644 (file)
@@ -12,3 +12,4 @@ inter-depends
 test-depends
 lib-depends
 tools/_infotojson/infotojson
+tools/ccanlint/test/run-file_analysis
index ac169fa81156b57452708208af65fecd05402fdf..804f80249d9204b362032e592c29a52d5e7b66b6 100644 (file)
@@ -49,13 +49,49 @@ struct ccanlint {
 /* Ask the user a yes/no question: the answer is NO if there's an error. */
 bool ask(const char *question);
 
+enum line_info_type {
+       PREPROC_LINE, /* Line starts with # (after optional spaces/tabs), outside a comment */
+       CODE_LINE, /* Code (ie. not pure comment). */
+       DOC_LINE, /* Pure comment line that starts, or continues, a kernel-doc-style comment. */
+       COMMENT_LINE, /* (pure) comment line; blank lines are classified here too */
+};
+
+/* So far, only do simple #ifdef/#ifndef/#if defined/#if !defined tests,
+ * and #if <SYMBOL>/#if !<SYMBOL>.  Anything else is recorded with type
+ * PP_COND_UNKNOWN. */
+struct pp_conditions {
+       /* We're inside another ifdef? */
+       struct pp_conditions *parent;
+
+       enum {
+               PP_COND_IF, /* #if <SYMBOL>: depends on the symbol's value */
+               PP_COND_IFDEF, /* #ifdef/#ifndef/#if defined: depends on definedness */
+               PP_COND_UNKNOWN, /* Conditional we could not parse */
+       } type;
+
+       bool inverse; /* Test is negated: #ifndef, a leading !, or an #else branch */
+       const char *symbol; /* Symbol tested; not meaningful for PP_COND_UNKNOWN */
+};
+
+/* Preprocessor information about each line. */
+struct line_info {
+       enum line_info_type type; /* Continuation lines inherit the previous line's type */
+
+       /* Is this actually a continuation of line above? (which ends in \) */
+       bool continued;
+
+       /* Conditions for this line to be compiled.  NULL if none apply.
+        * A directive line carries the conditions in force before it. */
+       struct pp_conditions *cond;
+};
+
 struct ccan_file {
        struct list_node list;
 
        char *name;
 
+       /* Use get_ccan_file_lines / get_ccan_line_info to fill these. */
        unsigned int num_lines;
        char **lines;
+       struct line_info *line_info; /* num_lines entries; get_ccan_line_info() treats non-NULL as already computed */
 
        struct list_head *doc_sections;
 };
@@ -63,9 +99,25 @@ struct ccan_file {
 /* Use this rather than accessing f->lines directly: loads on demand. */
 char **get_ccan_file_lines(struct ccan_file *f);
 
+/* Use this rather than accessing f->line_info directly: loads on demand. */
+struct line_info *get_ccan_line_info(struct ccan_file *f);
+
+enum line_compiled {
+       NOT_COMPILED, /* Some governing condition excludes the line */
+       COMPILED, /* All governing conditions hold (or there are none) */
+       MAYBE_COMPILED, /* Depends on a condition we could not parse */
+};
+
+/* Simple evaluator: if this pre-processor symbol is defined to this
+ * value, is this line compiled? (Other symbols assumed undefined) */
+enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
+                                   const char *symbol,
+                                   unsigned int value);
+
 /* Similarly for ->doc_sections */
 struct list_head *get_ccan_file_docs(struct ccan_file *f);
 
+
 /* Call the reporting on every line in the file.  sofar contains
  * previous results. */
 char *report_on_lines(struct list_head *files,
@@ -78,6 +130,4 @@ extern struct ccanlint has_main_header;
 
 /* Normal tests. */
 extern struct ccanlint trailing_whitespace;
-
-
 #endif /* CCAN_LINT_H */
index 9513ab67d7c0b6fe821362218cf0666520f9cf30..2ede1f5b8cea7a1d59216e54266c74b2f1eb9fbc 100644 (file)
@@ -4,6 +4,7 @@
 #include <ccan/str_talloc/str_talloc.h>
 #include <ccan/grab_file/grab_file.h>
 #include <ccan/noerr/noerr.h>
+#include "../tools.h"
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -11,6 +12,7 @@
 #include <err.h>
 #include <errno.h>
 #include <dirent.h>
+#include <ctype.h>
 
 char **get_ccan_file_lines(struct ccan_file *f)
 {
@@ -165,3 +167,301 @@ struct manifest *get_manifest(void)
        add_files(m, "");
        return m;
 }
+
+
+/**
+ * remove_comments - strip comments from a line, return copy.
+ * @line: line to copy
+ * @in_comment: are we already within a comment (from prev line).
+ * @unterminated: are we still in a comment for next line.
+ *
+ * Both old-style (slash-star) and new-style (double-slash) comments are
+ * dropped; nothing is substituted for them, so the text on either side of
+ * a comment ends up adjacent.  The search is plain strstr(), so comment
+ * markers inside string literals are treated as comments too.
+ *
+ * Returns a buffer of strlen(@line) + 1 bytes obtained from talloc_array()
+ * with @line as the context.  @unterminated is written on every return
+ * path: true only if an old-style comment is still open at end of line.
+ */
+static char *remove_comments(const char *line, bool in_comment,
+                            bool *unterminated)
+{
+       char *p, *ret = talloc_array(line, char, strlen(line) + 1);
+
+       p = ret;
+       for (;;) {
+               if (!in_comment) {
+                       /* Find first comment. */
+                       const char *old_comment = strstr(line, "/*");
+                       const char *new_comment = strstr(line, "//");
+                       const char *comment;
+
+                       if (new_comment && old_comment)
+                               comment = new_comment < old_comment
+                                       ? new_comment : old_comment;
+                       else if (old_comment)
+                               comment = old_comment;
+                       else if (new_comment)
+                               comment = new_comment;
+                       else {
+                               /* Nothing more. */
+                               strcpy(p, line);
+                               *unterminated = false;
+                               break;
+                       }
+
+                       /* Copy up to comment. */
+                       memcpy(p, line, comment - line);
+                       p += comment - line;
+                       line += comment - line + 2; /* Step over the two-character opener. */
+
+                       if (comment == new_comment) {
+                               /* We're done: goes to EOL. */
+                               p[0] = '\0';
+                               *unterminated = false;
+                               break;
+                       }
+                       in_comment = true;
+               }
+
+               if (in_comment) {
+                       const char *end = strstr(line, "*/");
+                       if (!end) {
+                               *unterminated = true;
+                               p[0] = '\0';
+                               break;
+                       }
+                       line = end+2; /* Resume scanning just past the closer. */
+                       in_comment = false;
+               }
+       }
+       return ret;
+}
+
+static bool is_empty(const char *line)
+{ /* True if @line holds only spaces and tabs (or nothing at all). */
+       return strspn(line, " \t") == strlen(line);
+}
+
+static bool continues(const char *line)
+{
+       /* A line carries on to the next one when it ends in a backslash.
+        * Technically, any odd number of these.  But who cares? */
+       return strends(line, "\\");
+}
+
+/* Get token if it's equal to token.
+ *
+ * Leading spaces/tabs in *line are always skipped, even when no match is
+ * found.  If @token starts like an identifier, the whole run of IDENT_CHARS
+ * at *line must equal it (so "if" does not match "ifdef"); otherwise the
+ * first strlen(token) characters are compared.  On a match *line is moved
+ * past the token and true is returned.
+ *
+ * NOTE(review): token[0] is handed to isalnum() as a plain char; fine for
+ * the ASCII literals passed by the callers visible here. */
+static bool get_token(const char **line, const char *token)
+{
+       unsigned int toklen;
+
+       *line += strspn(*line, " \t");
+       if (isalnum(token[0]) || token[0] == '_')
+               toklen = strspn(*line, IDENT_CHARS);
+       else {
+               /* FIXME: real tokenizer handles ++ and other multi-chars.  */
+               toklen = strlen(token);
+       }
+
+       if (toklen == strlen(token) && !strncmp(*line, token, toklen)) {
+               *line += toklen;
+               return true;
+       }
+       return false;
+}
+
+static char *get_symbol_token(void *ctx, const char **line)
+{ /* Skip spaces/tabs in *line, then take the longest run of IDENT_CHARS.
+   * Returns a talloc_strndup() copy on @ctx and advances *line past it,
+   * or NULL (with only the whitespace skipped) if no such run exists. */
+       unsigned int toklen;
+       char *ret;
+
+       *line += strspn(*line, " \t");
+       toklen = strspn(*line, IDENT_CHARS);
+       if (!toklen)
+               return NULL;
+       ret = talloc_strndup(ctx, *line, toklen);
+       *line += toklen;
+       return ret;
+}
+
+static bool parse_hash_if(struct pp_conditions *cond, const char **line)
+{ /* Parse the expression of an #if/#elif of the form
+   *     [!] [defined] [(] SYMBOL [)]
+   * filling in cond->inverse and cond->symbol.  cond->type becomes
+   * PP_COND_IF only when "defined" is absent; otherwise it is left as the
+   * caller set it.  Returns false if there is no symbol or a "(" is not
+   * closed.  Anything remaining in *line is left for the caller to check. */
+       bool brackets, defined;
+
+       cond->inverse = get_token(line, "!");
+       defined = get_token(line, "defined");
+       brackets = get_token(line, "(");
+       cond->symbol = get_symbol_token(cond, line);
+       if (!cond->symbol)
+               return false;
+       if (brackets && !get_token(line, ")"))
+               return false;
+       if (!defined)
+               cond->type = PP_COND_IF;
+       return true;
+}
+
+/* FIXME: Get serious!
+ *
+ * Work out the condition in force after the preprocessor directive in
+ * @line, given @parent, the condition in force before it:
+ *   #if / #ifdef / #ifndef: a new condition nested inside @parent.
+ *   #elif: a new condition taking @parent's place (nested in its parent).
+ *   #else: a copy of @parent with the sense inverted.
+ *   #endif: @parent's own parent.
+ *   any other directive: @parent, unchanged.
+ * A conditional that can't be parsed, or has trailing text after comment
+ * removal, yields type PP_COND_UNKNOWN.  #elif, #else or #endif with no
+ * @parent returns NULL.  Aborts if @line does not start with '#' (after
+ * optional spaces/tabs).
+ *
+ * NOTE(review): the malformed #elif/#else returns do not talloc_free(cond)
+ * (the #endif path does), and the comment-stripped copy of @line is never
+ * freed here; both were allocated with talloc contexts (f and @line). */
+static struct pp_conditions *analyze_directive(struct ccan_file *f,
+                                              const char *line,
+                                              struct pp_conditions *parent)
+{
+       struct pp_conditions *cond = talloc(f, struct pp_conditions);
+       bool unused;
+
+       line = remove_comments(line, false, &unused);
+
+       cond->parent = parent;
+       cond->type = PP_COND_IFDEF; /* Default; parse_hash_if() may switch it to PP_COND_IF. */
+
+       if (!get_token(&line, "#"))
+               abort();
+
+       if (get_token(&line, "if")) {
+               if (!parse_hash_if(cond, &line))
+                       goto unknown;
+       } else if (get_token(&line, "elif")) {
+               /* Malformed? */
+               if (!parent)
+                       return NULL;
+               cond->parent = parent->parent;
+               /* FIXME: Not quite true.  This implies !parent, but we don't
+                * do multiple conditionals yet. */
+               if (!parse_hash_if(cond, &line))
+                       goto unknown;
+       } else if (get_token(&line, "ifdef")) {
+               bool brackets;
+               cond->inverse = false;
+               brackets = get_token(&line, "(");
+               cond->symbol = get_symbol_token(cond, &line);
+               if (!cond->symbol)
+                       goto unknown;
+               if (brackets && !get_token(&line, ")"))
+                       goto unknown;
+       } else if (get_token(&line, "ifndef")) {
+               bool brackets;
+               cond->inverse = true;
+               brackets = get_token(&line, "(");
+               cond->symbol = get_symbol_token(cond, &line);
+               if (!cond->symbol)
+                       goto unknown;
+               if (brackets && !get_token(&line, ")"))
+                       goto unknown;
+       } else if (get_token(&line, "else")) {
+               /* Malformed? */
+               if (!parent)
+                       return NULL;
+
+               *cond = *parent; /* Same test and same parent link as the branch being closed. */
+               cond->inverse = !cond->inverse;
+               return cond;
+       } else if (get_token(&line, "endif")) {
+               talloc_free(cond);
+               /* Malformed? */
+               if (!parent)
+                       return NULL;
+               /* Back up one! */
+               return parent->parent;
+       } else {
+               /* Not a conditional. */
+               talloc_free(cond);
+               return parent;
+       }
+
+       if (!is_empty(line)) /* Trailing text after the condition: too complex for us. */
+               goto unknown;
+       return cond;
+
+unknown:
+       cond->type = PP_COND_UNKNOWN;
+       return cond;
+}
+
+/* This parser is rough, but OK if code is reasonably neat.
+ *
+ * Classifies every line of @f and records the preprocessor conditions
+ * governing it.  The result is cached in f->line_info (an array of
+ * f->num_lines entries allocated with f->lines as talloc context) and
+ * returned as-is on later calls.
+ *
+ * The loop step records whether the line just handled ends in a
+ * backslash, i.e. whether the next line is a continuation; it also runs
+ * after each "continue".  A pure-comment line is DOC_LINE only if it
+ * starts with slash-star-star at column 0 or continues such a comment.
+ *
+ * NOTE(review): in_comment is not updated for a (non-continued) directive
+ * line, so a comment opened on a directive and closed on a later line is
+ * not tracked.  The copy returned by remove_comments() in the continued
+ * case is not freed here. */
+struct line_info *get_ccan_line_info(struct ccan_file *f)
+{
+       bool continued = false, in_comment = false;
+       struct pp_conditions *cond = NULL;
+       unsigned int i;
+
+       if (f->line_info)
+               return f->line_info;
+
+       get_ccan_file_lines(f);
+       f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
+
+       for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
+               char *p;
+               bool still_doc_line;
+
+               /* Current conditions apply to this line. */
+               f->line_info[i].cond = cond;
+               f->line_info[i].continued = continued;
+
+               if (continued) {
+                       /* Same as last line. */
+                       f->line_info[i].type = f->line_info[i-1].type;
+                       /* Update in_comment. */
+                       remove_comments(f->lines[i], in_comment, &in_comment);
+                       continue;
+               }
+
+               /* Preprocessor directive? */
+               if (!in_comment
+                   && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
+                       f->line_info[i].type = PREPROC_LINE;
+                       cond = analyze_directive(f, f->lines[i], cond);
+                       continue;
+               }
+
+               still_doc_line = (in_comment
+                                 && f->line_info[i-1].type == DOC_LINE); /* i-1 is safe: in_comment is false on the first line. */
+
+               p = remove_comments(f->lines[i], in_comment, &in_comment);
+               if (is_empty(p)) {
+                       if (strstarts(f->lines[i], "/**") || still_doc_line)
+                               f->line_info[i].type = DOC_LINE;
+                       else
+                               f->line_info[i].type = COMMENT_LINE;
+               } else
+                       f->line_info[i].type = CODE_LINE;
+               talloc_free(p);
+       }
+       return f->line_info;
+}
+
+enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
+                                   const char *symbol,
+                                   unsigned int value)
+{ /* Evaluate the chain from the outermost condition inwards: the first
+   * ancestor that is not COMPILED decides the answer.  For PP_COND_IF,
+   * @symbol passes when (@value != 0) differs from cond->inverse; for
+   * PP_COND_IFDEF, @symbol counts as defined whatever @value is.  Any
+   * other symbol is taken as 0/undefined, so only inverted tests pass. */
+       enum line_compiled ret;
+
+       /* No conditions?  Easy. */
+       if (!cond)
+               return COMPILED;
+
+       /* Check we get here at all. */
+       ret = get_ccan_line_pp(cond->parent, symbol, value);
+       if (ret != COMPILED)
+               return ret;
+
+       switch (cond->type) {
+       case PP_COND_IF:
+               if (streq(cond->symbol, symbol)) {
+                       if (!value == cond->inverse)
+                               return COMPILED;
+                       else
+                               return NOT_COMPILED;
+               }
+               /* Unknown symbol, will be 0. */
+               if (cond->inverse)
+                       return COMPILED;
+               return NOT_COMPILED;
+
+       case PP_COND_IFDEF:
+               if (streq(cond->symbol, symbol)) {
+                       if (cond->inverse)
+                               return NOT_COMPILED;
+                       else
+                               return COMPILED;
+               }
+               /* Unknown symbol, assume undefined. */
+               if (cond->inverse)
+                       return COMPILED;
+               return NOT_COMPILED;
+               
+       default: /* Unknown. */
+               return MAYBE_COMPILED;
+       }
+}
diff --git a/tools/ccanlint/test/Makefile b/tools/ccanlint/test/Makefile
new file mode 100644 (file)
index 0000000..d842366
--- /dev/null
@@ -0,0 +1,3 @@
+CFLAGS=-g -Wall -I../../../ 
+
+run-file_analysis: run-file_analysis.o ../../doc_extract-core.o ../../../libccan.a
diff --git a/tools/ccanlint/test/run-file_analysis.c b/tools/ccanlint/test/run-file_analysis.c
new file mode 100644 (file)
index 0000000..b52d19d
--- /dev/null
@@ -0,0 +1,203 @@
+#include "tools/ccanlint/ccanlint.h"
+#include "ccan/tap/tap.h"
+#include "tools/ccanlint/file_analysis.c"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <assert.h>
+
+/* This is our test file: one entry per line of text, together with the
+ * classification get_ccan_line_info() is expected to produce for it. */
+struct test {
+       enum line_info_type type; /* Expected line_info.type */
+       bool continued; /* Expected line_info.continued */
+       const char *line; /* The line's text */
+};
+
+static struct test testfile[] = {
+       { PREPROC_LINE, false, "#ifndef TEST_H" },
+       { PREPROC_LINE, false, "#define TEST_H" },
+       { DOC_LINE,     false, "/**" },
+       { DOC_LINE,     false, " * Comment here." },
+       { DOC_LINE,     false, " * Comment here too." },
+       { DOC_LINE,     false, " */" },
+       { COMMENT_LINE, false, "// Normal one-line comment" },
+       { COMMENT_LINE, false, "  // Spaced one-line comment" },
+       { COMMENT_LINE, false, "/* Normal one-line comment */" },
+       { COMMENT_LINE, false, "  /* Spaced one-line comment */" },
+       { COMMENT_LINE, false, "  /* Spaced two-line comment" },
+       { COMMENT_LINE, false, "  continued comment */" },
+       { CODE_LINE,    false, "extern int x;"},
+       { CODE_LINE,    false, "extern int y; // With new-style comment"},
+       { CODE_LINE,    false, "extern int z; /* With old-style comment */"},
+       { CODE_LINE,    false, "extern int v; /* With two-line comment"},
+       { COMMENT_LINE, false, "                 Second line of comment"},
+       { COMMENT_LINE, false, "/* comment1  */ // comment 2"},
+       { COMMENT_LINE, false, "/* comment1  */ /* comment 2 */ "},
+       { CODE_LINE,    false, "/* comment1  */ code; /* comment 2 */ "},
+       { CODE_LINE,    false, "/* comment1  */ code; // comment 2"},
+       { COMMENT_LINE, false, "/* comment start  \\"},
+       { COMMENT_LINE, true,  "   comment finish */"},
+       { PREPROC_LINE, false, "#define foo \\"},
+       { PREPROC_LINE, true,  "        (bar + \\"},
+       { PREPROC_LINE, true,  "         baz)"},
+       { CODE_LINE,    false, "extern int \\"},
+       { CODE_LINE,    true,  "#x;"}, /* Leading '#' is not a directive: this continues the line above. */
+
+       /* Variants of the same thing. */
+       { PREPROC_LINE, false, "#ifdef BAR"},
+       { CODE_LINE,    false, "BAR"},
+       { PREPROC_LINE, false, "#else"},
+       { CODE_LINE,    false, "!BAR"},
+       { PREPROC_LINE, false, "#endif"},
+
+       { PREPROC_LINE, false, "#if defined BAR"},
+       { CODE_LINE,    false, "BAR"},
+       { PREPROC_LINE, false, "#else"},
+       { CODE_LINE,    false, "!BAR"},
+       { PREPROC_LINE, false, "#endif"},
+
+       { PREPROC_LINE, false, "#if defined(BAR)"},
+       { CODE_LINE,    false, "BAR"},
+       { PREPROC_LINE, false, "#else"},
+       { CODE_LINE,    false, "!BAR"},
+       { PREPROC_LINE, false, "#endif"},
+
+       { PREPROC_LINE, false, "#if !defined(BAR)"},
+       { CODE_LINE,    false, "!BAR"},
+       { PREPROC_LINE, false, "#else"},
+       { CODE_LINE,    false, "BAR"},
+       { PREPROC_LINE, false, "#endif"},
+
+       { PREPROC_LINE, false, "#if HAVE_FOO"},
+       { CODE_LINE,    false, "HAVE_FOO"},
+       { PREPROC_LINE, false, "#elif HAVE_BAR"},
+       { CODE_LINE,    false, "HAVE_BAR"},
+       { PREPROC_LINE, false, "#else"},
+       { CODE_LINE,    false, "neither"},
+       { PREPROC_LINE, false, "#endif /* With a comment. */"},
+
+       { PREPROC_LINE, false, "#endif /* TEST_H */" },
+};
+
+#define NUM_LINES (sizeof(testfile)/sizeof(testfile[0]))
+
+static const char *line_type_name(enum line_info_type type)
+{ /* Printable name of @type; "**INVALID**" for a value outside the enum. */
+       switch (type) {
+       case PREPROC_LINE: return "PREPROC_LINE";
+       case CODE_LINE: return "CODE_LINE";
+       case DOC_LINE: return "DOC_LINE";
+       case COMMENT_LINE: return "COMMENT_LINE";
+       default: return "**INVALID**";
+       }
+}
+
+/* This just tests parser for the moment.
+ *
+ * plan_tests() arithmetic: 2 checks per line of testfile, plus 2 for the
+ * returned/cached pointer, plus 66 = 6 checks on the first two lines
+ * + 4 structural checks on each of the 10 BAR/!BAR/HAVE_BAR/HAVE_FOO
+ * lines + 2 get_ccan_line_pp() checks on each of those same lines. */
+int main(int argc, char *argv[])
+{
+       unsigned int i;
+       struct line_info *line_info;
+       struct ccan_file *f = talloc(NULL, struct ccan_file);
+
+       plan_tests(NUM_LINES * 2 + 2 + 66);
+
+       f->num_lines = NUM_LINES;
+       f->line_info = NULL; /* Nothing cached yet, so get_ccan_line_info() does the work. */
+       f->lines = talloc_array(f, char *, f->num_lines);
+       for (i = 0; i < f->num_lines; i++)
+               f->lines[i] = talloc_strdup(f->lines, testfile[i].line);
+       
+       line_info = get_ccan_line_info(f);
+       ok1(line_info == f->line_info);
+       for (i = 0; i < f->num_lines; i++) {
+               ok(f->line_info[i].type == testfile[i].type,
+                  "Line %u:'%s' type %s should be %s",
+                  i, testfile[i].line,
+                  line_type_name(f->line_info[i].type),
+                  line_type_name(testfile[i].type));
+               ok(f->line_info[i].continued == testfile[i].continued,
+                  "Line %u:'%s' continued should be %s",
+                  i, testfile[i].line,
+                  testfile[i].continued ? "TRUE" : "FALSE");
+       }
+
+       /* Should cache. */
+       ok1(get_ccan_line_info(f) == line_info);
+
+       /* Expect line 1 condition to be NULL. */
+       ok1(line_info[0].cond == NULL);
+       /* Line 2, should depend on TEST_H being undefined. */
+       ok1(line_info[1].cond != NULL);
+       ok1(line_info[1].cond->type == PP_COND_IFDEF);
+       ok1(line_info[1].cond->inverse);
+       ok1(line_info[1].cond->parent == NULL);
+       ok1(streq(line_info[1].cond->symbol, "TEST_H"));
+
+       /* Every line BAR should depend on BAR being defined. */
+       for (i = 0; i < f->num_lines; i++) {
+               if (!streq(testfile[i].line, "BAR"))
+                       continue;
+               ok1(line_info[i].cond->type == PP_COND_IFDEF);
+               ok1(!line_info[i].cond->inverse);
+               ok1(streq(line_info[i].cond->symbol, "BAR"));
+               ok1(line_info[i].cond->parent == line_info[1].cond); /* i.e. nested directly inside #ifndef TEST_H */
+       }
+
+       /* Every line !BAR should depend on BAR being undefined. */
+       for (i = 0; i < f->num_lines; i++) {
+               if (!streq(testfile[i].line, "!BAR"))
+                       continue;
+               ok1(line_info[i].cond->type == PP_COND_IFDEF);
+               ok1(line_info[i].cond->inverse);
+               ok1(streq(line_info[i].cond->symbol, "BAR"));
+               ok1(line_info[i].cond->parent == line_info[1].cond);
+       }
+       
+       /* Every line HAVE_BAR should depend on HAVE_BAR being set. */
+       for (i = 0; i < f->num_lines; i++) {
+               if (!streq(testfile[i].line, "HAVE_BAR"))
+                       continue;
+               ok1(line_info[i].cond->type == PP_COND_IF);
+               ok1(!line_info[i].cond->inverse);
+               ok1(streq(line_info[i].cond->symbol, "HAVE_BAR"));
+               ok1(line_info[i].cond->parent == line_info[1].cond);
+       }
+       
+       /* Every line HAVE_FOO should depend on HAVE_FOO being set. */
+       for (i = 0; i < f->num_lines; i++) {
+               if (!streq(testfile[i].line, "HAVE_FOO"))
+                       continue;
+               ok1(line_info[i].cond->type == PP_COND_IF);
+               ok1(!line_info[i].cond->inverse);
+               ok1(streq(line_info[i].cond->symbol, "HAVE_FOO"));
+               ok1(line_info[i].cond->parent == line_info[1].cond);
+       }
+
+       /* Now check using interface. */
+       for (i = 0; i < f->num_lines; i++) {
+               if (streq(testfile[i].line, "BAR")) {
+                       ok1(get_ccan_line_pp(line_info[i].cond, "BAR", 1)
+                           == COMPILED);
+                       ok1(get_ccan_line_pp(line_info[i].cond, "FOO", 1)
+                           == NOT_COMPILED);
+               } else if (streq(testfile[i].line, "!BAR")) {
+                       ok1(get_ccan_line_pp(line_info[i].cond, "BAR", 1)
+                           == NOT_COMPILED);
+                       ok1(get_ccan_line_pp(line_info[i].cond, "FOO", 1)
+                           == COMPILED);
+               } else if (streq(testfile[i].line, "HAVE_BAR")) {
+                       ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_BAR", 1)
+                           == COMPILED);
+                       ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_BAR", 0)
+                           == NOT_COMPILED);
+               } else if (streq(testfile[i].line, "HAVE_FOO")) {
+                       ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_FOO", 1)
+                           == COMPILED);
+                       ok1(get_ccan_line_pp(line_info[i].cond, "HAVE_FOO", 0)
+                           == NOT_COMPILED);
+               }
+       }
+
+       return exit_status();
+}