git.ozlabs.org Git - ccan/blobdiff - tools/ccanlint/file_analysis.c
Somewhat decent cpp analysis for ccanlint.
[ccan] / tools / ccanlint / file_analysis.c
index 9513ab67d7c0b6fe821362218cf0666520f9cf30..2ede1f5b8cea7a1d59216e54266c74b2f1eb9fbc 100644 (file)
@@ -4,6 +4,7 @@
 #include <ccan/str_talloc/str_talloc.h>
 #include <ccan/grab_file/grab_file.h>
 #include <ccan/noerr/noerr.h>
+#include "../tools.h"
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -11,6 +12,7 @@
 #include <err.h>
 #include <errno.h>
 #include <dirent.h>
+#include <ctype.h>
 
 char **get_ccan_file_lines(struct ccan_file *f)
 {
@@ -165,3 +167,301 @@ struct manifest *get_manifest(void)
        add_files(m, "");
        return m;
 }
+
+
+/**
+ * remove_comments - return a copy of a line with its comments cut out.
+ * @line: the line to copy; it is also passed to talloc_array() as the
+ *        context of the returned buffer.
+ * @in_comment: true if a block comment opened on an earlier line is
+ *              still open when this line starts.
+ * @unterminated: set to true if a block comment is still open at the
+ *                end of this line, false otherwise.
+ *
+ * Both block comments and "//" comments are removed.  Comment markers
+ * are found with a plain substring search, so markers inside string
+ * literals are treated as comments too.
+ */
+static char *remove_comments(const char *line, bool in_comment,
+                            bool *unterminated)
+{
+       char *ret = talloc_array(line, char, strlen(line) + 1);
+       char *out = ret;
+
+       while (1) {
+               const char *close;
+
+               if (!in_comment) {
+                       const char *block = strstr(line, "/*");
+                       const char *eol = strstr(line, "//");
+                       const char *start;
+                       size_t len;
+
+                       if (!block && !eol) {
+                               /* No comment left: keep the rest verbatim. */
+                               strcpy(out, line);
+                               *unterminated = false;
+                               return ret;
+                       }
+
+                       /* Whichever opener comes first on the line wins. */
+                       if (!block)
+                               start = eol;
+                       else if (!eol)
+                               start = block;
+                       else
+                               start = eol < block ? eol : block;
+
+                       /* Keep the text before the opener, skip the opener. */
+                       len = start - line;
+                       memcpy(out, line, len);
+                       out += len;
+                       line = start + 2;
+
+                       if (start == eol) {
+                               /* A "//" comment swallows the rest. */
+                               *out = '\0';
+                               *unterminated = false;
+                               return ret;
+                       }
+               }
+
+               /* Inside a block comment: look for where it closes. */
+               close = strstr(line, "*/");
+               if (!close) {
+                       *unterminated = true;
+                       *out = '\0';
+                       return ret;
+               }
+               line = close + 2;
+               in_comment = false;
+       }
+}
+
+/* True if line holds nothing but spaces and tabs (or nothing at all). */
+static bool is_empty(const char *line)
+{
+       const char *rest = line + strspn(line, " \t");
+
+       return *rest == '\0';
+}
+
+/* True if line ends in a backslash, i.e. it runs on to the next line. */
+static bool continues(const char *line)
+{
+       /* Strictly an odd number of trailing backslashes; one is close enough. */
+       const char *backslash = "\\";
+
+       return strends(line, backslash);
+}
+
+/*
+ * get_token - consume @token if it is the next thing on the line.
+ * @line: cursor into the line; leading spaces and tabs are always skipped,
+ *        and the cursor moves past the token only when it matches.
+ * @token: the exact token text wanted.
+ *
+ * If @token starts like an identifier, the whole run of IDENT_CHARS at the
+ * cursor must equal it (so "if" does not match "ifdef").  Otherwise only
+ * the first strlen(@token) characters are compared.
+ *
+ * Returns true if the token was found and consumed.
+ */
+static bool get_token(const char **line, const char *token)
+{
+       size_t wanted = strlen(token);
+       size_t toklen;
+
+       *line += strspn(*line, " \t");
+       /* <ctype.h> needs an unsigned char value: plain char may be negative. */
+       if (isalnum((unsigned char)token[0]) || token[0] == '_')
+               toklen = strspn(*line, IDENT_CHARS);
+       else {
+               /* FIXME: real tokenizer handles ++ and other multi-chars.  */
+               toklen = wanted;
+       }
+
+       if (toklen == wanted && !strncmp(*line, token, toklen)) {
+               *line += toklen;
+               return true;
+       }
+       return false;
+}
+
+/*
+ * Skip spaces and tabs, then copy the run of IDENT_CHARS found at the
+ * cursor into a new string allocated under ctx, and advance the cursor
+ * past it.  Returns NULL (with only the whitespace consumed) if no such
+ * characters are there.
+ */
+static char *get_symbol_token(void *ctx, const char **line)
+{
+       const char *start = *line + strspn(*line, " \t");
+       unsigned int len = strspn(start, IDENT_CHARS);
+
+       *line = start;
+       if (len == 0)
+               return NULL;
+
+       *line = start + len;
+       return talloc_strndup(ctx, start, len);
+}
+
+/*
+ * Parse the simple forms "[!] [defined] [(] SYMBOL [)]" that may follow
+ * #if or #elif, filling in cond->inverse and cond->symbol.  Without
+ * "defined" the condition tests the symbol's value, so cond->type becomes
+ * PP_COND_IF; otherwise cond->type is left as the caller set it.
+ *
+ * Returns false if no symbol is found or an opened bracket is not closed.
+ */
+static bool parse_hash_if(struct pp_conditions *cond, const char **line)
+{
+       bool negated = get_token(line, "!");
+       bool has_defined = get_token(line, "defined");
+       bool has_bracket = get_token(line, "(");
+
+       cond->inverse = negated;
+       cond->symbol = get_symbol_token(cond, line);
+       if (cond->symbol == NULL)
+               return false;
+
+       if (has_bracket) {
+               if (!get_token(line, ")"))
+                       return false;
+       }
+
+       if (!has_defined)
+               cond->type = PP_COND_IF;
+       return true;
+}
+
+/* Parse "[(] SYMBOL [)]" after #ifdef / #ifndef into cond->symbol. */
+static bool parse_hash_ifdef(struct pp_conditions *cond, const char **line)
+{
+       bool brackets = get_token(line, "(");
+
+       cond->symbol = get_symbol_token(cond, line);
+       if (!cond->symbol)
+               return false;
+       return !brackets || get_token(line, ")");
+}
+
+/*
+ * analyze_directive - work out the conditions in force after a directive.
+ * @f: file the line belongs to; new conditions are allocated under it.
+ * @line: the directive line; its first non-blank character must be '#'
+ *        (anything else aborts).
+ * @parent: conditions in force before this line, or NULL for none.
+ *
+ * Returns the conditions applying to the lines after this one:
+ *  - #if / #ifdef / #ifndef: a new condition whose parent is @parent;
+ *  - #elif: a new condition replacing @parent (same grandparent);
+ *  - #else: a copy of @parent with the sense inverted;
+ *  - #endif: @parent's own parent;
+ *  - any other directive: @parent unchanged;
+ *  - #elif / #else / #endif with no @parent: NULL.
+ * A condition too complex to parse is returned as PP_COND_UNKNOWN.
+ *
+ * FIXME: Get serious!
+ */
+static struct pp_conditions *analyze_directive(struct ccan_file *f,
+                                              const char *line,
+                                              struct pp_conditions *parent)
+{
+       struct pp_conditions *cond = talloc(f, struct pp_conditions);
+       struct pp_conditions *ret;
+       char *stripped;
+       bool unused;
+
+       /* Work on a comment-free copy; it is released again at "out". */
+       stripped = remove_comments(line, false, &unused);
+       line = stripped;
+
+       cond->parent = parent;
+       cond->type = PP_COND_IFDEF;
+
+       if (!get_token(&line, "#"))
+               abort();
+
+       if (get_token(&line, "if")) {
+               if (!parse_hash_if(cond, &line))
+                       goto unknown;
+       } else if (get_token(&line, "elif")) {
+               /* Malformed? */
+               if (!parent)
+                       goto malformed;
+               cond->parent = parent->parent;
+               /* FIXME: Not quite true.  This implies !parent, but we don't
+                * do multiple conditionals yet. */
+               if (!parse_hash_if(cond, &line))
+                       goto unknown;
+       } else if (get_token(&line, "ifdef")) {
+               cond->inverse = false;
+               if (!parse_hash_ifdef(cond, &line))
+                       goto unknown;
+       } else if (get_token(&line, "ifndef")) {
+               cond->inverse = true;
+               if (!parse_hash_ifdef(cond, &line))
+                       goto unknown;
+       } else if (get_token(&line, "else")) {
+               /* Malformed? */
+               if (!parent)
+                       goto malformed;
+
+               /* Same test (and same grandparent) as parent, opposite sense. */
+               *cond = *parent;
+               cond->inverse = !cond->inverse;
+               ret = cond;
+               goto out;
+       } else if (get_token(&line, "endif")) {
+               talloc_free(cond);
+               /* Back up one, unless malformed (nothing to close). */
+               ret = parent ? parent->parent : NULL;
+               goto out;
+       } else {
+               /* Not a conditional. */
+               talloc_free(cond);
+               ret = parent;
+               goto out;
+       }
+
+       /* Anything left after the symbol is more than we understand. */
+       if (is_empty(line)) {
+               ret = cond;
+               goto out;
+       }
+
+unknown:
+       cond->type = PP_COND_UNKNOWN;
+       ret = cond;
+       goto out;
+
+malformed:
+       /* Don't leave the unused condition hanging off f. */
+       talloc_free(cond);
+       ret = NULL;
+
+out:
+       talloc_free(stripped);
+       return ret;
+}
+
+/*
+ * get_ccan_line_info - classify every line of a file (cached in f->line_info).
+ *
+ * Each line gets the preprocessor conditions in force when it starts,
+ * whether it continues the previous line (backslash), and a type:
+ * PREPROC_LINE, DOC_LINE, COMMENT_LINE or CODE_LINE.  A continued line
+ * takes the type of the line before it.  Lines that are empty once
+ * comments are stripped count as comment lines, or as doc lines when they
+ * start with the doc-comment opener or carry on a doc comment.
+ *
+ * This parser is rough, but OK if code is reasonably neat.
+ */
+struct line_info *get_ccan_line_info(struct ccan_file *f)
+{
+       bool continued = false, in_comment = false;
+       struct pp_conditions *cond = NULL;
+       unsigned int i;
+
+       if (f->line_info)
+               return f->line_info;
+
+       /* Called for its side effect; presumably loads f->lines/f->num_lines. */
+       get_ccan_file_lines(f);
+       f->line_info = talloc_array(f->lines, struct line_info, f->num_lines);
+
+       for (i = 0; i < f->num_lines; continued = continues(f->lines[i++])) {
+               char *p;
+               bool still_doc_line;
+
+               /* Current conditions apply to this line. */
+               f->line_info[i].cond = cond;
+               f->line_info[i].continued = continued;
+
+               if (continued) {
+                       /* Same as last line. */
+                       f->line_info[i].type = f->line_info[i-1].type;
+                       /* Only in_comment is wanted; drop the stripped copy. */
+                       p = remove_comments(f->lines[i], in_comment,
+                                           &in_comment);
+                       talloc_free(p);
+                       continue;
+               }
+
+               /* Preprocessor directive? */
+               if (!in_comment
+                   && f->lines[i][strspn(f->lines[i], " \t")] == '#') {
+                       f->line_info[i].type = PREPROC_LINE;
+                       cond = analyze_directive(f, f->lines[i], cond);
+                       /* A block comment opened on the directive may run
+                        * on to the following lines. */
+                       p = remove_comments(f->lines[i], false, &in_comment);
+                       talloc_free(p);
+                       continue;
+               }
+
+               /* in_comment is false on the first line, so i-1 is safe. */
+               still_doc_line = (in_comment
+                                 && f->line_info[i-1].type == DOC_LINE);
+
+               p = remove_comments(f->lines[i], in_comment, &in_comment);
+               if (is_empty(p)) {
+                       if (strstarts(f->lines[i], "/**") || still_doc_line)
+                               f->line_info[i].type = DOC_LINE;
+                       else
+                               f->line_info[i].type = COMMENT_LINE;
+               } else
+                       f->line_info[i].type = CODE_LINE;
+               talloc_free(p);
+       }
+       return f->line_info;
+}
+
+/*
+ * get_ccan_line_pp - would a line under these conditions be compiled?
+ * @cond: innermost condition guarding the line (NULL for none).
+ * @symbol: the one preprocessor symbol assumed to be defined.
+ * @value: the value @symbol is assumed to have.
+ *
+ * Every other symbol is treated as undefined (and as 0 in a value test).
+ * Outer conditions are checked first; the first one that is not COMPILED
+ * decides the answer.  Conditions we could not parse give MAYBE_COMPILED.
+ */
+enum line_compiled get_ccan_line_pp(struct pp_conditions *cond,
+                                   const char *symbol,
+                                   unsigned int value)
+{
+       enum line_compiled outer;
+       bool taken;
+
+       /* No conditions?  Easy. */
+       if (cond == NULL)
+               return COMPILED;
+
+       /* An enclosing condition may already rule this line out. */
+       outer = get_ccan_line_pp(cond->parent, symbol, value);
+       if (outer != COMPILED)
+               return outer;
+
+       if (cond->type == PP_COND_IF) {
+               if (streq(cond->symbol, symbol))
+                       taken = (!value == cond->inverse);
+               else
+                       /* Unknown symbol, will be 0. */
+                       taken = cond->inverse;
+       } else if (cond->type == PP_COND_IFDEF) {
+               if (streq(cond->symbol, symbol))
+                       taken = !cond->inverse;
+               else
+                       /* Unknown symbol, assume undefined. */
+                       taken = cond->inverse;
+       } else {
+               /* Unknown. */
+               return MAYBE_COMPILED;
+       }
+
+       return taken ? COMPILED : NOT_COMPILED;
+}