]> git.ozlabs.org Git - ccan-lca-2011.git/blobdiff - ccan/cdump/cdump_parse.c
cdump: first cut of translation of Tridge's genstruct junkcode.
[ccan-lca-2011.git] / ccan / cdump / cdump_parse.c
diff --git a/ccan/cdump/cdump_parse.c b/ccan/cdump/cdump_parse.c
new file mode 100644 (file)
index 0000000..fea7df1
--- /dev/null
@@ -0,0 +1,311 @@
+#include <ccan/cdump/cdump_parse.h>
+#include <ccan/talloc/talloc.h>
+#include <ccan/str/str.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <ctype.h>
+
+/* Append a copy of up to toklen bytes of tok to the token array *toks.
+ * The array grows by one slot (it may move, so *toks is updated) and the
+ * copied string is allocated as a talloc child of the grown array. */
+static void add_token(char ***toks, const char *tok, unsigned toklen)
+{
+       size_t count = talloc_array_length(*toks);
+       char **grown = talloc_realloc(NULL, *toks, char *, count + 1);
+
+       grown[count] = talloc_strndup(grown, tok, toklen);
+       *toks = grown;
+}
+
+/* Emit the pending identifier code[*tok_start .. end) as a token, if there
+ * is one, and mark that no identifier is pending any more. */
+static void flush_identifier(char ***toks, const char *code,
+                            unsigned int *tok_start, unsigned int end)
+{
+       if (*tok_start == -1U)
+               return;
+       add_token(toks, code + *tok_start, end - *tok_start);
+       *tok_start = -1U;
+}
+
+/* Simplified tokenizer: comments and preproc directives removed,
+ * identifiers are a token, others are single char tokens.
+ *
+ * Returns a talloc array (allocated off ctx) of token strings followed by
+ * a terminating NULL entry, so talloc_array_length() of the result is the
+ * number of tokens plus one. */
+static char **tokenize(const void *ctx, const char *code)
+{
+       /* tok_start is the offset of the identifier being accumulated,
+        * or -1U when no identifier is in progress. */
+       unsigned int i, len, tok_start = -1U;
+       bool start_of_line = true;
+       char **ret = talloc_array(ctx, char *, 0);
+
+       for (i = 0; code[i]; i += len) {
+               /* <ctype.h> classifiers are only defined for values
+                * representable as unsigned char (or EOF). */
+               unsigned char c = code[i];
+
+               if (c == '#' && start_of_line) {
+                       /* Preprocessor line. */
+                       len = strcspn(code+i, "\n");
+               } else if (c == '/' && code[i+1] == '/') {
+                       /* One line comment. */
+                       flush_identifier(&ret, code, &tok_start, i);
+                       len = strcspn(code+i, "\n");
+               } else if (c == '/' && code[i+1] == '*') {
+                       /* Multi-line comment. */
+                       const char *end = strstr(code+i+2, "*/");
+
+                       flush_identifier(&ret, code, &tok_start, i);
+                       /* An unterminated comment swallows the rest. */
+                       if (end)
+                               len = (end + 2) - (code + i);
+                       else
+                               len = strlen(code + i);
+               } else if (isalnum(c) || c == '_') {
+                       /* Identifier or part thereof */
+                       if (tok_start == -1U)
+                               tok_start = i;
+                       len = 1;
+               } else if (!isspace(c)) {
+                       /* Punctuation: treat as single char token. */
+                       flush_identifier(&ret, code, &tok_start, i);
+                       add_token(&ret, code+i, 1);
+                       len = 1;
+               } else {
+                       /* Whitespace. */
+                       flush_identifier(&ret, code, &tok_start, i);
+                       len = 1;
+               }
+               /* '#' only starts a directive when nothing but whitespace
+                * precedes it on the line. */
+               if (c == '\n')
+                       start_of_line = true;
+               else if (!isspace(c))
+                       start_of_line = false;
+       }
+
+       /* The input may end in the middle of an identifier: i now indexes
+        * the terminating NUL, so emit whatever is still pending. */
+       flush_identifier(&ret, code, &tok_start, i);
+
+       /* Add terminating NULL. */
+       ret = talloc_realloc(NULL, ret, char *, talloc_array_length(ret)+1);
+       ret[talloc_array_length(ret)-1] = NULL;
+
+       return ret;
+}
+
+/* Append one initializer row for the member named tok[0] to *definitions.
+ *
+ * tok[0] is the member name; any "[...]" groups that follow are folded
+ * into a single array-length expression, each dimension parenthesized and
+ * the dimensions multiplied together.  ptr_count, size, flags, dynlen,
+ * bundle and unbundle are written into the row as given ("NULL" is
+ * emitted when dynlen is NULL, "0" when there is no array part).
+ *
+ * Returns the number of tokens consumed (the name plus any array
+ * brackets), or 0 if the token list ended before the name.
+ *
+ * NOTE(review): an empty "[]" leaves array_len without its opening
+ * parenthesis, since the last character is overwritten with ')'. */
+static size_t handle_general(const void *ctx, const char *outer_struct_name,
+                            char **definitions, unsigned int ptr_count,
+                            const char *size, char **tok, const char *flags,
+                            const char *dynlen, const char *bundle,
+                            const char *unbundle)
+{
+       size_t off = 1;
+       char *array_len = NULL;
+
+       /* Nothing to describe if the tokens ran out before the name. */
+       if (!tok[0])
+               return 0;
+
+       /* handle arrays, treat multidimensional arrays as 1 dimensional */
+       while (tok[off] && streq(tok[off], "[")) {
+               if (!array_len)
+                       array_len = talloc_strdup(ctx, "(");
+               else
+                       array_len = talloc_asprintf_append(array_len, " * (");
+               off++;
+               /* Stop at the NULL terminator too, so an unterminated '['
+                * cannot walk off the end of the token array. */
+               while (tok[off] && !streq(tok[off], "]")) {
+                       array_len = talloc_asprintf_append(array_len,
+                                                          "%s ", tok[off]);
+                       off++;
+               }
+               /* Turn the trailing separator into the closing paren. */
+               array_len[strlen(array_len)-1] = ')';
+               /* Step over ']' only if it is really there. */
+               if (tok[off])
+                       off++;
+       }
+
+       *definitions = talloc_asprintf_append(*definitions,
+       "\t{ \"%s\", %u, %s, offsetof(struct %s, %s), %s, %s, %s, %s, %s },\n",
+               tok[0], ptr_count, size, outer_struct_name, tok[0],
+               array_len ? array_len : "0", dynlen ? dynlen : "NULL", flags,
+               bundle, unbundle);
+
+       return off;
+}
+
+/* Parse a single declarator (leading '*'s, name, optional array part)
+ * starting at tok[0] and emit its description via handle_general().
+ *
+ * type[0..typelen) are the tokens of the declared type.  They are joined
+ * with '_' to form the names of the bundle/unbundle helpers
+ * ("cdump_bundle_<type>", "cdump_unbundle_<type>") and with spaces to form
+ * the "sizeof(<type>)" expression.  dynlen and flags are passed through.
+ *
+ * Returns the number of tokens consumed.  If the token list ends right
+ * after the '*'s, only those are counted and nothing is emitted. */
+static size_t parse_one(const void *ctx, const char *outer_struct_name,
+                       char **definitions, char **type, unsigned typelen,
+                       char **tok, const char *dynlen, const char *flags)
+{
+       unsigned int i, ptr_count = 0;
+       size_t off = 0;
+       char *bundle, *unbundle, *size;
+
+       /* Each leading '*' is one level of indirection. */
+       while (tok[off] && streq(tok[off], "*")) {
+               ptr_count++;
+               off++;
+       }
+
+       /* No name follows: leave it to the caller to notice the EOF. */
+       if (!tok[off])
+               return off;
+
+       bundle = talloc_strdup(ctx, "cdump_bundle");
+       unbundle = talloc_strdup(ctx, "cdump_unbundle");
+       size = talloc_strdup(ctx, "sizeof(");
+       for (i = 0; i < typelen; i++) {
+               bundle = talloc_asprintf_append(bundle, "_%s", type[i]);
+               unbundle = talloc_asprintf_append(unbundle, "_%s", type[i]);
+               size = talloc_asprintf_append(size, "%s ", type[i]);
+       }
+       /* Replace the trailing separator with the closing paren. */
+       size[strlen(size)-1] = ')';
+
+       off += handle_general(ctx, outer_struct_name, definitions, ptr_count,
+                             size, tok + off, flags, dynlen, bundle, unbundle);
+       return off;
+}
+
+/* Parse one member declaration of a struct, starting at tok[0] and ending
+ * at the next ';', and append a description row for every declarator in it
+ * to *definitions (declarators may be comma-separated).
+ *
+ * The leading tokens up to (not including) the first '*' or the first
+ * variable name are taken as the type.  A "CDUMP_LEN ( name )" annotation
+ * supplies the dynamic-length member name; a type starting with "enum"
+ * gets the CDUMP_FLAG_ALWAYS flag.
+ *
+ * Returns the number of tokens consumed including the ';', or 0 if the
+ * declaration is malformed or the tokens end before the ';'. */
+static size_t parse_element(const void *ctx, const char *outer_struct_name,
+                           char **definitions, char **tok)
+{
+       const char *dynlen = NULL, *flags = "0";
+       unsigned int typelen = 0;
+       bool terminated = false;
+       size_t i;
+
+       /* A stray ';' is an empty declaration: just step over it. */
+       if (tok[0] && streq(tok[0], ";"))
+               return 1;
+
+       if (tok[0] && streq(tok[0], "enum"))
+               flags = talloc_strdup(ctx, "CDUMP_FLAG_ALWAYS");
+
+       for (i = 0; tok[i]; i++) {
+               /* These mean we've passed the variable name */
+               bool after_name = streq(tok[i], "[")
+                       || streq(tok[i], ",")
+                       || streq(tok[i], ";")
+                       || streq(tok[i], "CDUMP_LEN");
+
+               if (typelen == 0 && after_name) {
+                       /* Need a type token and a name before this,
+                        * otherwise i - 1 would be 0 or wrap around. */
+                       if (i < 2)
+                               return 0;
+                       typelen = i - 1;
+               } else if (typelen == 0 && streq(tok[i], "*")) {
+                       /* This marks the end of the type: parse_one
+                        * swallows *s.  A '*' with no type is invalid. */
+                       if (i < 1)
+                               return 0;
+                       typelen = i;
+               }
+
+               /* End of expression */
+               if (streq(tok[i], ";")) {
+                       terminated = true;
+                       break;
+               }
+               if (streq(tok[i], "CDUMP_LEN")) {
+                       /* Expect "CDUMP_LEN ( name": make sure the name
+                        * token exists before reading it. */
+                       if (!tok[i+1] || !tok[i+2])
+                               return 0;
+                       dynlen = talloc_asprintf(ctx, "\"%s\"", tok[i+2]);
+               }
+       }
+       /* Ran out of tokens without seeing the ';'. */
+       if (!terminated)
+               return 0;
+
+       i = typelen;
+
+       /* They could be comma-separated, so process them all. */
+       do {
+               i += parse_one(ctx, outer_struct_name, definitions,
+                              tok, typelen, tok+i, dynlen, flags);
+               if (tok[i] && streq(tok[i], ","))
+                       i++;
+       } while (tok[i] && !streq(tok[i], ";") && !strstarts(tok[i], "CDUMP_"));
+
+       /* Skip any trailing annotation up to the terminating ';'. */
+       while (tok[i] && !streq(tok[i], ";"))
+               i++;
+
+       /* Report EOF as failure rather than a length that would make the
+        * caller index past the NULL terminator. */
+       if (!tok[i])
+               return 0;
+
+       return i + 1;
+}
+
+/* Generate the declarations and the descriptor table for "struct <name>".
+ *
+ * tok[0] is the token following the struct name (it is skipped without
+ * being inspected); members are parsed from tok[1] up to the closing '}'.
+ * Prototypes go to *declarations; the cdump_desc table, its terminating
+ * row and the bundle/unbundle wrappers go to *definitions.
+ *
+ * Returns the number of tokens consumed including the '}', or 0 if a
+ * member fails to parse or the tokens end before the '}'.  Output appended
+ * before a failure is left in place. */
+static unsigned parse_struct(const void *ctx,
+                            const char *name, char **tok,
+                            char **declarations, char **definitions)
+{
+       unsigned int i = 1, len;
+
+       *declarations = talloc_asprintf_append(*declarations,
+"bool cdump_bundle_struct_%s(struct cdump_string *, const void *, unsigned);\n"
+"bool cdump_unbundle_struct_%s(const void *, void *, const char *);\n"
+"extern const struct cdump_desc cdump_struct_%s[];\n",
+                                              name, name, name);
+
+       *definitions = talloc_asprintf_append(*definitions,
+"const struct cdump_desc cdump_struct_%s[] = {\n",
+                                             name);
+       while (tok[i] && !streq(tok[i], "}")) {
+               len = parse_element(ctx, name, definitions, tok + i);
+               if (!len)
+                       return 0;
+               /* Advance one token at a time so that we never step past
+                * the NULL terminator, whatever length was reported. */
+               while (len--) {
+                       if (!tok[i])
+                               return 0;
+                       i++;
+               }
+       }
+       /* End of tokens before the closing brace. */
+       if (!tok[i])
+               return 0;
+
+       *definitions = talloc_asprintf_append(*definitions,
+       "\t{ NULL, 0, 0, 0, 0, NULL, 0, NULL, NULL } };\n"
+       "bool cdump_bundle_struct_%s(struct cdump_string *p, const void *ptr, unsigned indent)\n"
+       "{\n"
+       "       return cdump_bundle_struct(cdump_struct_%s, p, ptr, indent);\n"
+       "}\n"
+       "bool cdump_unbundle_struct_%s(const void *ctx, void *ptr, const char *str)\n"
+       "{\n"
+       "       return cdump_unbundle_struct(ctx, cdump_struct_%s, ptr, str);\n"
+       "}\n"
+       "\n",
+                                             name, name, name, name);
+       return i + 1;
+}
+
+/* Generate the declarations and the name/value table for "enum <name>".
+ *
+ * tok[0] is the token following the enum name (it is skipped without
+ * being inspected); enumerators are read from tok[1] up to the closing
+ * '}'.  For each enumerator a { "NAME", NAME } row is emitted; anything
+ * between the name and the next ',' (such as "= value") is skipped.
+ * Prototypes go to *declarations; the table, its terminating row and the
+ * bundle/unbundle wrappers go to *definitions.
+ *
+ * Returns the number of tokens consumed including the '}', or 0 if the
+ * tokens end before the '}'.  Output appended before a failure is left in
+ * place. */
+static unsigned parse_enum(const void *ctx,
+                          const char *name, char **tok,
+                          char **declarations, char **definitions)
+{
+       unsigned int i = 1;
+
+       *declarations = talloc_asprintf_append(*declarations,
+"bool cdump_bundle_enum_%s(struct cdump_string *, const void *, unsigned);\n"
+"bool cdump_unbundle_enum_%s(const void *, void *, const char *);\n"
+"extern const struct cdump_enum cdump_enum_%s[];\n",
+                                              name, name, name);
+
+       *definitions = talloc_asprintf_append(*definitions,
+"const struct cdump_enum cdump_enum_%s[] = {\n",
+                                             name);
+       while (tok[i] && !streq(tok[i], "}")) {
+               *definitions = talloc_asprintf_append(*definitions,
+                                                     "\t{ \"%s\", %s },\n",
+                                                     tok[i], tok[i]);
+               /* Skip to the separating comma; a closing brace is left
+                * in place for the outer loop to see. */
+               while (tok[i] && !streq(tok[i], ",") && !streq(tok[i], "}"))
+                       i++;
+               if (tok[i] && streq(tok[i], ","))
+                       i++;
+       }
+       /* End of tokens before the closing brace. */
+       if (!tok[i])
+               return 0;
+
+       *definitions = talloc_asprintf_append(*definitions,
+       "\t{ NULL, 0 } };\n"
+       "bool cdump_bundle_enum_%s(struct cdump_string *p, const void *ptr, unsigned indent)\n"
+       "{\n"
+       "       return cdump_bundle_enum(cdump_enum_%s, p, ptr, indent);\n"
+       "}\n"
+       "bool cdump_unbundle_enum_%s(const void *ctx, void *ptr, const char *str)\n"
+       "{\n"
+       "       return cdump_unbundle_enum(cdump_enum_%s, ptr, str);\n"
+       "}\n"
+       "\n",
+                                             name, name, name, name);
+       return i + 1;
+}
+
+/* World's hackiest parser, inspired by Tridge's genstruct.pl.
+ *
+ * Scans code for "CDUMP_SAVED struct <name> ..." and
+ * "CDUMP_SAVED enum <name> ..." and generates output for each one.
+ * *declarations and *definitions are set to talloc strings allocated off
+ * ctx (empty strings if nothing is found) and are appended to as types
+ * are parsed.
+ *
+ * Returns NULL on success, or an error message allocated off ctx.  On
+ * error the two output strings hold whatever was generated up to that
+ * point. */
+char *cdump_parse(const void *ctx, const char *code,
+                 char **declarations, char **definitions)
+{
+       char **tokens = tokenize(ctx, code);
+       /* The token array carries one extra NULL terminator entry. */
+       size_t ntokens = talloc_array_length(tokens) - 1;
+       char *err = NULL;
+       unsigned int i, len;
+
+       *declarations = talloc_strdup(ctx, "");
+       *definitions = talloc_strdup(ctx, "");
+
+       for (i = 0; i < ntokens; i++) {
+               if (!streq(tokens[i], "CDUMP_SAVED"))
+                       continue;
+               /* Need the keyword, the name and at least one more token. */
+               if (i + 3 >= ntokens) {
+                       err = talloc_strdup(ctx, "EOF after CDUMP_SAVED");
+                       break;
+               }
+
+               if (streq(tokens[i+1], "struct")) {
+                       len = parse_struct(ctx, tokens[i+2], tokens + i + 3,
+                                          declarations, definitions);
+               } else if (streq(tokens[i+1], "enum")) {
+                       len = parse_enum(ctx, tokens[i+2], tokens + i + 3,
+                                        declarations, definitions);
+               } else {
+                       err = talloc_asprintf(ctx, "Unknown saved type"
+                                             " '%s'", tokens[i+1]);
+                       break;
+               }
+               if (len == 0) {
+                       err = talloc_asprintf(ctx, "Invalid %s '%s'",
+                                             tokens[i+1], tokens[i+2]);
+                       break;
+               }
+               /* Skip keyword, name and body; the loop's i++ accounts
+                * for CDUMP_SAVED itself. */
+               i += len + 2;
+       }
+
+       /* The tokens are only scratch data: the outputs and any error
+        * message hold their own copies, so don't leave the array hanging
+        * off the caller's context. */
+       talloc_free(tokens);
+       return err;
+}