X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=tools%2Fdoc_extract-core.c;h=069bf2237c6815f12f428e272970ad863c4b9188;hp=f3666e59924926aeed86bec1478fad53e1f6eb56;hb=adc90816df194167a588a943ae503a37fec3fb6a;hpb=44c480c492c4596801261d748c5e3339c30f1f7e diff --git a/tools/doc_extract-core.c b/tools/doc_extract-core.c index f3666e59..069bf223 100644 --- a/tools/doc_extract-core.c +++ b/tools/doc_extract-core.c @@ -1,4 +1,6 @@ /* This merely extracts, doesn't do XML or anything. */ +#include +#include #include #include #include @@ -9,34 +11,48 @@ #include #include #include -#include -#include #include "doc_extract.h" #include "tools.h" -static char **grab_doc(char **lines, unsigned int num) +static char **grab_doc(char **lines, unsigned int **linemap, + const char *file) { char **ret; - unsigned int i; + unsigned int i, num; bool printing = false; - ret = talloc_array(NULL, char *, num+1); + ret = tal_arr(NULL, char *, tal_count(lines)); + *linemap = tal_arr(ret, unsigned int, tal_count(lines)); num = 0; for (i = 0; lines[i]; i++) { if (streq(lines[i], "/**")) { printing = true; - if (num != 0) - talloc_append_string(ret[num-1], "\n"); + if (num != 0) { + ret[num-1] = tal_strcat(NULL, + take(ret[num-1]), "\n"); + } } else if (streq(lines[i], " */")) printing = false; else if (printing) { if (strstarts(lines[i], " * ")) - ret[num++] = talloc_strdup(ret, lines[i]+3); + ret[num++] = tal_strdup(ret, lines[i]+3); else if (strstarts(lines[i], " *")) - ret[num++] = talloc_strdup(ret, lines[i]+2); - else - errx(1, "Malformed line %u", i); + ret[num++] = tal_strdup(ret, lines[i]+2); + else { + /* Weird, malformed? */ + static bool warned; + if (!warned) { + warnx("%s:%u:" + " Expected ' *' in comment.", + file, i+1); + warned++; + } + ret[num++] = tal_strdup(ret, lines[i]); + if (strstr(lines[i], "*/")) + printing = false; + } + (*linemap)[num-1] = i; } } ret[num] = NULL; @@ -48,35 +64,33 @@ static bool is_blank(const char *line) return line && line[strspn(line, " \t\n")] == '\0'; } -static bool is_section(const char *line, bool one_liner) +static char *is_section(const void *ctx, const char *line, char **value) { - unsigned int len; + char *secname; - if (!isupper(line[0])) - return false; - len = strspn(line, IDENT_CHARS); - if (line[len] != ':') - return false; + /* Any number of upper case words separated by spaces, ending in : */ + if (!tal_strreg(ctx, line, + "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)", + &secname, NULL, value)) + return NULL; - /* If it can be a one-liner, a space is sufficient.*/ - if (one_liner) - return (line[len+1] == ' ' || line[len+1] == '\t'); - - return line[len] == ':' && is_blank(line+len+1); + return secname; } -/* Summary line is form ' - ' */ -static bool is_summary_line(const char *line) +/* Summary line is form ' - ' (spaces for 'struct foo -') */ +static unsigned int is_summary_line(const char *line) { unsigned int id_len; - id_len = strspn(line, IDENT_CHARS); + /* We allow /, because it can be in (nested) module names. */ + id_len = strspn(line, IDENT_CHARS" /"); if (id_len == 0) - return false; - if (!strstarts(line + id_len, " - ")) - return false; - - return true; + return 0; + if (strspn(line, " ") == id_len) + return 0; + if (!strstarts(line + id_len-1, " - ")) + return 0; + return id_len - 1; } static bool empty_section(struct doc_section *d) @@ -91,7 +105,8 @@ static bool empty_section(struct doc_section *d) static struct doc_section *new_section(struct list_head *list, const char *function, - const char *type) + const char *type, + unsigned int srcline) { struct doc_section *d; char *lowertype; @@ -101,18 +116,19 @@ static struct doc_section *new_section(struct list_head *list, d = list_tail(list, struct doc_section, list); if (d && empty_section(d)) { list_del(&d->list); - talloc_free(d); + tal_free(d); } - d = talloc(list, struct doc_section); + d = tal(list, struct doc_section); d->function = function; - lowertype = talloc_size(d, strlen(type) + 1); + lowertype = tal_arr(d, char, strlen(type) + 1); /* Canonicalize type to lower case. */ for (i = 0; i < strlen(type)+1; i++) lowertype[i] = tolower(type[i]); d->type = lowertype; - d->lines = NULL; + d->lines = tal_arr(d, char *, 0); d->num_lines = 0; + d->srcline = srcline; list_add_tail(list, &d->list); return d; @@ -120,45 +136,115 @@ static struct doc_section *new_section(struct list_head *list, static void add_line(struct doc_section *curr, const char *line) { - curr->lines = talloc_realloc(curr, curr->lines, char *, - curr->num_lines+1); - curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line); + char *myline = tal_strdup(curr->lines, line); + tal_expand(&curr->lines, &myline, 1); + curr->num_lines++; +} + +/* We convert tabs to spaces here. */ +static void add_detabbed_line(struct doc_section *curr, const char *rawline) +{ + unsigned int i, eff_i, len, off = 0; + char *line; + + /* Worst-case alloc: 8 spaces per tab. */ + line = tal_arr(curr, char, strlen(rawline) + + strcount(rawline, "\t") * 7 + 1); + len = 0; + + /* We keep track of the *effective* offset of i. */ + for (i = eff_i = 0; i < strlen(rawline); i++) { + if (rawline[i] == '\t') { + do { + line[len++] = ' '; + eff_i++; + } while (eff_i % 8 != 0); + } else { + line[len++] = rawline[i]; + if (off == 0 && rawline[i] == '*') + off = i + 1; + eff_i++; + } + } + line[len] = '\0'; + + add_line(curr, line + off); + tal_free(line); +} + +/* Not very efficient: we could track prefix length while doing + * add_detabbed_line */ +static void trim_lines(struct doc_section *curr) +{ + unsigned int i, trim = -1; + int last_non_empty = -1; + + /* Get minimum whitespace prefix. */ + for (i = 0; i < curr->num_lines; i++) { + unsigned int prefix = strspn(curr->lines[i], " "); + /* Ignore blank lines */ + if (curr->lines[i][prefix] == '\0') + continue; + if (prefix < trim) + trim = prefix; + } + + /* Now trim it. */ + for (i = 0; i < curr->num_lines; i++) { + unsigned int prefix = strspn(curr->lines[i], " "); + if (prefix < trim) + curr->lines[i] += prefix; + else + curr->lines[i] += trim; + + /* All blank? Potential to trim. */ + if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0') + last_non_empty = i; + } + + /* Remove trailing blank lines. */ + curr->num_lines = last_non_empty + 1; } -struct list_head *extract_doc_sections(char **rawlines, unsigned int num) +struct list_head *extract_doc_sections(char **rawlines, const char *file) { - char **lines = grab_doc(rawlines, num); + unsigned int *linemap; + char **lines = grab_doc(rawlines, &linemap, file); const char *function = NULL; struct doc_section *curr = NULL; unsigned int i; struct list_head *list; - list = talloc(NULL, struct list_head); + list = tal(NULL, struct list_head); list_head_init(list); for (i = 0; lines[i]; i++) { - if (is_summary_line(lines[i])) { - function = talloc_strndup(list, lines[i], - strcspn(lines[i], " ")); - curr = new_section(list, function, "summary"); - add_line(curr, strstr(lines[i], " - ") + 3); - curr = new_section(list, function, "description"); - } else if (is_section(lines[i], false)) { - char *type = talloc_strndup(curr, lines[i], - strcspn(lines[i], ":")); - curr = new_section(list, function, type); - } else if (is_section(lines[i], true)) { - unsigned int sectlen = strcspn(lines[i], ":"); - char *type = talloc_strndup(curr, lines[i], sectlen); - curr = new_section(list, function, type); - add_line(curr, lines[i] + sectlen + 1 - + strspn(lines[i] + sectlen + 1, " \t")); + unsigned funclen; + char *type, *extra; + + funclen = is_summary_line(lines[i]); + if (funclen) { + function = tal_strndup(list, lines[i], funclen); + curr = new_section(list, function, "summary", + linemap[i]); + add_line(curr, lines[i] + funclen + 3); + curr = new_section(list, function, "description", + linemap[i]); + } else if ((type = is_section(list, lines[i], &extra)) != NULL){ + curr = new_section(list, function, type, linemap[i]); + if (!streq(extra, "")) { + add_line(curr, extra); + curr = NULL; + } } else { - if (!curr) - continue; - add_line(curr, lines[i]); + if (curr) + add_detabbed_line(curr, rawlines[linemap[i]]); } } - talloc_free(lines); + + list_for_each(list, curr, list) + trim_lines(curr); + + tal_free(lines); return list; }