/* This merely extracts, doesn't do XML or anything. */
+#include <ccan/talloc/talloc.h>
+#include <ccan/str/str.h>
+#include <ccan/str_talloc/str_talloc.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdbool.h>
#include <ctype.h>
-#include <ccan/talloc/talloc.h>
-#include <ccan/str/str.h>
#include "doc_extract.h"
#include "tools.h"
-static char **grab_doc(char **lines, unsigned int num)
+static char **grab_doc(char **lines, unsigned int **linemap)
{
char **ret;
- unsigned int i;
+ unsigned int i, num;
bool printing = false;
- ret = talloc_array(NULL, char *, num+1);
+ ret = talloc_array(NULL, char *, talloc_array_length(lines));
+ *linemap = talloc_array(ret, unsigned int, talloc_array_length(lines));
num = 0;
for (i = 0; lines[i]; i++) {
ret[num++] = talloc_strdup(ret, lines[i]+2);
else
errx(1, "Malformed line %u", i);
+ (*linemap)[num-1] = i;
}
}
ret[num] = NULL;
return line && line[strspn(line, " \t\n")] == '\0';
}
-static bool is_section(const char *line, bool one_liner)
+/*
+ * Decide whether line is a section header such as "Example:" or
+ * "See Also: foo".
+ *
+ * A header is one or more words, each starting with an upper-case ASCII
+ * letter and continuing with letters, digits or '_', separated by single
+ * spaces and followed by ':'.  The match is anchored at the start of line.
+ *
+ * On a match, returns the header name (the text before the ':') and stores
+ * in *value whatever follows the ':' once leading spaces, tabs and newlines
+ * have been skipped.  Returns NULL when line is not a header.
+ *
+ * NOTE(review): both strings come from strreg(), which is handed ctx --
+ * presumably as their talloc parent -- and the NULL argument presumably
+ * tells it not to return the inner (repeated-word) group.  Confirm against
+ * strreg's documentation.
+ */
+static char *is_section(const void *ctx, const char *line, char **value)
{
- unsigned int len = 0;
+ char *secname;
/* Any number of upper case words separated by spaces, ending in : */
- for (;;) {
- if (!isupper(line[len]))
- return false;
- len += strspn(line+len, IDENT_CHARS);
- if (line[len] == ':')
- break;
+ if (!strreg(ctx, line,
+ "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
+ &secname, NULL, value))
+ return NULL;
- if (line[len] != ' ')
- return false;
- len++;
- }
-
- /* If it can be a one-liner, a space is sufficient.*/
- if (one_liner)
- return (line[len+1] == ' ' || line[len+1] == '\t');
-
- return line[len] == ':' && is_blank(line+len+1);
+ return secname;
}
/* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
id_len = strspn(line, IDENT_CHARS" ");
if (id_len == 0)
return 0;
+ if (strspn(line, " ") == id_len)
+ return 0;
if (!strstarts(line + id_len-1, " - "))
return 0;
-
return id_len - 1;
}
static struct doc_section *new_section(struct list_head *list,
const char *function,
- const char *type)
+ const char *type,
+ unsigned int srcline)
{
struct doc_section *d;
char *lowertype;
d->type = lowertype;
d->lines = NULL;
d->num_lines = 0;
+ d->srcline = srcline;
list_add_tail(list, &d->list);
return d;
curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
}
-struct list_head *extract_doc_sections(char **rawlines, unsigned int num)
+/*
+ * Append rawline to curr with every tab expanded to spaces (tab stops
+ * every 8 columns), dropping everything up to and including the first
+ * '*' (the comment leader).
+ *
+ * Tab stops are computed against the whole raw line, so text after the
+ * leader keeps the columns it had in the source file.  A line without
+ * any '*' is added whole.
+ */
+static void add_detabbed_line(struct doc_section *curr, const char *rawline)
{
- char **lines = grab_doc(rawlines, num);
+	size_t i, rawlen = strlen(rawline);
+	size_t len = 0, off = 0;
+	char *line;
+
+	/* Worst case: each tab (1 byte) becomes 8 spaces, i.e. 7 extra. */
+	line = talloc_array(curr, char,
+			    rawlen + strcount(rawline, "\t") * 7 + 1);
+
+	/* len is both the output length and the effective column. */
+	for (i = 0; i < rawlen; i++) {
+		if (rawline[i] == '\t') {
+			do {
+				line[len++] = ' ';
+			} while (len % 8 != 0);
+		} else {
+			line[len++] = rawline[i];
+			/* Remember where the text after the first '*' starts.
+			 * This has to be an offset into the expanded buffer,
+			 * not into rawline: the two differ as soon as a tab
+			 * comes before the '*'. */
+			if (off == 0 && rawline[i] == '*')
+				off = len;
+		}
+	}
+	line[len] = '\0';
+
+	/* add_line() is relied on to take its own copy, so the scratch
+	 * buffer is released straight away. */
+	add_line(curr, line + off);
+	talloc_free(line);
+}
+
+/*
+ * Dedent a finished section and drop its trailing blank lines.
+ *
+ * Two passes over curr->lines (simple rather than fast -- the common
+ * indent could instead be tracked while add_detabbed_line adds lines):
+ *  1. find the smallest count of leading spaces among the lines that
+ *     have something after those spaces;
+ *  2. advance each line pointer past that many spaces, or past all of
+ *     its own leading spaces if it has fewer, and note the last line
+ *     that still holds something other than spaces and tabs.
+ * num_lines is then cut back to end just after that line (0 if none).
+ */
+static void trim_lines(struct doc_section *curr)
+{
+	unsigned int common = -1;	/* wraps to UINT_MAX: no candidate yet */
+	unsigned int n, lead;
+	int last_kept = -1;
+
+	for (n = 0; n < curr->num_lines; n++) {
+		lead = strspn(curr->lines[n], " ");
+		/* Lines made only of spaces must not drag the minimum down. */
+		if (curr->lines[n][lead] != '\0' && lead < common)
+			common = lead;
+	}
+
+	for (n = 0; n < curr->num_lines; n++) {
+		lead = strspn(curr->lines[n], " ");
+		/* Only the pointer moves; the text itself is untouched. */
+		curr->lines[n] += (lead < common) ? lead : common;
+
+		if (curr->lines[n][strspn(curr->lines[n], " \t")] != '\0')
+			last_kept = n;
+	}
+
+	curr->num_lines = last_kept + 1;
+}
+
+/*
+ * Split the documentation comments found in rawlines into sections.
+ *
+ * grab_doc() supplies the comment lines plus linemap, where linemap[i]
+ * is the index in rawlines that lines[i] was taken from; that index is
+ * recorded as each section's srcline.
+ *
+ * Per line:
+ *  - a summary line ("<identifier> - text") opens a "summary" section
+ *    holding the text after " - ", then opens a "description" section;
+ *  - a section header opens a section named after it;
+ *  - anything else is appended to the section currently being collected,
+ *    if there is one.
+ *
+ * Returns list.  NOTE(review): list is declared and set up outside the
+ * lines visible in this hunk -- confirm its ownership there.
+ */
+struct list_head *extract_doc_sections(char **rawlines)
+{
+ unsigned int *linemap;
+ char **lines = grab_doc(rawlines, &linemap);
const char *function = NULL;
struct doc_section *curr = NULL;
unsigned int i;
for (i = 0; lines[i]; i++) {
unsigned funclen;
+ char *type, *extra;
funclen = is_summary_line(lines[i]);
if (funclen) {
function = talloc_strndup(list, lines[i], funclen);
- curr = new_section(list, function, "summary");
+ curr = new_section(list, function, "summary",
+ linemap[i]);
add_line(curr, lines[i] + funclen + 3);
- curr = new_section(list, function, "description");
- } else if (is_section(lines[i], false)) {
- char *type = talloc_strndup(curr, lines[i],
- strcspn(lines[i], ":"));
- curr = new_section(list, function, type);
- } else if (is_section(lines[i], true)) {
- unsigned int sectlen = strcspn(lines[i], ":");
- char *type = talloc_strndup(curr, lines[i], sectlen);
- curr = new_section(list, function, type);
- add_line(curr, lines[i] + sectlen + 1
- + strspn(lines[i] + sectlen + 1, " \t"));
+ curr = new_section(list, function, "description",
+ linemap[i]);
+ } else if ((type = is_section(list, lines[i], &extra)) != NULL){
+ curr = new_section(list, function, type, linemap[i]);
+ /* Text on the header line itself makes this a one-line
+ * section: store it and stop collecting until the next
+ * summary line or header. */
+ if (!streq(extra, "")) {
+ add_line(curr, extra);
+ curr = NULL;
+ }
} else {
- if (!curr)
- continue;
- add_line(curr, lines[i]);
+ /* Body text is taken from the raw source line (found via
+ * linemap), not from lines[i]; add_detabbed_line expands
+ * its tabs and drops the comment leader itself. */
+ if (curr)
+ add_detabbed_line(curr, rawlines[linemap[i]]);
}
}
+
+ /* Strip common indentation and trailing blank lines per section. */
+ list_for_each(list, curr, list)
+ trim_lines(curr);
+
talloc_free(lines);
return list;
}