X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=tools%2Fdoc_extract-core.c;h=8695a7e16fa2e2437c1ca1671b2eada3134f2b76;hp=07d31c77c01f15e85cbc3cebda601b050b916c5f;hb=6bc8ea012391198bc3898ae2937558b60dd55906;hpb=b06cf2a67593d4101d1c20b86c5864f062df4a5b

diff --git a/tools/doc_extract-core.c b/tools/doc_extract-core.c
index 07d31c77..8695a7e1 100644
--- a/tools/doc_extract-core.c
+++ b/tools/doc_extract-core.c
@@ -1,4 +1,7 @@
 /* This merely extracts, doesn't do XML or anything. */
+#include <ccan/talloc/talloc.h>
+#include <ccan/str/str.h>
+#include <ccan/str_talloc/str_talloc.h>
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -9,18 +12,17 @@
 #include <fcntl.h>
 #include <stdbool.h>
 #include <ctype.h>
-#include <ccan/talloc/talloc.h>
-#include <ccan/str/str.h>
 #include "doc_extract.h"
 #include "tools.h"
 
-static char **grab_doc(char **lines, unsigned int num)
+static char **grab_doc(char **lines, unsigned int **linemap)
 {
 	char **ret;
-	unsigned int i;
+	unsigned int i, num;
 	bool printing = false;
 
-	ret = talloc_array(NULL, char *, num+1);
+	ret = talloc_array(NULL, char *, talloc_array_length(lines));
+	*linemap = talloc_array(ret, unsigned int, talloc_array_length(lines));
 
 	num = 0;
 	for (i = 0; lines[i]; i++) {
@@ -39,6 +41,7 @@ static char **grab_doc(char **lines, unsigned int num)
 				ret[num++] = talloc_strdup(ret, lines[i]+2);
 			else
 				errx(1, "Malformed line %u", i);
+			(*linemap)[num-1] = i;
 		}
 	}
 	ret[num] = NULL;
@@ -50,28 +53,17 @@ static bool is_blank(const char *line)
 	return line && line[strspn(line, " \t\n")] == '\0';
 }
 
-static bool is_section(const char *line, bool one_liner)
+static char *is_section(const void *ctx, const char *line, char **value)
 {
-	unsigned int len = 0;
+	char *secname; /* filled in by strreg() below and returned to the caller */
 
 	/* Any number of upper case words separated by spaces, ending in : */
-	for (;;) {
-		if (!isupper(line[len]))
-			return false;
-		len += strspn(line+len, IDENT_CHARS);
-		if (line[len] == ':')
-			break;
+	if (!strreg(ctx, line,
+		    "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
+		    &secname, NULL, value)) /* NULL presumably skips the inner group; confirm in strreg docs */
+		return NULL; /* strreg() failed: callers treat NULL as "not a section line" */
 
-		if (line[len] != ' ')
-			return false;
-		len++;
-	}
-
-	/* If it can be a one-liner, a space is sufficient.*/
-	if (one_liner)
-		return (line[len+1] == ' ' || line[len+1] == '\t');
-
-	return line[len] == ':' && is_blank(line+len+1);
+	return secname; /* non-NULL marks a section line; *value was passed to strreg() for the trailing text */
 }
 
 /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
@@ -101,7 +93,8 @@ static bool empty_section(struct doc_section *d)
 
 static struct doc_section *new_section(struct list_head *list,
 				       const char *function,
-				       const char *type)
+				       const char *type,
+				       unsigned int srcline)
 {
 	struct doc_section *d;
 	char *lowertype;
@@ -123,6 +116,7 @@ static struct doc_section *new_section(struct list_head *list,
 	d->type = lowertype;
 	d->lines = NULL;
 	d->num_lines = 0;
+	d->srcline = srcline;
 
 	list_add_tail(list, &d->list);
 	return d;
@@ -135,9 +129,67 @@ static void add_line(struct doc_section *curr, const char *line)
 	curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
 }
 
-struct list_head *extract_doc_sections(char **rawlines, unsigned int num)
+/* Expand tabs to 8-column stops; drop text through the first '*', if any. */
+static void add_detabbed_line(struct doc_section *curr, const char *rawline)
+{
+	unsigned int i, eff_i, len = 0, off = 0;
+	char *line;
+
+	/* Worst-case alloc: 8 spaces per tab. */
+	line = talloc_array(curr, char, strlen(rawline) +
+			    strcount(rawline, "\t") * 7 + 1);
+
+	/* We keep track of the *effective* offset of i. */
+	for (i = eff_i = 0; rawline[i] != '\0'; i++) {
+		if (rawline[i] == '\t') {
+			do {
+				line[len++] = ' ';
+				eff_i++;
+			} while (eff_i % 8 != 0);
+		} else {
+			line[len++] = rawline[i];
+			/* Index into the expanded line (len), not rawline (i). */
+			if (off == 0 && rawline[i] == '*')
+				off = len;
+			eff_i++;
+		}
+	}
+	line[len] = '\0';
+
+	add_line(curr, line + off);
+	talloc_free(line);
+}
+
+/* Remove the common leading-space indent from curr's lines.  Not very
+ * efficient: we could track prefix length while doing add_detabbed_line */
+static void trim_lines(struct doc_section *curr)
 {
-	char **lines = grab_doc(rawlines, num);
+	unsigned int i, trim = -1; /* -1 wraps to the largest unsigned value: no minimum yet */
+
+	/* Get minimum space prefix over the lines that are not all spaces. */
+	for (i = 0; i < curr->num_lines; i++) {
+		unsigned int prefix = strspn(curr->lines[i], " ");
+		/* Ignore blank lines: they must not shrink the common indent */
+		if (curr->lines[i][prefix] == '\0')
+			continue;
+		if (prefix < trim)
+			trim = prefix;
+	}
+
+	/* Now trim it; only all-space lines can have prefix < trim here. */
+	for (i = 0; i < curr->num_lines; i++) {
+		unsigned int prefix = strspn(curr->lines[i], " ");
+		if (prefix < trim)
+			curr->lines[i] += prefix;
+		else
+			curr->lines[i] += trim;
+	}
+}
+
+struct list_head *extract_doc_sections(char **rawlines)
+{
+	unsigned int *linemap;
+	char **lines = grab_doc(rawlines, &linemap);
 	const char *function = NULL;
 	struct doc_section *curr = NULL;
 	unsigned int i;
@@ -148,29 +200,31 @@ struct list_head *extract_doc_sections(char **rawlines, unsigned int num)
 
 	for (i = 0; lines[i]; i++) {
 		unsigned funclen;
+		char *type, *extra;
 
 		funclen = is_summary_line(lines[i]);
 		if (funclen) {
 			function = talloc_strndup(list, lines[i], funclen);
-			curr = new_section(list, function, "summary");
+			curr = new_section(list, function, "summary",
+					   linemap[i]);
 			add_line(curr, lines[i] + funclen + 3);
-			curr = new_section(list, function, "description");
-		} else if (is_section(lines[i], false)) {
-			char *type = talloc_strndup(curr, lines[i],
-						    strcspn(lines[i], ":"));
-			curr = new_section(list, function, type);
-		} else if (is_section(lines[i], true)) {
-			unsigned int sectlen = strcspn(lines[i], ":");
-			char *type = talloc_strndup(curr, lines[i], sectlen);
-			curr = new_section(list, function, type);
-			add_line(curr, lines[i] + sectlen + 1
-				 + strspn(lines[i] + sectlen + 1, " \t"));
+			curr = new_section(list, function, "description",
+					   linemap[i]);
+		} else if ((type = is_section(list, lines[i], &extra)) != NULL){
+			curr = new_section(list, function, type, linemap[i]);
+			if (!streq(extra, "")) {
+				add_line(curr, extra);
+				curr = NULL;
+			}
 		} else {
-			if (!curr)
-				continue;
-			add_line(curr, lines[i]);
+			if (curr)
+				add_detabbed_line(curr, rawlines[linemap[i]]);
 		}
 	}
+
+	list_for_each(list, curr, list)
+		trim_lines(curr);
+
 	talloc_free(lines);
 	return list;
 }