X-Git-Url: http://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=tools%2Fdoc_extract-core.c;h=069bf2237c6815f12f428e272970ad863c4b9188;hp=f3666e59924926aeed86bec1478fad53e1f6eb56;hb=adc90816df194167a588a943ae503a37fec3fb6a;hpb=44c480c492c4596801261d748c5e3339c30f1f7e

diff --git a/tools/doc_extract-core.c b/tools/doc_extract-core.c
index f3666e59..069bf223 100644
--- a/tools/doc_extract-core.c
+++ b/tools/doc_extract-core.c
@@ -1,4 +1,6 @@
 /* This merely extracts, doesn't do XML or anything. */
+#include <ccan/take/take.h>
+#include <ccan/str/str.h>
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -9,34 +11,48 @@
 #include <fcntl.h>
 #include <stdbool.h>
 #include <ctype.h>
-#include <ccan/talloc/talloc.h>
-#include <ccan/str/str.h>
 #include "doc_extract.h"
 #include "tools.h"
 
-static char **grab_doc(char **lines, unsigned int num)
+static char **grab_doc(char **lines, unsigned int **linemap,
+		       const char *file)
 {
 	char **ret;
-	unsigned int i;
+	unsigned int i, num;
 	bool printing = false;
 
-	ret = talloc_array(NULL, char *, num+1);
+	ret = tal_arr(NULL, char *, tal_count(lines));
+	*linemap = tal_arr(ret, unsigned int, tal_count(lines));
 
 	num = 0;
 	for (i = 0; lines[i]; i++) {
 		if (streq(lines[i], "/**")) {
 			printing = true;
-			if (num != 0)
-				talloc_append_string(ret[num-1], "\n");
+			if (num != 0) {
+				ret[num-1] = tal_strcat(NULL,
+							take(ret[num-1]), "\n");
+			}
 		} else if (streq(lines[i], " */")) 
 			printing = false;
 		else if (printing) {
 			if (strstarts(lines[i], " * "))
-				ret[num++] = talloc_strdup(ret, lines[i]+3);
+				ret[num++] = tal_strdup(ret, lines[i]+3);
 			else if (strstarts(lines[i], " *"))
-				ret[num++] = talloc_strdup(ret, lines[i]+2);
-			else
-				errx(1, "Malformed line %u", i);
+				ret[num++] = tal_strdup(ret, lines[i]+2);
+			else {
+				/* Weird, malformed? */
+				static bool warned;
+				if (!warned) {
+					warnx("%s:%u:"
+					      " Expected ' *' in comment.",
+					      file, i+1);
+					warned++;
+				}
+				ret[num++] = tal_strdup(ret, lines[i]);
+				if (strstr(lines[i], "*/"))
+					printing = false;
+			}
+			(*linemap)[num-1] = i;
 		}
 	}
 	ret[num] = NULL;
@@ -48,35 +64,33 @@ static bool is_blank(const char *line)
 	return line && line[strspn(line, " \t\n")] == '\0';
 }
 
-static bool is_section(const char *line, bool one_liner)
+static char *is_section(const void *ctx, const char *line, char **value)
 {
-	unsigned int len;
+	char *secname;	/* filled from the first regex group: the section name */
 
-	if (!isupper(line[0]))
-		return false;
-	len = strspn(line, IDENT_CHARS);
-	if (line[len] != ':')
-		return false;
+	/* Capitalized words separated by single spaces, ending in ':'. */
+	if (!tal_strreg(ctx, line,
+		    "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
+		    &secname, NULL, value))	/* *value: text after the colon */
+		return NULL;	/* not a section header */
 
-	/* If it can be a one-liner, a space is sufficient.*/
-	if (one_liner)
-		return (line[len+1] == ' ' || line[len+1] == '\t');
-
-	return line[len] == ':' && is_blank(line+len+1);
+	return secname;
 }
 
-/* Summary line is form '<identifier> - ' */
-static bool is_summary_line(const char *line)
+/* Summary line is form '<identifier> - ' (spaces allowed, for 'struct foo -'); returns identifier length, or 0 if not one. */
+static unsigned int is_summary_line(const char *line)
 {
 	unsigned int id_len;
 
-	id_len = strspn(line, IDENT_CHARS);
+	/* We allow /, because it can be in (nested) module names. */
+	id_len = strspn(line, IDENT_CHARS" /");
 	if (id_len == 0)
-		return false;
-	if (!strstarts(line + id_len, " - "))
-		return false;
-
-	return true;
+		return 0;
+	if (strspn(line, " ") == id_len)	/* span is nothing but spaces */
+		return 0;
+	if (!strstarts(line + id_len-1, " - "))	/* span already swallowed the space before '-' */
+		return 0;
+	return id_len - 1;	/* length without that trailing space */
 }
 
 static bool empty_section(struct doc_section *d)
@@ -91,7 +105,8 @@ static bool empty_section(struct doc_section *d)
 
 static struct doc_section *new_section(struct list_head *list,
 				       const char *function,
-				       const char *type)
+				       const char *type,
+				       unsigned int srcline)
 {
 	struct doc_section *d;
 	char *lowertype;
@@ -101,18 +116,19 @@ static struct doc_section *new_section(struct list_head *list,
 	d = list_tail(list, struct doc_section, list);
 	if (d && empty_section(d)) {
 		list_del(&d->list);
-		talloc_free(d);
+		tal_free(d);
 	}
 
-	d = talloc(list, struct doc_section);
+	d = tal(list, struct doc_section);
 	d->function = function;
-	lowertype = talloc_size(d, strlen(type) + 1);
+	lowertype = tal_arr(d, char, strlen(type) + 1);
 	/* Canonicalize type to lower case. */
 	for (i = 0; i < strlen(type)+1; i++)
 		lowertype[i] = tolower(type[i]);
 	d->type = lowertype;
-	d->lines = NULL;
+	d->lines = tal_arr(d, char *, 0);
 	d->num_lines = 0;
+	d->srcline = srcline;
 
 	list_add_tail(list, &d->list);
 	return d;
@@ -120,45 +136,115 @@ static struct doc_section *new_section(struct list_head *list,
 
 static void add_line(struct doc_section *curr, const char *line)
 {
-	curr->lines = talloc_realloc(curr, curr->lines, char *,
-				     curr->num_lines+1);
-	curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
+	char *myline = tal_strdup(curr->lines, line);	/* copy allocated off the lines array */
+	tal_expand(&curr->lines, &myline, 1);	/* append that one pointer to the array */
+	curr->num_lines++;	/* num_lines is tracked separately from the array itself */
+}
+
+/* Append rawline to curr with tabs expanded to 8-column stops, dropping
+ * everything up to and including the first '*' (the comment leader). */
+static void add_detabbed_line(struct doc_section *curr, const char *rawline)
+{
+	size_t i, rawlen = strlen(rawline);
+	unsigned int len = 0, off = 0;	/* len is also the current column */
+	char *line;
+
+	/* Worst-case alloc: 8 spaces per tab. */
+	line = tal_arr(curr, char, rawlen + strcount(rawline, "\t") * 7 + 1);
+
+	for (i = 0; i < rawlen; i++) {
+		if (rawline[i] == '\t') {
+			/* Pad to the next multiple of 8 (at least one). */
+			do {
+				line[len++] = ' ';
+			} while (len % 8 != 0);
+		} else {
+			line[len++] = rawline[i];
+			/* The offset must index the expanded buffer, not the
+			 * raw line: they differ once a tab has been seen. */
+			if (off == 0 && rawline[i] == '*')
+				off = len;
+		}
+	}
+	line[len] = '\0';
+
+	add_line(curr, line + off);
+	tal_free(line);
+}
+
+/* Strip the indent common to all non-blank lines of curr, then drop trailing
+ * blank lines.  Not very efficient: we could track prefix length while doing
+ * add_detabbed_line */
+static void trim_lines(struct doc_section *curr)
+{
+	/* -1 wraps to the largest unsigned value: "no indent seen yet". */
+	unsigned int n, indent, common = -1;
+	int keep = 0;
+
+	/* Pass 1: smallest space prefix over the non-blank lines. */
+	for (n = 0; n < curr->num_lines; n++) {
+		indent = strspn(curr->lines[n], " ");
+		if (curr->lines[n][indent] != '\0' && indent < common)
+			common = indent;
+	}
+
+	/* Pass 2: advance each line past that prefix; a line with fewer
+	 * spaces than that (only blank ones can) loses all of them. */
+	for (n = 0; n < curr->num_lines; n++) {
+		char *text;
+
+		indent = strspn(curr->lines[n], " ");
+		curr->lines[n] += (indent < common) ? indent : common;
+
+		/* Remember how many lines to keep: up to the last one
+		 * holding something other than spaces and tabs. */
+		text = curr->lines[n] + strspn(curr->lines[n], " \t");
+		if (*text != '\0')
+			keep = n + 1;
+	}
+
+	curr->num_lines = keep;
 }
 
-struct list_head *extract_doc_sections(char **rawlines, unsigned int num)
+struct list_head *extract_doc_sections(char **rawlines, const char *file)
 {
-	char **lines = grab_doc(rawlines, num);
+	unsigned int *linemap;	/* linemap[i]: index into rawlines that lines[i] came from */
+	char **lines = grab_doc(rawlines, &linemap, file);
 	const char *function = NULL;
 	struct doc_section *curr = NULL;
 	unsigned int i;
 	struct list_head *list;
 
-	list = talloc(NULL, struct list_head);
+	list = tal(NULL, struct list_head);
 	list_head_init(list);
 
 	for (i = 0; lines[i]; i++) {
-		if (is_summary_line(lines[i])) {
-			function = talloc_strndup(list, lines[i],
-						  strcspn(lines[i], " "));
-			curr = new_section(list, function, "summary");
-			add_line(curr, strstr(lines[i], " - ") + 3);
-			curr = new_section(list, function, "description");
-		} else if (is_section(lines[i], false)) {
-			char *type = talloc_strndup(curr, lines[i],
-						    strcspn(lines[i], ":"));
-			curr = new_section(list, function, type);
-		} else if (is_section(lines[i], true)) {
-			unsigned int sectlen = strcspn(lines[i], ":");
-			char *type = talloc_strndup(curr, lines[i], sectlen);
-			curr = new_section(list, function, type);
-			add_line(curr, lines[i] + sectlen + 1
-				 + strspn(lines[i] + sectlen + 1, " \t"));
+		unsigned funclen;
+		char *type, *extra;
+
+		funclen = is_summary_line(lines[i]);	/* 0 unless this is a '<name> - ...' line */
+		if (funclen) {
+			function = tal_strndup(list, lines[i], funclen);
+			curr = new_section(list, function, "summary",
+					   linemap[i]);
+			add_line(curr, lines[i] + funclen + 3);	/* skip the " - " separator */
+			curr = new_section(list, function, "description",
+					   linemap[i]);
+		} else if ((type = is_section(list, lines[i], &extra)) != NULL){
+			curr = new_section(list, function, type, linemap[i]);
+			if (!streq(extra, "")) {
+				add_line(curr, extra);
+				curr = NULL;	/* one-line section: later lines are ignored until the next header */
+			}
 		} else {
-			if (!curr)
-				continue;
-			add_line(curr, lines[i]);
+			if (curr)
+				add_detabbed_line(curr, rawlines[linemap[i]]);	/* pass the original raw line, not the stripped copy */
 		}
 	}
-	talloc_free(lines);
+
+	list_for_each(list, curr, list)	/* post-process every section gathered above */
+		trim_lines(curr);
+
+	tal_free(lines);	/* linemap was allocated off this array in grab_doc */
 	return list;
 }