tools: don't assume modules are immediately below ccan/ dir.
[ccan] / tools / doc_extract-core.c
1 /* This merely extracts, doesn't do XML or anything. */
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <err.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <string.h>
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include <fcntl.h>
13 #include <stdbool.h>
14 #include <ctype.h>
15 #include "doc_extract.h"
16 #include "tools.h"
17
18 static char **grab_doc(char **lines, unsigned int **linemap)
19 {
20         char **ret;
21         unsigned int i, num;
22         bool printing = false;
23
24         ret = talloc_array(NULL, char *, talloc_array_length(lines));
25         *linemap = talloc_array(ret, unsigned int, talloc_array_length(lines));
26
27         num = 0;
28         for (i = 0; lines[i]; i++) {
29                 if (streq(lines[i], "/**")) {
30                         printing = true;
31                         if (num != 0) {
32                                 ret[num-1] = talloc_append_string(ret[num-1],
33                                                                   "\n");
34                         }
35                 } else if (streq(lines[i], " */")) 
36                         printing = false;
37                 else if (printing) {
38                         if (strstarts(lines[i], " * "))
39                                 ret[num++] = talloc_strdup(ret, lines[i]+3);
40                         else if (strstarts(lines[i], " *"))
41                                 ret[num++] = talloc_strdup(ret, lines[i]+2);
42                         else
43                                 errx(1, "Malformed line %u", i);
44                         (*linemap)[num-1] = i;
45                 }
46         }
47         ret[num] = NULL;
48         return ret;
49 }
50
/* True when @line is non-NULL and holds nothing but spaces, tabs and
 * newlines (so an empty string is blank, a NULL pointer is not). */
static bool is_blank(const char *line)
{
        if (!line)
                return false;

        while (*line == ' ' || *line == '\t' || *line == '\n')
                line++;

        return *line == '\0';
}
55
/*
 * Test whether @line is a section heading: one or more words, each starting
 * with an upper-case letter, separated by single spaces and followed by ':'.
 *
 * Returns the heading text captured by strreg() on a match, or NULL when
 * the line does not match.  On a match *@value receives the last capture
 * (presumably whatever follows the colon and any whitespace after it --
 * confirm against strreg()'s handling of the NULL capture argument).
 */
static char *is_section(const void *ctx, const char *line, char **value)
{
        char *heading;

        if (strreg(ctx, line,
                   "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
                   &heading, NULL, value))
                return heading;

        return NULL;
}
68
69 /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
70 static unsigned int is_summary_line(const char *line)
71 {
72         unsigned int id_len;
73
74         /* We allow /, because it can be in (nested) module names. */
75         id_len = strspn(line, IDENT_CHARS" /");
76         if (id_len == 0)
77                 return 0;
78         if (strspn(line, " ") == id_len)
79                 return 0;
80         if (!strstarts(line + id_len-1, " - "))
81                 return 0;
82         return id_len - 1;
83 }
84
85 static bool empty_section(struct doc_section *d)
86 {
87         unsigned int i;
88
89         for (i = 0; i < d->num_lines; i++)
90                 if (!is_blank(d->lines[i]))
91                         return false;
92         return true;
93 }
94
95 static struct doc_section *new_section(struct list_head *list,
96                                        const char *function,
97                                        const char *type,
98                                        unsigned int srcline)
99 {
100         struct doc_section *d;
101         char *lowertype;
102         unsigned int i;
103
104         /* If previous section was empty, delete it. */
105         d = list_tail(list, struct doc_section, list);
106         if (d && empty_section(d)) {
107                 list_del(&d->list);
108                 talloc_free(d);
109         }
110
111         d = talloc(list, struct doc_section);
112         d->function = function;
113         lowertype = talloc_size(d, strlen(type) + 1);
114         /* Canonicalize type to lower case. */
115         for (i = 0; i < strlen(type)+1; i++)
116                 lowertype[i] = tolower(type[i]);
117         d->type = lowertype;
118         d->lines = NULL;
119         d->num_lines = 0;
120         d->srcline = srcline;
121
122         list_add_tail(list, &d->list);
123         return d;
124 }
125
126 static void add_line(struct doc_section *curr, const char *line)
127 {
128         curr->lines = talloc_realloc(curr, curr->lines, char *,
129                                      curr->num_lines+1);
130         curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
131 }
132
133 /* We convert tabs to spaces here. */
134 static void add_detabbed_line(struct doc_section *curr, const char *rawline)
135 {
136         unsigned int i, eff_i, len, off = 0;
137         char *line;
138
139         /* Worst-case alloc: 8 spaces per tab. */
140         line = talloc_array(curr, char, strlen(rawline) +
141                             strcount(rawline, "\t") * 7 + 1);
142         len = 0;
143
144         /* We keep track of the *effective* offset of i. */
145         for (i = eff_i = 0; i < strlen(rawline); i++) {
146                 if (rawline[i] == '\t') {
147                         do {
148                                 line[len++] = ' ';
149                                 eff_i++;
150                         } while (eff_i % 8 != 0);
151                 } else {
152                         line[len++] = rawline[i];
153                         if (off == 0 && rawline[i] == '*')
154                                 off = i + 1;
155                         eff_i++;
156                 }
157         }
158         line[len] = '\0';
159
160         add_line(curr, line + off);
161         talloc_free(line);
162 }
163
164 /* Not very efficient: we could track prefix length while doing
165  * add_detabbed_line */
166 static void trim_lines(struct doc_section *curr)
167 {
168         unsigned int i, trim = -1;
169         int last_non_empty = -1;
170
171         /* Get minimum whitespace prefix. */
172         for (i = 0; i < curr->num_lines; i++) {
173                 unsigned int prefix = strspn(curr->lines[i], " ");
174                 /* Ignore blank lines */
175                 if (curr->lines[i][prefix] == '\0')
176                         continue;
177                 if (prefix < trim)
178                         trim = prefix;
179         }
180
181         /* Now trim it. */
182         for (i = 0; i < curr->num_lines; i++) {
183                 unsigned int prefix = strspn(curr->lines[i], " ");
184                 if (prefix < trim)
185                         curr->lines[i] += prefix;
186                 else
187                         curr->lines[i] += trim;
188
189                 /* All blank?  Potential to trim. */
190                 if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0')
191                         last_non_empty = i;
192         }
193
194         /* Remove trailing blank lines. */
195         curr->num_lines = last_non_empty + 1;
196 }
197
198 struct list_head *extract_doc_sections(char **rawlines)
199 {
200         unsigned int *linemap;
201         char **lines = grab_doc(rawlines, &linemap);
202         const char *function = NULL;
203         struct doc_section *curr = NULL;
204         unsigned int i;
205         struct list_head *list;
206
207         list = talloc(NULL, struct list_head);
208         list_head_init(list);
209
210         for (i = 0; lines[i]; i++) {
211                 unsigned funclen;
212                 char *type, *extra;
213
214                 funclen = is_summary_line(lines[i]);
215                 if (funclen) {
216                         function = talloc_strndup(list, lines[i], funclen);
217                         curr = new_section(list, function, "summary",
218                                            linemap[i]);
219                         add_line(curr, lines[i] + funclen + 3);
220                         curr = new_section(list, function, "description",
221                                            linemap[i]);
222                 } else if ((type = is_section(list, lines[i], &extra)) != NULL){
223                         curr = new_section(list, function, type, linemap[i]);
224                         if (!streq(extra, "")) {
225                                 add_line(curr, extra);
226                                 curr = NULL;
227                         }
228                 } else {
229                         if (curr)
230                                 add_detabbed_line(curr, rawlines[linemap[i]]);
231                 }
232         }
233
234         list_for_each(list, curr, list)
235                 trim_lines(curr);
236
237         talloc_free(lines);
238         return list;
239 }