]> git.ozlabs.org Git - ccan/blob - tools/doc_extract-core.c
htable: add a htable_prev method to oppose _next
[ccan] / tools / doc_extract-core.c
1 /* This merely extracts, doesn't do XML or anything. */
2 #include <ccan/take/take.h>
3 #include <ccan/str/str.h>
4 #include <err.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <unistd.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <stdbool.h>
13 #include <ctype.h>
14 #include "doc_extract.h"
15 #include "tools.h"
16
17 static char **grab_doc(char **lines, unsigned int **linemap,
18                        const char *file)
19 {
20         char **ret;
21         unsigned int i, num;
22         bool printing = false;
23
24         ret = tal_arr(NULL, char *, tal_count(lines));
25         *linemap = tal_arr(ret, unsigned int, tal_count(lines));
26
27         num = 0;
28         for (i = 0; lines[i]; i++) {
29                 if (streq(lines[i], "/**")) {
30                         printing = true;
31                         if (num != 0) {
32                                 ret[num-1] = tal_strcat(NULL,
33                                                         take(ret[num-1]), "\n");
34                         }
35                 } else if (streq(lines[i], " */")) 
36                         printing = false;
37                 else if (printing) {
38                         if (strstarts(lines[i], " * "))
39                                 ret[num++] = tal_strdup(ret, lines[i]+3);
40                         else if (strstarts(lines[i], " *"))
41                                 ret[num++] = tal_strdup(ret, lines[i]+2);
42                         else {
43                                 /* Weird, malformed? */
44                                 static bool warned;
45                                 if (!warned) {
46                                         warnx("%s:%u:"
47                                               " Expected ' *' in comment.",
48                                               file, i+1);
49                                         warned++;
50                                 }
51                                 ret[num++] = tal_strdup(ret, lines[i]);
52                                 if (strstr(lines[i], "*/"))
53                                         printing = false;
54                         }
55                         (*linemap)[num-1] = i;
56                 }
57         }
58         ret[num] = NULL;
59         return ret;
60 }
61
/* True if line is non-NULL and contains only spaces, tabs and newlines
 * (the empty string counts as blank). */
static bool is_blank(const char *line)
{
        const char *p;

        if (!line)
                return false;

        /* Skip over the whitespace we accept; blank means we hit the end. */
        for (p = line; *p == ' ' || *p == '\t' || *p == '\n'; p++)
                ;
        return *p == '\0';
}
66
67 static char *is_section(const void *ctx, const char *line, char **value)
68 {
69         char *secname;
70
71         /* Any number of upper case words separated by spaces, ending in : */
72         if (!tal_strreg(ctx, line,
73                     "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
74                     &secname, NULL, value))
75                 return NULL;
76
77         return secname;
78 }
79
80 /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
81 static unsigned int is_summary_line(const char *line)
82 {
83         unsigned int id_len;
84
85         /* We allow /, because it can be in (nested) module names. */
86         id_len = strspn(line, IDENT_CHARS" /");
87         if (id_len == 0)
88                 return 0;
89         if (strspn(line, " ") == id_len)
90                 return 0;
91         if (!strstarts(line + id_len-1, " - "))
92                 return 0;
93         return id_len - 1;
94 }
95
96 static bool empty_section(struct doc_section *d)
97 {
98         unsigned int i;
99
100         for (i = 0; i < d->num_lines; i++)
101                 if (!is_blank(d->lines[i]))
102                         return false;
103         return true;
104 }
105
106 static struct doc_section *new_section(struct list_head *list,
107                                        const char *function,
108                                        const char *type,
109                                        unsigned int srcline)
110 {
111         struct doc_section *d;
112         char *lowertype;
113         unsigned int i;
114
115         /* If previous section was empty, delete it. */
116         d = list_tail(list, struct doc_section, list);
117         if (d && empty_section(d)) {
118                 list_del(&d->list);
119                 tal_free(d);
120         }
121
122         d = tal(list, struct doc_section);
123         d->function = function;
124         lowertype = tal_arr(d, char, strlen(type) + 1);
125         /* Canonicalize type to lower case. */
126         for (i = 0; i < strlen(type)+1; i++)
127                 lowertype[i] = tolower(type[i]);
128         d->type = lowertype;
129         d->lines = tal_arr(d, char *, 0);
130         d->num_lines = 0;
131         d->srcline = srcline;
132
133         list_add_tail(list, &d->list);
134         return d;
135 }
136
137 static void add_line(struct doc_section *curr, const char *line)
138 {
139         char *myline = tal_strdup(curr->lines, line);
140         tal_expand(&curr->lines, &myline, 1);
141         curr->num_lines++;
142 }
143
144 /* We convert tabs to spaces here. */
145 static void add_detabbed_line(struct doc_section *curr, const char *rawline)
146 {
147         unsigned int i, eff_i, len, off = 0;
148         char *line;
149
150         /* Worst-case alloc: 8 spaces per tab. */
151         line = tal_arr(curr, char, strlen(rawline) +
152                        strcount(rawline, "\t") * 7 + 1);
153         len = 0;
154
155         /* We keep track of the *effective* offset of i. */
156         for (i = eff_i = 0; i < strlen(rawline); i++) {
157                 if (rawline[i] == '\t') {
158                         do {
159                                 line[len++] = ' ';
160                                 eff_i++;
161                         } while (eff_i % 8 != 0);
162                 } else {
163                         line[len++] = rawline[i];
164                         if (off == 0 && rawline[i] == '*')
165                                 off = i + 1;
166                         eff_i++;
167                 }
168         }
169         line[len] = '\0';
170
171         add_line(curr, line + off);
172         tal_free(line);
173 }
174
175 /* Not very efficient: we could track prefix length while doing
176  * add_detabbed_line */
177 static void trim_lines(struct doc_section *curr)
178 {
179         unsigned int i, trim = -1;
180         int last_non_empty = -1;
181
182         /* Get minimum whitespace prefix. */
183         for (i = 0; i < curr->num_lines; i++) {
184                 unsigned int prefix = strspn(curr->lines[i], " ");
185                 /* Ignore blank lines */
186                 if (curr->lines[i][prefix] == '\0')
187                         continue;
188                 if (prefix < trim)
189                         trim = prefix;
190         }
191
192         /* Now trim it. */
193         for (i = 0; i < curr->num_lines; i++) {
194                 unsigned int prefix = strspn(curr->lines[i], " ");
195                 if (prefix < trim)
196                         curr->lines[i] += prefix;
197                 else
198                         curr->lines[i] += trim;
199
200                 /* All blank?  Potential to trim. */
201                 if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0')
202                         last_non_empty = i;
203         }
204
205         /* Remove trailing blank lines. */
206         curr->num_lines = last_non_empty + 1;
207 }
208
209 struct list_head *extract_doc_sections(char **rawlines, const char *file)
210 {
211         unsigned int *linemap;
212         char **lines = grab_doc(rawlines, &linemap, file);
213         const char *function = NULL;
214         struct doc_section *curr = NULL;
215         unsigned int i;
216         struct list_head *list;
217
218         list = tal(NULL, struct list_head);
219         list_head_init(list);
220
221         for (i = 0; lines[i]; i++) {
222                 unsigned funclen;
223                 char *type, *extra;
224
225                 funclen = is_summary_line(lines[i]);
226                 if (funclen) {
227                         function = tal_strndup(list, lines[i], funclen);
228                         curr = new_section(list, function, "summary",
229                                            linemap[i]);
230                         add_line(curr, lines[i] + funclen + 3);
231                         curr = new_section(list, function, "description",
232                                            linemap[i]);
233                 } else if ((type = is_section(list, lines[i], &extra)) != NULL){
234                         curr = new_section(list, function, type, linemap[i]);
235                         if (!streq(extra, "")) {
236                                 add_line(curr, extra);
237                                 curr = NULL;
238                         }
239                 } else {
240                         if (curr)
241                                 add_detabbed_line(curr, rawlines[linemap[i]]);
242                 }
243         }
244
245         list_for_each(list, curr, list)
246                 trim_lines(curr);
247
248         tal_free(lines);
249         return list;
250 }