7b9bb84ab3f93e50c06be26a1663e18be8054074
[ccan] / tools / doc_extract-core.c
1 /* This merely extracts, doesn't do XML or anything. */
2 #include <ccan/talloc/talloc.h>
3 #include <ccan/str/str.h>
4 #include <ccan/str_talloc/str_talloc.h>
5 #include <err.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <string.h>
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include <fcntl.h>
13 #include <stdbool.h>
14 #include <ctype.h>
15 #include "doc_extract.h"
16 #include "tools.h"
17
18 static char **grab_doc(char **lines, unsigned int **linemap,
19                        const char *file)
20 {
21         char **ret;
22         unsigned int i, num;
23         bool printing = false;
24
25         ret = talloc_array(NULL, char *, talloc_array_length(lines));
26         *linemap = talloc_array(ret, unsigned int, talloc_array_length(lines));
27
28         num = 0;
29         for (i = 0; lines[i]; i++) {
30                 if (streq(lines[i], "/**")) {
31                         printing = true;
32                         if (num != 0) {
33                                 ret[num-1] = talloc_append_string(ret[num-1],
34                                                                   "\n");
35                         }
36                 } else if (streq(lines[i], " */")) 
37                         printing = false;
38                 else if (printing) {
39                         if (strstarts(lines[i], " * "))
40                                 ret[num++] = talloc_strdup(ret, lines[i]+3);
41                         else if (strstarts(lines[i], " *"))
42                                 ret[num++] = talloc_strdup(ret, lines[i]+2);
43                         else {
44                                 /* Weird, malformed? */
45                                 static bool warned;
46                                 if (!warned) {
47                                         warnx("%s:%u:"
48                                               " Expected ' *' in comment.",
49                                               file, i+1);
50                                         warned++;
51                                 }
52                                 ret[num++] = talloc_strdup(ret, lines[i]);
53                                 if (strstr(lines[i], "*/"))
54                                         printing = false;
55                         }
56                         (*linemap)[num-1] = i;
57                 }
58         }
59         ret[num] = NULL;
60         return ret;
61 }
62
/* True iff line is non-NULL and contains nothing but spaces, tabs and
 * newlines (so the empty string is blank, NULL is not). */
static bool is_blank(const char *line)
{
        if (!line)
                return false;

        /* Skip the leading whitespace run; blank means it reached the end. */
        return line[strspn(line, " \t\n")] == '\0';
}
67
/*
 * Test whether line is a section header: one or more words, each starting
 * with an upper-case letter, separated by single spaces and followed by ':'.
 *
 * On a match, returns the header name and stores the text after the colon
 * (leading spaces, tabs and newlines skipped) in *value.  Returns NULL when
 * line does not match.  Both strings come from strreg(); presumably they
 * are allocated off ctx -- confirm against strreg's documentation.
 */
static char *is_section(const void *ctx, const char *line, char **value)
{
        char *name;

        /* Sub-match 1 is the name, 2 (the repeated word) is ignored,
         * 3 is the trailing value. */
        if (strreg(ctx, line,
                   "^([A-Z][a-zA-Z0-9_]*( [A-Z][a-zA-Z0-9_]*)*):[ \t\n]*(.*)",
                   &name, NULL, value))
                return name;

        return NULL;
}
80
81 /* Summary line is form '<identifier> - ' (spaces for 'struct foo -') */
82 static unsigned int is_summary_line(const char *line)
83 {
84         unsigned int id_len;
85
86         /* We allow /, because it can be in (nested) module names. */
87         id_len = strspn(line, IDENT_CHARS" /");
88         if (id_len == 0)
89                 return 0;
90         if (strspn(line, " ") == id_len)
91                 return 0;
92         if (!strstarts(line + id_len-1, " - "))
93                 return 0;
94         return id_len - 1;
95 }
96
97 static bool empty_section(struct doc_section *d)
98 {
99         unsigned int i;
100
101         for (i = 0; i < d->num_lines; i++)
102                 if (!is_blank(d->lines[i]))
103                         return false;
104         return true;
105 }
106
107 static struct doc_section *new_section(struct list_head *list,
108                                        const char *function,
109                                        const char *type,
110                                        unsigned int srcline)
111 {
112         struct doc_section *d;
113         char *lowertype;
114         unsigned int i;
115
116         /* If previous section was empty, delete it. */
117         d = list_tail(list, struct doc_section, list);
118         if (d && empty_section(d)) {
119                 list_del(&d->list);
120                 talloc_free(d);
121         }
122
123         d = talloc(list, struct doc_section);
124         d->function = function;
125         lowertype = talloc_size(d, strlen(type) + 1);
126         /* Canonicalize type to lower case. */
127         for (i = 0; i < strlen(type)+1; i++)
128                 lowertype[i] = tolower(type[i]);
129         d->type = lowertype;
130         d->lines = NULL;
131         d->num_lines = 0;
132         d->srcline = srcline;
133
134         list_add_tail(list, &d->list);
135         return d;
136 }
137
138 static void add_line(struct doc_section *curr, const char *line)
139 {
140         curr->lines = talloc_realloc(curr, curr->lines, char *,
141                                      curr->num_lines+1);
142         curr->lines[curr->num_lines++] = talloc_strdup(curr->lines, line);
143 }
144
145 /* We convert tabs to spaces here. */
146 static void add_detabbed_line(struct doc_section *curr, const char *rawline)
147 {
148         unsigned int i, eff_i, len, off = 0;
149         char *line;
150
151         /* Worst-case alloc: 8 spaces per tab. */
152         line = talloc_array(curr, char, strlen(rawline) +
153                             strcount(rawline, "\t") * 7 + 1);
154         len = 0;
155
156         /* We keep track of the *effective* offset of i. */
157         for (i = eff_i = 0; i < strlen(rawline); i++) {
158                 if (rawline[i] == '\t') {
159                         do {
160                                 line[len++] = ' ';
161                                 eff_i++;
162                         } while (eff_i % 8 != 0);
163                 } else {
164                         line[len++] = rawline[i];
165                         if (off == 0 && rawline[i] == '*')
166                                 off = i + 1;
167                         eff_i++;
168                 }
169         }
170         line[len] = '\0';
171
172         add_line(curr, line + off);
173         talloc_free(line);
174 }
175
176 /* Not very efficient: we could track prefix length while doing
177  * add_detabbed_line */
178 static void trim_lines(struct doc_section *curr)
179 {
180         unsigned int i, trim = -1;
181         int last_non_empty = -1;
182
183         /* Get minimum whitespace prefix. */
184         for (i = 0; i < curr->num_lines; i++) {
185                 unsigned int prefix = strspn(curr->lines[i], " ");
186                 /* Ignore blank lines */
187                 if (curr->lines[i][prefix] == '\0')
188                         continue;
189                 if (prefix < trim)
190                         trim = prefix;
191         }
192
193         /* Now trim it. */
194         for (i = 0; i < curr->num_lines; i++) {
195                 unsigned int prefix = strspn(curr->lines[i], " ");
196                 if (prefix < trim)
197                         curr->lines[i] += prefix;
198                 else
199                         curr->lines[i] += trim;
200
201                 /* All blank?  Potential to trim. */
202                 if (curr->lines[i][strspn(curr->lines[i], " \t")] != '\0')
203                         last_non_empty = i;
204         }
205
206         /* Remove trailing blank lines. */
207         curr->num_lines = last_non_empty + 1;
208 }
209
210 struct list_head *extract_doc_sections(char **rawlines, const char *file)
211 {
212         unsigned int *linemap;
213         char **lines = grab_doc(rawlines, &linemap, file);
214         const char *function = NULL;
215         struct doc_section *curr = NULL;
216         unsigned int i;
217         struct list_head *list;
218
219         list = talloc(NULL, struct list_head);
220         list_head_init(list);
221
222         for (i = 0; lines[i]; i++) {
223                 unsigned funclen;
224                 char *type, *extra;
225
226                 funclen = is_summary_line(lines[i]);
227                 if (funclen) {
228                         function = talloc_strndup(list, lines[i], funclen);
229                         curr = new_section(list, function, "summary",
230                                            linemap[i]);
231                         add_line(curr, lines[i] + funclen + 3);
232                         curr = new_section(list, function, "description",
233                                            linemap[i]);
234                 } else if ((type = is_section(list, lines[i], &extra)) != NULL){
235                         curr = new_section(list, function, type, linemap[i]);
236                         if (!streq(extra, "")) {
237                                 add_line(curr, extra);
238                                 curr = NULL;
239                         }
240                 } else {
241                         if (curr)
242                                 add_detabbed_line(curr, rawlines[linemap[i]]);
243                 }
244         }
245
246         list_for_each(list, curr, list)
247                 trim_lines(curr);
248
249         talloc_free(lines);
250         return list;
251 }