rfc822: Index headers by name
[ccan] / ccan / rfc822 / rfc822.c
1 /* Licensed under LGPLv2.1+ - see LICENSE file for details */
2
3 #include "config.h"
4
5 #include <string.h>
6
7 #include <ccan/str/str.h>
8 #include <ccan/talloc/talloc.h>
9 #include <ccan/list/list.h>
10
11 #include <ccan/rfc822/rfc822.h>
12
13 #if !HAVE_MEMMEM
/*
 * Fallback memmem() for platforms whose C library does not provide one.
 *
 * Returns a pointer to the first occurrence of the needlelen bytes at
 * needle within the haystacklen bytes at haystack, or NULL if there is
 * no such occurrence.  An empty needle matches at the start of the
 * haystack.
 */
void *memmem(const void *haystack, size_t haystacklen,
	     const void *needle, size_t needlelen)
{
	const char *base = haystack;
	size_t off, last;

	if (needlelen == 0)
		return (void *)haystack;

	/*
	 * A needle longer than the haystack can never match; rejecting it
	 * up front keeps every memcmp() below inside the haystack.
	 */
	if (needlelen > haystacklen)
		return NULL;

	/* Last offset at which a full needle still fits */
	last = haystacklen - needlelen;

	for (off = 0; off <= last; off++) {
		if (memcmp(base + off, needle, needlelen) == 0)
			return (void *)(base + off);
	}

	return NULL;
}
29 #endif
30
31 static void (*allocation_failure_hook)(const char *);
32
33 static void NORETURN default_allocation_failure(const char *s)
34 {
35         fprintf(stderr, "ccan/rfc822: Allocation failure: %s", s);
36         abort();
37 }
38
39 static void allocation_failure(const char *s)
40 {
41         if (allocation_failure_hook)
42                 (*allocation_failure_hook)(s);
43         else
44                 default_allocation_failure(s);
45 }
46
47 void rfc822_set_allocation_failure_handler(void (*h)(const char *))
48 {
49         allocation_failure_hook = h;
50 }
51
52 #define ALLOC_CHECK(p, r) \
53         do { \
54                 if (!(p)) { \
55                         allocation_failure(__FILE__ ":" stringify(__LINE__)); \
56                         return (r); \
57                 } \
58         } while (0)
59
60 /*
61  * No real point doing fancy resizing hashes, when any given mail
62  * message is unlikely to have more than a fairly small number of
63  * distinct header types.  This should be ample.
64  */
65 #define INDEX_HASH_SIZE         63
66
67 struct rfc822_msg {
68         const char *data, *end;
69         const char *remainder;
70         struct list_head headers;
71         struct list_head header_index[INDEX_HASH_SIZE];
72         const char *body;
73 };
74
75 struct rfc822_header {
76         struct bytestring all, rawname, rawvalue;
77         struct bytestring unfolded;
78         struct list_node list;
79         struct rfc822_header *name_next;
80 };
81
82 struct rfc822_headers_of_name {
83         struct bytestring name;
84         struct rfc822_header *first;
85         struct rfc822_header **lastptr;
86         struct list_node bucket;
87 };
88
89 struct rfc822_msg *rfc822_check(const struct rfc822_msg *msg,
90                                 const char *abortstr)
91 {
92         assert(msg);
93         if (!list_check(&msg->headers, abortstr))
94                 return NULL;
95         return (struct rfc822_msg *)msg;
96 }
97
98 #ifdef CCAN_RFC822_DEBUG
99 #define CHECK(msg, str) do { rfc822_check((msg), (str)); } while (0)
100 #else
101 #define CHECK(msg, str) do { } while (0)
102 #endif
103
104 struct rfc822_msg *rfc822_start(const void *ctx, const char *p, size_t len)
105 {
106         struct rfc822_msg *msg;
107         int i;
108
109         msg = talloc(ctx, struct rfc822_msg);
110         ALLOC_CHECK(msg, NULL);
111
112         msg->data = p;
113         msg->end = p + len;
114
115         msg->remainder = msg->data;
116         msg->body = NULL;
117
118         list_head_init(&msg->headers);
119
120         for (i = 0; i < INDEX_HASH_SIZE; i++)
121                 list_head_init(&msg->header_index[i]);
122
123         CHECK(msg, "<rfc22_start");
124
125         return msg;
126 }
127
/*
 * Release @msg with talloc_free().  Headers, index entries and unfolded
 * values are allocated with @msg as their talloc parent.
 */
void rfc822_free(struct rfc822_msg *msg)
{
	CHECK(msg, ">rfc822_free");
	talloc_free(msg);
}
133
134 static struct rfc822_header *next_header_cached(struct rfc822_msg *msg,
135                                                 struct rfc822_header *hdr)
136 {
137         struct list_node *h = &msg->headers.n;
138         const struct list_node *n = h;
139
140         CHECK(msg, ">next_header_cached");
141
142         if (hdr)
143                 n = &hdr->list;
144
145         if (n->next == h)
146                 return NULL;
147
148         CHECK(msg, "<next_header_cached");
149
150         return list_entry(n->next, struct rfc822_header, list);
151 }
152
/*
 * Return the start of the line following the one beginning at @start,
 * i.e. one past the first '\n' in [@start, @end), or @end if that range
 * contains no newline.
 */
static const char *next_line(const char *start, const char *end)
{
	const char *nl = memchr(start, '\n', end - start);

	if (!nl)
		return end;

	return nl + 1;
}
159
160 static struct rfc822_header *index_header(struct rfc822_msg *msg,
161                                           struct rfc822_header *hdr);
162
163 static struct rfc822_header *next_header_parse(struct rfc822_msg *msg)
164 {
165         const char *h, *eh, *ev, *colon;
166         struct rfc822_header *hi;
167
168         CHECK(msg, ">next_header_parse");
169
170         if (!msg->remainder)
171                 return NULL;
172
173         if (msg->body && (msg->remainder >= msg->body))
174                 return NULL;
175
176         h = msg->remainder;
177         eh = next_line(h, msg->end);
178
179         ev = eh;
180         if ((ev > h) && (ev[-1] == '\n'))
181                 ev--;
182         if ((ev > h) && (ev[-1] == '\r'))
183                 ev--;
184         if (ev == h) {
185                 /* Found the end of the headers */
186
187                 assert(!msg->body || (msg->body == eh));
188
189                 if (eh < msg->end)
190                         msg->body = eh;
191                 return NULL;
192         }
193
194         while ((eh < msg->end) && rfc822_iswsp(*eh))
195                 eh = next_line(eh, msg->end);
196
197         if (eh >= msg->end)
198                 msg->remainder = NULL;
199         else
200                 msg->remainder = eh;
201
202
203         hi = talloc_zero(msg, struct rfc822_header);
204         ALLOC_CHECK(hi, NULL);
205
206         hi->all = bytestring(h, eh - h);
207         list_add_tail(&msg->headers, &hi->list);
208
209         colon = memchr(h, ':', hi->all.len);
210         if (colon) {
211                 hi->rawname = bytestring(h, colon - h);
212                 hi->rawvalue = bytestring(colon + 1, eh - colon - 1);
213         } else {
214                 hi->rawname = bytestring_NULL;
215                 hi->rawvalue = bytestring_NULL;
216         }
217
218         CHECK(msg, "<next_header_parse");
219
220         return index_header(msg, hi);
221 }
222
/*
 * Return the header after @hdr (or the first header if @hdr is NULL).
 * An already-parsed header is returned if one exists; otherwise the next
 * header is parsed from the message text.  Returns NULL when there are
 * no more headers.
 */
struct rfc822_header *rfc822_next_header(struct rfc822_msg *msg,
					 struct rfc822_header *hdr)
{
	struct rfc822_header *cached;

	CHECK(msg, ">rfc822_next_header");

	cached = next_header_cached(msg, hdr);

	return cached ? cached : next_header_parse(msg);
}
236
237 struct bytestring rfc822_body(struct rfc822_msg *msg)
238 {
239         CHECK(msg, ">rfc822_body");
240
241         if (!msg->body && msg->remainder) {
242                 const char *p, *q;
243
244                 p = memmem(msg->remainder, msg->end - msg->remainder,
245                            "\n\r\n", 3);
246                 q = memmem(msg->remainder, msg->end - msg->remainder,
247                            "\n\n", 2);
248
249                 if (p && (!q || (p < q)))
250                         msg->body = p + 3;
251                 else if (q && (!p || (q < p)))
252                         msg->body = q + 2;
253
254                 if (msg->body >= msg->end) {
255                         assert(msg->body == msg->end);
256                         msg->body = NULL;
257                 }
258         }
259
260         CHECK(msg, "<rfc822_body");
261
262         if (msg->body)
263                 return bytestring(msg->body, msg->end - msg->body);
264         else
265                 return bytestring_NULL;
266 }
267
268 enum rfc822_header_errors rfc822_header_errors(struct rfc822_msg *msg,
269                                                struct rfc822_header *hdr)
270 {
271         enum rfc822_header_errors err = 0;
272         int i;
273
274         if (!hdr->rawname.ptr) {
275                 err |= RFC822_HDR_NO_COLON;
276         } else {
277                 for (i = 0; i < hdr->rawname.len; i++) {
278                         char c = hdr->rawname.ptr[i];
279
280                         assert(c != ':');
281
282                         if ((c < 33) || (c > 126)) {
283                                 err |= RFC822_HDR_BAD_NAME_CHARS;
284                                 break;
285                         }
286                 }
287         }
288         return err;
289 }
290
291 struct bytestring rfc822_header_raw_content(struct rfc822_msg *msg,
292                                             struct rfc822_header *hdr)
293 {
294         return hdr->all;
295 }
296
297 struct bytestring rfc822_header_raw_name(struct rfc822_msg *msg,
298                                          struct rfc822_header *hdr)
299 {
300         return hdr->rawname;
301 }
302
303 struct bytestring rfc822_header_raw_value(struct rfc822_msg *msg,
304                                           struct rfc822_header *hdr)
305 {
306         return hdr->rawvalue;
307 }
308
309 static void get_line(struct bytestring in, struct bytestring *first,
310                      struct bytestring *rest)
311 {
312         size_t rawlen, trimlen;
313         const char *inp = in.ptr;
314         const char *nl;
315
316         nl = memchr(inp, '\n', in.len);
317         if (!nl)
318                 rawlen = in.len;
319         else
320                 rawlen = nl - inp + 1;
321
322         trimlen = rawlen;
323         if ((trimlen > 0) && (inp[trimlen-1] == '\n')) {
324                 trimlen--;
325                 if ((trimlen > 0) && (inp[trimlen-1] == '\r'))
326                         trimlen--;
327         }
328
329         *first = bytestring(in.ptr, trimlen);
330
331         if (rawlen < in.len)
332                 *rest = bytestring(in.ptr + rawlen, in.len - rawlen);
333         else
334                 *rest = bytestring_NULL;
335 }
336
337
/*
 * Return the value of @hdr with the line terminators removed, joining
 * any continuation lines into one string.  The result is cached in the
 * header.
 *
 * A value of at most one line is returned as a view into the original
 * buffer; a multi-line value is copied into a talloc array owned by
 * @msg.  Returns bytestring_NULL if that allocation fails and the
 * failure handler returns.
 */
struct bytestring rfc822_header_unfolded_value(struct rfc822_msg *msg,
					       struct bytestring rfc822_header_unfolded_value_unused_decl_guard);
376
/*
 * Deliberately locale-independent tolower(): only 'A'..'Z' are mapped.
 * Header names should be ASCII, and anything else is best left alone.
 */
static char xtolower(char c)
{
	return ((c >= 'A') && (c <= 'Z')) ? (char)('a' + (c - 'A')) : c;
}
386
387 static bool hdr_name_eq(struct bytestring a, struct bytestring b)
388 {
389         int i;
390
391         if (a.len != b.len)
392                 return false;
393
394         for (i = 0; i < a.len; i++)
395                 if (xtolower(a.ptr[i]) != xtolower(b.ptr[i]))
396                         return false;
397
398         return true;
399 }
400
401 bool rfc822_header_is(struct rfc822_msg *msg, struct rfc822_header *hdr,
402                       const char *name)
403 {
404         struct bytestring hname = rfc822_header_raw_name(msg, hdr);
405
406         if (!hname.ptr || !name)
407                 return false;
408
409         return hdr_name_eq(hname, bytestring_from_string(name));
410 }
411
412 static unsigned headerhash(struct bytestring name)
413 {
414         /*
415          * This is stolen from hash_string() in ccan/hash, but adapted
416          * to add the xtolower() call and use a bytestring
417          */
418         unsigned ret = 0;
419         size_t i;
420
421         for (i = 0; i < name.len; i++)
422                 ret = (ret << 5) - ret + xtolower(name.ptr[i]);
423
424         return ret % INDEX_HASH_SIZE;
425 }
426
427 static struct rfc822_headers_of_name *headers_of_name(struct rfc822_msg *msg,
428                                                       struct bytestring name)
429 {
430         unsigned hash = headerhash(name);
431         struct rfc822_headers_of_name *hn;
432
433         list_for_each(&msg->header_index[hash], hn, bucket) {
434                 if (hdr_name_eq(hn->name, name))
435                         return hn;
436         }
437
438         return NULL;
439 }
440
441 static struct rfc822_header *index_header(struct rfc822_msg *msg,
442                                           struct rfc822_header *hdr)
443 {
444         struct bytestring hname = rfc822_header_raw_name(msg, hdr);
445         struct rfc822_headers_of_name *hn = headers_of_name(msg, hname);
446
447         if (!hn) {
448                 unsigned hash = headerhash(hname);
449
450                 hn = talloc_zero(msg, struct rfc822_headers_of_name);
451                 ALLOC_CHECK(hn, NULL);
452
453                 hn->name = hname;
454                 hn->first = NULL;
455                 hn->lastptr = &hn->first;
456                 list_add_tail(&msg->header_index[hash], &hn->bucket);
457         }
458
459         hdr->name_next = NULL;
460         *(hn->lastptr) = hdr;
461         hn->lastptr = &hdr->name_next;
462         return hdr;
463 }
464
465 struct rfc822_header *rfc822_first_header_of_name(struct rfc822_msg *msg,
466                                                   const char *name)
467 {
468         struct bytestring namebs = bytestring_from_string(name);
469         struct rfc822_headers_of_name *hn = headers_of_name(msg, namebs);
470         struct rfc822_header *hdr;
471
472         if (hn)
473                 return hn->first;
474
475         do {
476                 hdr = next_header_parse(msg);
477                 if (hdr && rfc822_header_is(msg, hdr, name))
478                         return hdr;
479         } while (hdr);
480
481         return NULL;
482 }
483
484 struct rfc822_header *rfc822_next_header_of_name(struct rfc822_msg *msg,
485                                                  struct rfc822_header *hdr,
486                                                  const char *name)
487 {
488         if (!hdr)
489                 return rfc822_first_header_of_name(msg, name);
490
491         if (hdr->name_next) {
492                 assert(rfc822_header_is(msg, hdr->name_next, name));
493                 return hdr->name_next;
494         }
495
496         do {
497                 hdr = next_header_parse(msg);
498                 if (hdr && rfc822_header_is(msg, hdr, name))
499                         return hdr;
500         } while (hdr);
501
502         return NULL;
503 }