endian: add constant versions.
[ccan] / ccan / rfc822 / rfc822.c
1 /* Licensed under LGPLv2.1+ - see LICENSE file for details */
2
3 #include "config.h"
4
5 #include <string.h>
6
7 #include <ccan/str/str.h>
8 #include <ccan/list/list.h>
9 #include <stdio.h>
10
11 #include <ccan/rfc822/rfc822.h>
12
13 #ifdef TAL_USE_TALLOC
14 #include <ccan/tal/talloc/talloc.h>
15 #else
16 #include <ccan/tal/tal.h>
17 #endif
18
19 #if !HAVE_MEMMEM
/*
 * Fallback memmem() for systems whose C library does not provide one.
 *
 * Returns a pointer to the first occurrence of the @needlelen bytes at
 * @needle within the @haystacklen bytes at @haystack, or NULL if there
 * is no such occurrence.  An empty needle matches at @haystack itself.
 */
void *memmem(const void *haystack, size_t haystacklen,
             const void *needle, size_t needlelen)
{
        const char *p = haystack;
        const char *last;

        /* An empty needle trivially matches at the start */
        if (needlelen == 0)
                return (void *)haystack;

        /* A needle longer than the haystack can never match */
        if (needlelen > haystacklen)
                return NULL;

        /* Last position at which the whole needle still fits */
        last = p + (haystacklen - needlelen);

        for (; p <= last; p++) {
                if (memcmp(p, needle, needlelen) == 0)
                        return (void *)p;
        }

        return NULL;
}
35 #endif
36
37 static void (*allocation_failure_hook)(const char *);
38
39 static void NORETURN default_allocation_failure(const char *s)
40 {
41         fprintf(stderr, "ccan/rfc822: Allocation failure: %s", s);
42         abort();
43 }
44
45 static void allocation_failure(const char *s)
46 {
47         if (allocation_failure_hook)
48                 (*allocation_failure_hook)(s);
49         else
50                 default_allocation_failure(s);
51 }
52
53 void rfc822_set_allocation_failure_handler(void (*h)(const char *))
54 {
55         allocation_failure_hook = h;
56 }
57
/*
 * ALLOC_CHECK(p, r): if allocation result @p is NULL, report the
 * failure (tagged with the current file and line) and, should the
 * failure handler return, make the enclosing function return @r.
 */
#define ALLOC_CHECK(p, r)                                               \
        do {                                                            \
                if (!(p)) {                                             \
                        allocation_failure(__FILE__ ":"                 \
                                           stringify(__LINE__));       \
                        return (r);                                     \
                }                                                       \
        } while (0)
65
66 /*
67  * No real point doing fancy resizing hashes, when any given mail
68  * message is unlikely to have more than a fairly small number of
69  * distinct header types.  This should be ample.
70  */
71 #define INDEX_HASH_SIZE         63
72
73 struct rfc822_msg {
74         const char *data, *end;
75         const char *remainder;
76         struct list_head headers;
77         struct list_head header_index[INDEX_HASH_SIZE];
78         const char *body;
79 };
80
81 struct rfc822_header {
82         struct bytestring all, rawname, rawvalue;
83         struct bytestring unfolded;
84         struct list_node list;
85         struct rfc822_header *name_next;
86 };
87
88 struct rfc822_headers_of_name {
89         struct bytestring name;
90         struct rfc822_header *first;
91         struct rfc822_header **lastptr;
92         struct list_node bucket;
93 };
94
95 struct rfc822_msg *rfc822_check(const struct rfc822_msg *msg,
96                                 const char *abortstr)
97 {
98         assert(msg);
99         if (!list_check(&msg->headers, abortstr))
100                 return NULL;
101         if (!tal_check(msg, abortstr))
102                 return NULL;
103         return (struct rfc822_msg *)msg;
104 }
105
/*
 * CHECK(msg, str): run rfc822_check() on @msg when CCAN_RFC822_DEBUG
 * is defined; expands to an empty statement otherwise.
 */
#ifndef CCAN_RFC822_DEBUG
#define CHECK(msg, str) do { } while (0)
#else
#define CHECK(msg, str) do { rfc822_check((msg), (str)); } while (0)
#endif
111
112 struct rfc822_msg *rfc822_start(const void *ctx, const char *p, size_t len)
113 {
114         struct rfc822_msg *msg;
115         int i;
116
117         msg = tal(ctx, struct rfc822_msg);
118         ALLOC_CHECK(msg, NULL);
119
120         msg->data = p;
121         msg->end = p + len;
122
123         msg->remainder = msg->data;
124         msg->body = NULL;
125
126         list_head_init(&msg->headers);
127
128         for (i = 0; i < INDEX_HASH_SIZE; i++)
129                 list_head_init(&msg->header_index[i]);
130
131         CHECK(msg, "<rfc22_start");
132
133         return msg;
134 }
135
/*
 * Release @msg.  Headers and index entries are allocated as tal
 * children of @msg in this file, so they are freed with it.
 */
void rfc822_free(struct rfc822_msg *msg)
{
        CHECK(msg, ">rfc822_free");

        tal_free(msg);
}
141
142 static struct rfc822_header *next_header_cached(struct rfc822_msg *msg,
143                                                 struct rfc822_header *hdr)
144 {
145         struct list_node *h = &msg->headers.n;
146         const struct list_node *n = h;
147
148         CHECK(msg, ">next_header_cached");
149
150         if (hdr)
151                 n = &hdr->list;
152
153         if (n->next == h)
154                 return NULL;
155
156         CHECK(msg, "<next_header_cached");
157
158         return list_entry(n->next, struct rfc822_header, list);
159 }
160
/*
 * Return a pointer just past the first '\n' in [@start, @end), or @end
 * if that range contains no newline.
 */
static const char *next_line(const char *start, const char *end)
{
        const char *nl = memchr(start, '\n', end - start);

        if (!nl)
                return end;

        return nl + 1;
}
167
168 static struct rfc822_header *index_header(struct rfc822_msg *msg,
169                                           struct rfc822_header *hdr);
170
171 static struct rfc822_header *next_header_parse(struct rfc822_msg *msg)
172 {
173         const char *h, *eh, *ev, *colon;
174         struct rfc822_header *hi;
175
176         CHECK(msg, ">next_header_parse");
177
178         if (!msg->remainder)
179                 return NULL;
180
181         if (msg->body && (msg->remainder >= msg->body))
182                 return NULL;
183
184         h = msg->remainder;
185         eh = next_line(h, msg->end);
186
187         ev = eh;
188         if ((ev > h) && (ev[-1] == '\n'))
189                 ev--;
190         if ((ev > h) && (ev[-1] == '\r'))
191                 ev--;
192         if (ev == h) {
193                 /* Found the end of the headers */
194
195                 assert(!msg->body || (msg->body == eh));
196
197                 if (eh < msg->end)
198                         msg->body = eh;
199                 return NULL;
200         }
201
202         while ((eh < msg->end) && rfc822_iswsp(*eh))
203                 eh = next_line(eh, msg->end);
204
205         if (eh >= msg->end)
206                 msg->remainder = NULL;
207         else
208                 msg->remainder = eh;
209
210
211         hi = talz(msg, struct rfc822_header);
212         ALLOC_CHECK(hi, NULL);
213
214         hi->all = bytestring(h, eh - h);
215         list_add_tail(&msg->headers, &hi->list);
216
217         colon = memchr(h, ':', hi->all.len);
218         if (colon) {
219                 hi->rawname = bytestring(h, colon - h);
220                 hi->rawvalue = bytestring(colon + 1, eh - colon - 1);
221         } else {
222                 hi->rawname = bytestring_NULL;
223                 hi->rawvalue = bytestring_NULL;
224         }
225
226         CHECK(msg, "<next_header_parse");
227
228         return index_header(msg, hi);
229 }
230
/*
 * Return the header after @hdr, or the first header if @hdr is NULL.
 * Headers that were parsed earlier are returned directly; otherwise
 * one more header is parsed from the message.  Returns NULL when
 * there are no more headers.
 */
struct rfc822_header *rfc822_next_header(struct rfc822_msg *msg,
                                         struct rfc822_header *hdr)
{
        struct rfc822_header *cached;

        CHECK(msg, ">rfc822_next_header");

        cached = next_header_cached(msg, hdr);

        return cached ? cached : next_header_parse(msg);
}
244
245 struct bytestring rfc822_body(struct rfc822_msg *msg)
246 {
247         CHECK(msg, ">rfc822_body");
248
249         if (!msg->body && msg->remainder) {
250                 const char *p, *q;
251
252                 p = memmem(msg->remainder, msg->end - msg->remainder,
253                            "\n\r\n", 3);
254                 q = memmem(msg->remainder, msg->end - msg->remainder,
255                            "\n\n", 2);
256
257                 if (p && (!q || (p < q)))
258                         msg->body = p + 3;
259                 else if (q && (!p || (q < p)))
260                         msg->body = q + 2;
261
262                 if (msg->body >= msg->end) {
263                         assert(msg->body == msg->end);
264                         msg->body = NULL;
265                 }
266         }
267
268         CHECK(msg, "<rfc822_body");
269
270         if (msg->body)
271                 return bytestring(msg->body, msg->end - msg->body);
272         else
273                 return bytestring_NULL;
274 }
275
276 enum rfc822_header_errors rfc822_header_errors(struct rfc822_msg *msg,
277                                                struct rfc822_header *hdr)
278 {
279         enum rfc822_header_errors err = 0;
280         int i;
281
282         if (!hdr->rawname.ptr) {
283                 err |= RFC822_HDR_NO_COLON;
284         } else {
285                 for (i = 0; i < hdr->rawname.len; i++) {
286                         char c = hdr->rawname.ptr[i];
287
288                         assert(c != ':');
289
290                         if ((c < 33) || (c > 126)) {
291                                 err |= RFC822_HDR_BAD_NAME_CHARS;
292                                 break;
293                         }
294                 }
295         }
296         return err;
297 }
298
299 struct bytestring rfc822_header_raw_content(struct rfc822_msg *msg,
300                                             struct rfc822_header *hdr)
301 {
302         return hdr->all;
303 }
304
305 struct bytestring rfc822_header_raw_name(struct rfc822_msg *msg,
306                                          struct rfc822_header *hdr)
307 {
308         return hdr->rawname;
309 }
310
311 struct bytestring rfc822_header_raw_value(struct rfc822_msg *msg,
312                                           struct rfc822_header *hdr)
313 {
314         return hdr->rawvalue;
315 }
316
317 static void get_line(struct bytestring in, struct bytestring *first,
318                      struct bytestring *rest)
319 {
320         size_t rawlen, trimlen;
321         const char *inp = in.ptr;
322         const char *nl;
323
324         nl = memchr(inp, '\n', in.len);
325         if (!nl)
326                 rawlen = in.len;
327         else
328                 rawlen = nl - inp + 1;
329
330         trimlen = rawlen;
331         if ((trimlen > 0) && (inp[trimlen-1] == '\n')) {
332                 trimlen--;
333                 if ((trimlen > 0) && (inp[trimlen-1] == '\r'))
334                         trimlen--;
335         }
336
337         *first = bytestring(in.ptr, trimlen);
338
339         if (rawlen < in.len)
340                 *rest = bytestring(in.ptr + rawlen, in.len - rawlen);
341         else
342                 *rest = bytestring_NULL;
343 }
344
345
346 struct bytestring rfc822_header_unfolded_value(struct rfc822_msg *msg,
347                                                struct rfc822_header *hdr)
348 {
349         struct bytestring raw = rfc822_header_raw_value(msg, hdr);
350         struct bytestring next, rest;
351         int lines = 0;
352         size_t len = 0;
353
354         if (!hdr->unfolded.ptr) {
355                 rest = raw;
356                 while (rest.ptr) {
357                         get_line(rest, &next, &rest);
358                         lines++;
359                         len += next.len;
360                 }
361
362                 if (lines <= 1) {
363                         hdr->unfolded = bytestring(raw.ptr, len);
364                 } else {
365                         char *unfold = tal_arr(msg, char, len);
366                         char *p = unfold;
367
368                         ALLOC_CHECK(unfold, bytestring_NULL);
369
370                         rest = raw;
371                         while (rest.ptr) {
372                                 get_line(rest, &next, &rest);
373                                 memcpy(p, next.ptr, next.len);
374                                 p += next.len;
375                         }
376
377                         assert(p == (unfold + len));
378                         hdr->unfolded = bytestring(unfold, len);
379                 }
380         }
381
382         return hdr->unfolded;
383 }
384
/*
 * Deliberately locale-independent tolower(): only 'A'..'Z' are mapped.
 * Header names should be ASCII, and anything else is best left alone.
 */
static char xtolower(char c)
{
        return ((c >= 'A') && (c <= 'Z')) ? (char)(c - 'A' + 'a') : c;
}
394
395 static bool hdr_name_eq(struct bytestring a, struct bytestring b)
396 {
397         int i;
398
399         if (a.len != b.len)
400                 return false;
401
402         for (i = 0; i < a.len; i++)
403                 if (xtolower(a.ptr[i]) != xtolower(b.ptr[i]))
404                         return false;
405
406         return true;
407 }
408
409 bool rfc822_header_is(struct rfc822_msg *msg, struct rfc822_header *hdr,
410                       const char *name)
411 {
412         struct bytestring hname = rfc822_header_raw_name(msg, hdr);
413
414         if (!hname.ptr || !name)
415                 return false;
416
417         return hdr_name_eq(hname, bytestring_from_string(name));
418 }
419
420 static unsigned headerhash(struct bytestring name)
421 {
422         /*
423          * This is stolen from hash_string() in ccan/hash, but adapted
424          * to add the xtolower() call and use a bytestring
425          */
426         unsigned ret = 0;
427         size_t i;
428
429         for (i = 0; i < name.len; i++)
430                 ret = (ret << 5) - ret + xtolower(name.ptr[i]);
431
432         return ret % INDEX_HASH_SIZE;
433 }
434
435 static struct rfc822_headers_of_name *headers_of_name(struct rfc822_msg *msg,
436                                                       struct bytestring name)
437 {
438         unsigned hash = headerhash(name);
439         struct rfc822_headers_of_name *hn;
440
441         list_for_each(&msg->header_index[hash], hn, bucket) {
442                 if (hdr_name_eq(hn->name, name))
443                         return hn;
444         }
445
446         return NULL;
447 }
448
449 static struct rfc822_header *index_header(struct rfc822_msg *msg,
450                                           struct rfc822_header *hdr)
451 {
452         struct bytestring hname = rfc822_header_raw_name(msg, hdr);
453         struct rfc822_headers_of_name *hn = headers_of_name(msg, hname);
454
455         if (!hn) {
456                 unsigned hash = headerhash(hname);
457
458                 hn = talz(msg, struct rfc822_headers_of_name);
459                 ALLOC_CHECK(hn, NULL);
460
461                 hn->name = hname;
462                 hn->first = NULL;
463                 hn->lastptr = &hn->first;
464                 list_add_tail(&msg->header_index[hash], &hn->bucket);
465         }
466
467         hdr->name_next = NULL;
468         *(hn->lastptr) = hdr;
469         hn->lastptr = &hdr->name_next;
470         return hdr;
471 }
472
473 struct rfc822_header *rfc822_first_header_of_name(struct rfc822_msg *msg,
474                                                   const char *name)
475 {
476         struct bytestring namebs = bytestring_from_string(name);
477         struct rfc822_headers_of_name *hn = headers_of_name(msg, namebs);
478         struct rfc822_header *hdr;
479
480         if (hn)
481                 return hn->first;
482
483         do {
484                 hdr = next_header_parse(msg);
485                 if (hdr && rfc822_header_is(msg, hdr, name))
486                         return hdr;
487         } while (hdr);
488
489         return NULL;
490 }
491
492 struct rfc822_header *rfc822_next_header_of_name(struct rfc822_msg *msg,
493                                                  struct rfc822_header *hdr,
494                                                  const char *name)
495 {
496         if (!hdr)
497                 return rfc822_first_header_of_name(msg, name);
498
499         if (hdr->name_next) {
500                 assert(rfc822_header_is(msg, hdr->name_next, name));
501                 return hdr->name_next;
502         }
503
504         do {
505                 hdr = next_header_parse(msg);
506                 if (hdr && rfc822_header_is(msg, hdr, name))
507                         return hdr;
508         } while (hdr);
509
510         return NULL;
511 }