]> git.ozlabs.org Git - ccan/blob - ccan/rfc822/rfc822.c
rfc822: Use the memmem module
[ccan] / ccan / rfc822 / rfc822.c
1 /* Licensed under LGPLv2.1+ - see LICENSE file for details */
2
3 #include "config.h"
4
5 #include <string.h>
6
7 #include <ccan/str/str.h>
8 #include <ccan/list/list.h>
9 #include <stdio.h>
10
11 #include <ccan/memmem/memmem.h>
12 #include <ccan/rfc822/rfc822.h>
13
14 #ifdef TAL_USE_TALLOC
15 #include <ccan/tal/talloc/talloc.h>
16 #else
17 #include <ccan/tal/tal.h>
18 #endif
19
/* User-installed allocation failure handler; while this is NULL the
 * built-in default handler is used instead. */
static void (*allocation_failure_hook)(const char *);
21
22 static void NORETURN default_allocation_failure(const char *s)
23 {
24         fprintf(stderr, "ccan/rfc822: Allocation failure: %s", s);
25         abort();
26 }
27
28 static void allocation_failure(const char *s)
29 {
30         if (allocation_failure_hook)
31                 (*allocation_failure_hook)(s);
32         else
33                 default_allocation_failure(s);
34 }
35
36 void rfc822_set_allocation_failure_handler(void (*h)(const char *))
37 {
38         allocation_failure_hook = h;
39 }
40
/*
 * ALLOC_CHECK(p, r): if @p is a null pointer, report an allocation
 * failure tagged with the current file and line, then (should the
 * failure handler return) make the *enclosing function* return @r.
 */
#define ALLOC_CHECK(p, r) \
        do { \
                if (!(p)) { \
                        allocation_failure(__FILE__ ":" stringify(__LINE__)); \
                        return (r); \
                } \
        } while (0)
48
/*
 * Number of buckets in the per-message header name index.
 *
 * A fixed size is used rather than a resizing hash table: a single
 * mail message is unlikely to carry more than a small number of
 * distinct header names, so this should be ample.
 */
#define INDEX_HASH_SIZE         63
55
56 struct rfc822_msg {
57         const char *data, *end;
58         const char *remainder;
59         struct list_head headers;
60         struct list_head header_index[INDEX_HASH_SIZE];
61         const char *body;
62 };
63
64 struct rfc822_header {
65         struct bytestring all, rawname, rawvalue;
66         struct bytestring unfolded;
67         struct list_node list;
68         struct rfc822_header *name_next;
69 };
70
71 struct rfc822_headers_of_name {
72         struct bytestring name;
73         struct rfc822_header *first;
74         struct rfc822_header **lastptr;
75         struct list_node bucket;
76 };
77
78 struct rfc822_msg *rfc822_check(const struct rfc822_msg *msg,
79                                 const char *abortstr)
80 {
81         assert(msg);
82         if (!list_check(&msg->headers, abortstr))
83                 return NULL;
84         if (!tal_check(msg, abortstr))
85                 return NULL;
86         return (struct rfc822_msg *)msg;
87 }
88
/*
 * CHECK(msg, str): run rfc822_check() on @msg when built with
 * CCAN_RFC822_DEBUG defined; expands to nothing otherwise.
 */
#ifdef CCAN_RFC822_DEBUG
#define CHECK(msg, str) do { rfc822_check((msg), (str)); } while (0)
#else
#define CHECK(msg, str) do { } while (0)
#endif
94
95 struct rfc822_msg *rfc822_start(const void *ctx, const char *p, size_t len)
96 {
97         struct rfc822_msg *msg;
98         int i;
99
100         msg = tal(ctx, struct rfc822_msg);
101         ALLOC_CHECK(msg, NULL);
102
103         msg->data = p;
104         msg->end = p + len;
105
106         msg->remainder = msg->data;
107         msg->body = NULL;
108
109         list_head_init(&msg->headers);
110
111         for (i = 0; i < INDEX_HASH_SIZE; i++)
112                 list_head_init(&msg->header_index[i]);
113
114         CHECK(msg, "<rfc22_start");
115
116         return msg;
117 }
118
/*
 * Release the parser state @msg with tal_free().  Headers and index
 * entries were allocated with @msg as their tal parent.
 */
void rfc822_free(struct rfc822_msg *msg)
{
        CHECK(msg, ">rfc822_free");
        tal_free(msg);
}
124
125 static struct rfc822_header *next_header_cached(struct rfc822_msg *msg,
126                                                 struct rfc822_header *hdr)
127 {
128         struct list_node *h = &msg->headers.n;
129         const struct list_node *n = h;
130
131         CHECK(msg, ">next_header_cached");
132
133         if (hdr)
134                 n = &hdr->list;
135
136         if (n->next == h)
137                 return NULL;
138
139         CHECK(msg, "<next_header_cached");
140
141         return list_entry(n->next, struct rfc822_header, list);
142 }
143
/*
 * Return a pointer to the start of the line following the one that
 * begins at @start, looking no further than @end.  If there is no
 * '\n' in [@start, @end), returns @end.
 */
static const char *next_line(const char *start, const char *end)
{
        const char *nl = memchr(start, '\n', end - start);

        if (!nl)
                return end;

        return nl + 1;
}
150
151 static struct rfc822_header *index_header(struct rfc822_msg *msg,
152                                           struct rfc822_header *hdr);
153
154 static struct rfc822_header *next_header_parse(struct rfc822_msg *msg)
155 {
156         const char *h, *eh, *ev, *colon;
157         struct rfc822_header *hi;
158
159         CHECK(msg, ">next_header_parse");
160
161         if (!msg->remainder)
162                 return NULL;
163
164         if (msg->body && (msg->remainder >= msg->body))
165                 return NULL;
166
167         h = msg->remainder;
168         eh = next_line(h, msg->end);
169
170         ev = eh;
171         if ((ev > h) && (ev[-1] == '\n'))
172                 ev--;
173         if ((ev > h) && (ev[-1] == '\r'))
174                 ev--;
175         if (ev == h) {
176                 /* Found the end of the headers */
177
178                 assert(!msg->body || (msg->body == eh));
179
180                 if (eh < msg->end)
181                         msg->body = eh;
182                 return NULL;
183         }
184
185         while ((eh < msg->end) && rfc822_iswsp(*eh))
186                 eh = next_line(eh, msg->end);
187
188         if (eh >= msg->end)
189                 msg->remainder = NULL;
190         else
191                 msg->remainder = eh;
192
193
194         hi = talz(msg, struct rfc822_header);
195         ALLOC_CHECK(hi, NULL);
196
197         hi->all = bytestring(h, eh - h);
198         list_add_tail(&msg->headers, &hi->list);
199
200         colon = memchr(h, ':', hi->all.len);
201         if (colon) {
202                 hi->rawname = bytestring(h, colon - h);
203                 hi->rawvalue = bytestring(colon + 1, eh - colon - 1);
204         } else {
205                 hi->rawname = bytestring_NULL;
206                 hi->rawvalue = bytestring_NULL;
207         }
208
209         CHECK(msg, "<next_header_parse");
210
211         return index_header(msg, hi);
212 }
213
/*
 * Return the header following @hdr in @msg (the first header if @hdr
 * is NULL).  An already-parsed header is returned if there is one;
 * otherwise the next header is parsed from the message text.
 * Returns NULL when there are no further headers.
 */
struct rfc822_header *rfc822_next_header(struct rfc822_msg *msg,
                                         struct rfc822_header *hdr)
{
        struct rfc822_header *cached;

        CHECK(msg, ">rfc822_next_header");

        cached = next_header_cached(msg, hdr);

        return cached ? cached : next_header_parse(msg);
}
227
228 struct bytestring rfc822_body(struct rfc822_msg *msg)
229 {
230         CHECK(msg, ">rfc822_body");
231
232         if (!msg->body && msg->remainder) {
233                 const char *p, *q;
234
235                 p = memmem(msg->remainder, msg->end - msg->remainder,
236                            "\n\r\n", 3);
237                 q = memmem(msg->remainder, msg->end - msg->remainder,
238                            "\n\n", 2);
239
240                 if (p && (!q || (p < q)))
241                         msg->body = p + 3;
242                 else if (q && (!p || (q < p)))
243                         msg->body = q + 2;
244
245                 if (msg->body >= msg->end) {
246                         assert(msg->body == msg->end);
247                         msg->body = NULL;
248                 }
249         }
250
251         CHECK(msg, "<rfc822_body");
252
253         if (msg->body)
254                 return bytestring(msg->body, msg->end - msg->body);
255         else
256                 return bytestring_NULL;
257 }
258
259 enum rfc822_header_errors rfc822_header_errors(struct rfc822_msg *msg,
260                                                struct rfc822_header *hdr)
261 {
262         enum rfc822_header_errors err = 0;
263         int i;
264
265         if (!hdr->rawname.ptr) {
266                 err |= RFC822_HDR_NO_COLON;
267         } else {
268                 for (i = 0; i < hdr->rawname.len; i++) {
269                         char c = hdr->rawname.ptr[i];
270
271                         assert(c != ':');
272
273                         if ((c < 33) || (c > 126)) {
274                                 err |= RFC822_HDR_BAD_NAME_CHARS;
275                                 break;
276                         }
277                 }
278         }
279         return err;
280 }
281
282 struct bytestring rfc822_header_raw_content(struct rfc822_msg *msg,
283                                             struct rfc822_header *hdr)
284 {
285         return hdr->all;
286 }
287
288 struct bytestring rfc822_header_raw_name(struct rfc822_msg *msg,
289                                          struct rfc822_header *hdr)
290 {
291         return hdr->rawname;
292 }
293
294 struct bytestring rfc822_header_raw_value(struct rfc822_msg *msg,
295                                           struct rfc822_header *hdr)
296 {
297         return hdr->rawvalue;
298 }
299
300 static void get_line(struct bytestring in, struct bytestring *first,
301                      struct bytestring *rest)
302 {
303         size_t rawlen, trimlen;
304         const char *inp = in.ptr;
305         const char *nl;
306
307         nl = memchr(inp, '\n', in.len);
308         if (!nl)
309                 rawlen = in.len;
310         else
311                 rawlen = nl - inp + 1;
312
313         trimlen = rawlen;
314         if ((trimlen > 0) && (inp[trimlen-1] == '\n')) {
315                 trimlen--;
316                 if ((trimlen > 0) && (inp[trimlen-1] == '\r'))
317                         trimlen--;
318         }
319
320         *first = bytestring(in.ptr, trimlen);
321
322         if (rawlen < in.len)
323                 *rest = bytestring(in.ptr + rawlen, in.len - rawlen);
324         else
325                 *rest = bytestring_NULL;
326 }
327
328
329 struct bytestring rfc822_header_unfolded_value(struct rfc822_msg *msg,
330                                                struct rfc822_header *hdr)
331 {
332         struct bytestring raw = rfc822_header_raw_value(msg, hdr);
333         struct bytestring next, rest;
334         int lines = 0;
335         size_t len = 0;
336
337         if (!hdr->unfolded.ptr) {
338                 rest = raw;
339                 while (rest.ptr) {
340                         get_line(rest, &next, &rest);
341                         lines++;
342                         len += next.len;
343                 }
344
345                 if (lines <= 1) {
346                         hdr->unfolded = bytestring(raw.ptr, len);
347                 } else {
348                         char *unfold = tal_arr(msg, char, len);
349                         char *p = unfold;
350
351                         ALLOC_CHECK(unfold, bytestring_NULL);
352
353                         rest = raw;
354                         while (rest.ptr) {
355                                 get_line(rest, &next, &rest);
356                                 memcpy(p, next.ptr, next.len);
357                                 p += next.len;
358                         }
359
360                         assert(p == (unfold + len));
361                         hdr->unfolded = bytestring(unfold, len);
362                 }
363         }
364
365         return hdr->unfolded;
366 }
367
/* Deliberately locale-*un*aware tolower(): only 'A'..'Z' are mapped.
 * Headers should be ASCII only, and if they are not it is best to
 * leave the other bytes exactly as they are. */
static char xtolower(char c)
{
        return ((c >= 'A') && (c <= 'Z')) ? (char)('a' + (c - 'A')) : c;
}
377
378 static bool hdr_name_eq(struct bytestring a, struct bytestring b)
379 {
380         int i;
381
382         if (a.len != b.len)
383                 return false;
384
385         for (i = 0; i < a.len; i++)
386                 if (xtolower(a.ptr[i]) != xtolower(b.ptr[i]))
387                         return false;
388
389         return true;
390 }
391
392 bool rfc822_header_is(struct rfc822_msg *msg, struct rfc822_header *hdr,
393                       const char *name)
394 {
395         struct bytestring hname = rfc822_header_raw_name(msg, hdr);
396
397         if (!hname.ptr || !name)
398                 return false;
399
400         return hdr_name_eq(hname, bytestring_from_string(name));
401 }
402
403 static unsigned headerhash(struct bytestring name)
404 {
405         /*
406          * This is stolen from hash_string() in ccan/hash, but adapted
407          * to add the xtolower() call and use a bytestring
408          */
409         unsigned ret = 0;
410         size_t i;
411
412         for (i = 0; i < name.len; i++)
413                 ret = (ret << 5) - ret + xtolower(name.ptr[i]);
414
415         return ret % INDEX_HASH_SIZE;
416 }
417
418 static struct rfc822_headers_of_name *headers_of_name(struct rfc822_msg *msg,
419                                                       struct bytestring name)
420 {
421         unsigned hash = headerhash(name);
422         struct rfc822_headers_of_name *hn;
423
424         list_for_each(&msg->header_index[hash], hn, bucket) {
425                 if (hdr_name_eq(hn->name, name))
426                         return hn;
427         }
428
429         return NULL;
430 }
431
432 static struct rfc822_header *index_header(struct rfc822_msg *msg,
433                                           struct rfc822_header *hdr)
434 {
435         struct bytestring hname = rfc822_header_raw_name(msg, hdr);
436         struct rfc822_headers_of_name *hn = headers_of_name(msg, hname);
437
438         if (!hn) {
439                 unsigned hash = headerhash(hname);
440
441                 hn = talz(msg, struct rfc822_headers_of_name);
442                 ALLOC_CHECK(hn, NULL);
443
444                 hn->name = hname;
445                 hn->first = NULL;
446                 hn->lastptr = &hn->first;
447                 list_add_tail(&msg->header_index[hash], &hn->bucket);
448         }
449
450         hdr->name_next = NULL;
451         *(hn->lastptr) = hdr;
452         hn->lastptr = &hdr->name_next;
453         return hdr;
454 }
455
456 struct rfc822_header *rfc822_first_header_of_name(struct rfc822_msg *msg,
457                                                   const char *name)
458 {
459         struct bytestring namebs = bytestring_from_string(name);
460         struct rfc822_headers_of_name *hn = headers_of_name(msg, namebs);
461         struct rfc822_header *hdr;
462
463         if (hn)
464                 return hn->first;
465
466         do {
467                 hdr = next_header_parse(msg);
468                 if (hdr && rfc822_header_is(msg, hdr, name))
469                         return hdr;
470         } while (hdr);
471
472         return NULL;
473 }
474
475 struct rfc822_header *rfc822_next_header_of_name(struct rfc822_msg *msg,
476                                                  struct rfc822_header *hdr,
477                                                  const char *name)
478 {
479         if (!hdr)
480                 return rfc822_first_header_of_name(msg, name);
481
482         if (hdr->name_next) {
483                 assert(rfc822_header_is(msg, hdr->name_next, name));
484                 return hdr->name_next;
485         }
486
487         do {
488                 hdr = next_header_parse(msg);
489                 if (hdr && rfc822_header_is(msg, hdr, name))
490                         return hdr;
491         } while (hdr);
492
493         return NULL;
494 }