X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Frfc822%2Frfc822.c;h=7f9442a8d04d235908abfd8c15e32520d97acd0f;hp=3bd3dd747ae28f532b4eef592297cc0e9b689d4f;hb=127c7534fc4aaa9499c552fdd3e4e9e56c16ca40;hpb=de28abde1337e9255812ba720ffeef2c83a25dde diff --git a/ccan/rfc822/rfc822.c b/ccan/rfc822/rfc822.c index 3bd3dd74..7f9442a8 100644 --- a/ccan/rfc822/rfc822.c +++ b/ccan/rfc822/rfc822.c @@ -57,10 +57,18 @@ void rfc822_set_allocation_failure_handler(void (*h)(const char *)) } \ } while (0) +/* + * No real point doing fancy resizing hashes, when any given mail + * message is unlikely to have more than a fairly small number of + * distinct header types. This should be ample. + */ +#define INDEX_HASH_SIZE 63 + struct rfc822_msg { const char *data, *end; const char *remainder; struct list_head headers; + struct list_head header_index[INDEX_HASH_SIZE]; const char *body; }; @@ -68,6 +76,14 @@ struct rfc822_header { struct bytestring all, rawname, rawvalue; struct bytestring unfolded; struct list_node list; + struct rfc822_header *name_next; +}; + +struct rfc822_headers_of_name { + struct bytestring name; + struct rfc822_header *first; + struct rfc822_header **lastptr; + struct list_node bucket; }; struct rfc822_msg *rfc822_check(const struct rfc822_msg *msg, @@ -88,6 +104,7 @@ struct rfc822_msg *rfc822_check(const struct rfc822_msg *msg, struct rfc822_msg *rfc822_start(const void *ctx, const char *p, size_t len) { struct rfc822_msg *msg; + int i; msg = talloc(ctx, struct rfc822_msg); ALLOC_CHECK(msg, NULL); @@ -100,6 +117,9 @@ struct rfc822_msg *rfc822_start(const void *ctx, const char *p, size_t len) list_head_init(&msg->headers); + for (i = 0; i < INDEX_HASH_SIZE; i++) + list_head_init(&msg->header_index[i]); + CHECK(msg, "body && (msg->remainder >= msg->body)) return NULL; - eh = h = msg->remainder; - do { - eh = next_line(eh, msg->end); - } while ((eh < msg->end) && rfc822_iswsp(*eh)); - - if (eh >= msg->end) - msg->remainder = NULL; - else - msg->remainder = eh; + h = msg->remainder; + eh = next_line(h, msg->end); ev = eh; if ((ev > h) && (ev[-1] == '\n')) ev--; if ((ev > h) && (ev[-1] == '\r')) ev--; - if (ev == h) { /* Found the end of the headers */ + + assert(!msg->body || (msg->body == eh)); + if (eh < msg->end) msg->body = eh; return NULL; } + while ((eh < msg->end) && rfc822_iswsp(*eh)) + eh = next_line(eh, msg->end); + + if (eh >= msg->end) + msg->remainder = NULL; + else + msg->remainder = eh; + + hi = talloc_zero(msg, struct rfc822_header); ALLOC_CHECK(hi, NULL); @@ -190,7 +217,7 @@ static struct rfc822_header *next_header_parse(struct rfc822_msg *msg) CHECK(msg, " 126)) { - err |= RFC822_HDR_BAD_NAME; + err |= RFC822_HDR_BAD_NAME_CHARS; break; } } @@ -346,3 +373,131 @@ struct bytestring rfc822_header_unfolded_value(struct rfc822_msg *msg, return hdr->unfolded; } + +/* Specifically locale *un*aware tolower() - headers should be ascii + * only, and if they're not best to leave them as is */ +static char xtolower(char c) +{ + if ((c >= 'A') && (c <= 'Z')) + return 'a' + (c - 'A'); + else + return c; +} + +static bool hdr_name_eq(struct bytestring a, struct bytestring b) +{ + int i; + + if (a.len != b.len) + return false; + + for (i = 0; i < a.len; i++) + if (xtolower(a.ptr[i]) != xtolower(b.ptr[i])) + return false; + + return true; +} + +bool rfc822_header_is(struct rfc822_msg *msg, struct rfc822_header *hdr, + const char *name) +{ + struct bytestring hname = rfc822_header_raw_name(msg, hdr); + + if (!hname.ptr || !name) + return false; + + return hdr_name_eq(hname, bytestring_from_string(name)); +} + +static unsigned headerhash(struct bytestring name) +{ + /* + * This is stolen from hash_string() in ccan/hash, but adapted + * to add the xtolower() call and use a bytestring + */ + unsigned ret = 0; + size_t i; + + for (i = 0; i < name.len; i++) + ret = (ret << 5) - ret + xtolower(name.ptr[i]); + + return ret % INDEX_HASH_SIZE; +} + +static struct rfc822_headers_of_name *headers_of_name(struct rfc822_msg *msg, + struct bytestring name) +{ + unsigned hash = headerhash(name); + struct rfc822_headers_of_name *hn; + + list_for_each(&msg->header_index[hash], hn, bucket) { + if (hdr_name_eq(hn->name, name)) + return hn; + } + + return NULL; +} + +static struct rfc822_header *index_header(struct rfc822_msg *msg, + struct rfc822_header *hdr) +{ + struct bytestring hname = rfc822_header_raw_name(msg, hdr); + struct rfc822_headers_of_name *hn = headers_of_name(msg, hname); + + if (!hn) { + unsigned hash = headerhash(hname); + + hn = talloc_zero(msg, struct rfc822_headers_of_name); + ALLOC_CHECK(hn, NULL); + + hn->name = hname; + hn->first = NULL; + hn->lastptr = &hn->first; + list_add_tail(&msg->header_index[hash], &hn->bucket); + } + + hdr->name_next = NULL; + *(hn->lastptr) = hdr; + hn->lastptr = &hdr->name_next; + return hdr; +} + +struct rfc822_header *rfc822_first_header_of_name(struct rfc822_msg *msg, + const char *name) +{ + struct bytestring namebs = bytestring_from_string(name); + struct rfc822_headers_of_name *hn = headers_of_name(msg, namebs); + struct rfc822_header *hdr; + + if (hn) + return hn->first; + + do { + hdr = next_header_parse(msg); + if (hdr && rfc822_header_is(msg, hdr, name)) + return hdr; + } while (hdr); + + return NULL; +} + +struct rfc822_header *rfc822_next_header_of_name(struct rfc822_msg *msg, + struct rfc822_header *hdr, + const char *name) +{ + if (!hdr) + return rfc822_first_header_of_name(msg, name); + + if (hdr->name_next) { + assert(rfc822_header_is(msg, hdr->name_next, name)); + return hdr->name_next; + } + + do { + hdr = next_header_parse(msg); + if (hdr && rfc822_header_is(msg, hdr, name)) + return hdr; + } while (hdr); + + return NULL; +}