]> git.ozlabs.org Git - ccan/blobdiff - ccan/rfc822/rfc822.c
rfc822: Index headers by name
[ccan] / ccan / rfc822 / rfc822.c
index 6a0f95fc5f31ed7d9246c21719328b9641823774..7f9442a8d04d235908abfd8c15e32520d97acd0f 100644 (file)
@@ -57,10 +57,18 @@ void rfc822_set_allocation_failure_handler(void (*h)(const char *))
                } \
        } while (0)
 
+/*
+ * No real point doing fancy resizing hashes, when any given mail
+ * message is unlikely to have more than a fairly small number of
+ * distinct header types.  This should be ample.
+ */
+#define INDEX_HASH_SIZE                63
+
 struct rfc822_msg {
        const char *data, *end;
        const char *remainder;
        struct list_head headers;
+       struct list_head header_index[INDEX_HASH_SIZE];
        const char *body;
 };
 
@@ -68,6 +76,14 @@ struct rfc822_header {
        struct bytestring all, rawname, rawvalue;
        struct bytestring unfolded;
        struct list_node list;
+       struct rfc822_header *name_next;
+};
+
+struct rfc822_headers_of_name {
+       struct bytestring name;
+       struct rfc822_header *first;
+       struct rfc822_header **lastptr;
+       struct list_node bucket;
 };
 
 struct rfc822_msg *rfc822_check(const struct rfc822_msg *msg,
@@ -88,6 +104,7 @@ struct rfc822_msg *rfc822_check(const struct rfc822_msg *msg,
 struct rfc822_msg *rfc822_start(const void *ctx, const char *p, size_t len)
 {
        struct rfc822_msg *msg;
+       int i;
 
        msg = talloc(ctx, struct rfc822_msg);
        ALLOC_CHECK(msg, NULL);
@@ -100,6 +117,9 @@ struct rfc822_msg *rfc822_start(const void *ctx, const char *p, size_t len)
 
        list_head_init(&msg->headers);
 
+       for (i = 0; i < INDEX_HASH_SIZE; i++)
+               list_head_init(&msg->header_index[i]);
+
        CHECK(msg, "<rfc22_start");
 
        return msg;
@@ -137,6 +157,9 @@ static const char *next_line(const char *start, const char *end)
        return p ? (p + 1) : end;
 }
 
+static struct rfc822_header *index_header(struct rfc822_msg *msg,
+                                         struct rfc822_header *hdr);
+
 static struct rfc822_header *next_header_parse(struct rfc822_msg *msg)
 {
        const char *h, *eh, *ev, *colon;
@@ -194,7 +217,7 @@ static struct rfc822_header *next_header_parse(struct rfc822_msg *msg)
 
        CHECK(msg, "<next_header_parse");
 
-       return hi;
+       return index_header(msg, hi);
 }
 
 struct rfc822_header *rfc822_next_header(struct rfc822_msg *msg,
@@ -350,3 +373,131 @@ struct bytestring rfc822_header_unfolded_value(struct rfc822_msg *msg,
 
        return hdr->unfolded;
 }
+
+/* Specifically locale *un*aware tolower() - headers should be ascii
+ * only, and if they're not best to leave them as is */
+static char xtolower(char c)
+{
+       if ((c >= 'A') && (c <= 'Z'))
+               return 'a' + (c - 'A');
+       else
+               return c;
+}
+
+static bool hdr_name_eq(struct bytestring a, struct bytestring b)
+{
+       int i;
+
+       if (a.len != b.len)
+               return false;
+
+       for (i = 0; i < a.len; i++)
+               if (xtolower(a.ptr[i]) != xtolower(b.ptr[i]))
+                       return false;
+
+       return true;
+}
+
+bool rfc822_header_is(struct rfc822_msg *msg, struct rfc822_header *hdr,
+                     const char *name)
+{
+       struct bytestring hname = rfc822_header_raw_name(msg, hdr);
+
+       if (!hname.ptr || !name)
+               return false;
+
+       return hdr_name_eq(hname, bytestring_from_string(name));
+}
+
+static unsigned headerhash(struct bytestring name)
+{
+       /*
+        * This is stolen from hash_string() in ccan/hash, but adapted
+        * to add the xtolower() call and use a bytestring
+        */
+       unsigned ret = 0;
+       size_t i;
+
+       for (i = 0; i < name.len; i++)
+               ret = (ret << 5) - ret + xtolower(name.ptr[i]);
+
+       return ret % INDEX_HASH_SIZE;
+}
+
+static struct rfc822_headers_of_name *headers_of_name(struct rfc822_msg *msg,
+                                                     struct bytestring name)
+{
+       unsigned hash = headerhash(name);
+       struct rfc822_headers_of_name *hn;
+
+       list_for_each(&msg->header_index[hash], hn, bucket) {
+               if (hdr_name_eq(hn->name, name))
+                       return hn;
+       }
+
+       return NULL;
+}
+
+static struct rfc822_header *index_header(struct rfc822_msg *msg,
+                                         struct rfc822_header *hdr)
+{
+       struct bytestring hname = rfc822_header_raw_name(msg, hdr);
+       struct rfc822_headers_of_name *hn = headers_of_name(msg, hname);
+
+       if (!hn) {
+               unsigned hash = headerhash(hname);
+
+               hn = talloc_zero(msg, struct rfc822_headers_of_name);
+               ALLOC_CHECK(hn, NULL);
+
+               hn->name = hname;
+               hn->first = NULL;
+               hn->lastptr = &hn->first;
+               list_add_tail(&msg->header_index[hash], &hn->bucket);
+       }
+
+       hdr->name_next = NULL;
+       *(hn->lastptr) = hdr;
+       hn->lastptr = &hdr->name_next;
+       return hdr;
+}
+
+struct rfc822_header *rfc822_first_header_of_name(struct rfc822_msg *msg,
+                                                 const char *name)
+{
+       struct bytestring namebs = bytestring_from_string(name);
+       struct rfc822_headers_of_name *hn = headers_of_name(msg, namebs);
+       struct rfc822_header *hdr;
+
+       if (hn)
+               return hn->first;
+
+       do {
+               hdr = next_header_parse(msg);
+               if (hdr && rfc822_header_is(msg, hdr, name))
+                       return hdr;
+       } while (hdr);
+
+       return NULL;
+}
+
+struct rfc822_header *rfc822_next_header_of_name(struct rfc822_msg *msg,
+                                                struct rfc822_header *hdr,
+                                                const char *name)
+{
+       if (!hdr)
+               return rfc822_first_header_of_name(msg, name);
+
+       if (hdr->name_next) {
+               assert(rfc822_header_is(msg, hdr->name_next, name));
+               return hdr->name_next;
+       }
+
+       do {
+               hdr = next_header_parse(msg);
+               if (hdr && rfc822_header_is(msg, hdr, name))
+                       return hdr;
+       } while (hdr);
+
+       return NULL;
+}