parser: allow short-form List-Id headers
author Jeremy Kerr <jk@ozlabs.org>
Sat, 1 Aug 2009 06:05:37 +0000 (16:05 +1000)
committer Jeremy Kerr <jk@ozlabs.org>
Sat, 1 Aug 2009 06:08:05 +0000 (16:08 +1000)
Some lists (e.g., nongnu.org) have short-form List-Id headers. For
example:

List-Id: qemu-devel.nongnu.org

Although RFC 2919 prescribes that:

 list-id-header = "List-ID:" [phrase] "<" list-id ">" CRLF

we should allow for these headers anyway.

Based on an original patch from "J.H." <warthog9@kernel.org>.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
apps/patchwork/bin/parsemail.py
apps/patchwork/tests/patchparser.py

index 7f6727fc79c9d7e7efaf36f303e8f3c2ee188a4d..68bd94cd37174a4f25851f2a2596cea2e56f34d9 100755 (executable)
@@ -57,11 +57,17 @@ def clean_header(header):
 
 def find_project(mail):
     project = None
-    listid_re = re.compile('.*<([^>]+)>.*', re.S)
+    listid_res = [re.compile('.*<([^>]+)>.*', re.S),
+                  re.compile('^([\S]+)$', re.S)]
 
     for header in list_id_headers:
         if header in mail:
-            match = listid_re.match(mail.get(header))
+
+            for listid_re in listid_res:
+                match = listid_re.match(mail.get(header))
+                if match:
+                    break
+
             if not match:
                 continue
 
index 0fad67b2eecf7c6875a30cd7e6d3948fe4135bb3..f6909ce9e458608300f060b09a3c9d316267ebb3 100644 (file)
@@ -34,7 +34,8 @@ class PatchTest(unittest.TestCase):
     default_subject = defaults.subject
     project = defaults.project
 
-from patchwork.bin.parsemail import find_content, find_author, parse_mail
+from patchwork.bin.parsemail import find_content, find_author, find_project, \
+                                    parse_mail
 
 class InlinePatchTest(PatchTest):
     patch_filename = '0001-add-line.patch'
@@ -275,3 +276,49 @@ class MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
             # and the one we parsed in setUp()
             self.assertEquals(Comment.objects.filter(patch = patch).count(), 2)
 
+class ListIdHeaderTest(unittest.TestCase):
+    """ Test that find_project() maps a mail's List-Id header to a Project """
+    def setUp(self):
+        self.project = Project(linkname = 'test-project-1', name = 'Project 1',
+                listid = '1.example.com', listemail='1@example.com')
+        self.project.save()  # kept for each test; deleted in tearDown()
+
+    def testNoListId(self):
+        email = MIMEText('')  # no List-Id header is set on this mail
+        project = find_project(email)
+        self.assertEquals(project, None)
+
+    def testBlankListId(self):
+        email = MIMEText('')
+        email['List-Id'] = ''  # header present but empty
+        project = find_project(email)
+        self.assertEquals(project, None)
+
+    def testWhitespaceListId(self):
+        email = MIMEText('')
+        email['List-Id'] = ' '  # header present, whitespace only
+        project = find_project(email)
+        self.assertEquals(project, None)
+
+    def testSubstringListId(self):
+        email = MIMEText('')
+        email['List-Id'] = 'example.com'  # only a suffix of the listid
+        project = find_project(email)
+        self.assertEquals(project, None)
+
+    def testShortListId(self):
+        """ Some mailing lists have List-Id headers in short formats, where it
+            is only the list ID itself (without enclosing angle-brackets). """
+        email = MIMEText('')
+        email['List-Id'] = self.project.listid
+        project = find_project(email)
+        self.assertEquals(project, self.project)
+
+    def testLongListId(self):
+        email = MIMEText('')  # header below uses the 'phrase <list-id>' form
+        email['List-Id'] = 'Test text <%s>' % self.project.listid
+        project = find_project(email)
+        self.assertEquals(project, self.project)
+
+    def tearDown(self):
+        self.project.delete()  # remove the Project saved in setUp()