git.ozlabs.org Git - patchwork/commitdiff
[parser] Decode From: headers
author Jeremy Kerr <jk@ozlabs.org>
Sat, 20 Sep 2008 02:09:10 +0000 (12:09 +1000)
committer Jeremy Kerr <jk@ozlabs.org>
Sat, 20 Sep 2008 02:09:10 +0000 (12:09 +1000)
We're getting a few utf-8 encoded From: addresses, so decode before
saving to the DB.

Also, add tests.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
apps/patchwork/bin/parsemail.py
apps/patchwork/tests/patchparser.py

index b0f1497656ab8ac4170c93f34975454b4f9b0fad..07554bcb3cbffeaada7ddb3741667379ca5db295 100755 (executable)
@@ -26,11 +26,11 @@ import time
 import operator
 from email import message_from_file
 try:
-    from email.header import Header
+    from email.header import Header, decode_header
     from email.utils import parsedate_tz, mktime_tz
 except ImportError:
     # Python 2.4 compatibility
-    from email.Header import Header
+    from email.Header import Header, decode_header
     from email.Utils import parsedate_tz, mktime_tz
 
 from patchwork.parser import parse_patch
@@ -38,6 +38,17 @@ from patchwork.models import Patch, Project, Person, Comment
 
 list_id_headers = ['List-ID', 'X-Mailing-List']
 
+def clean_header(header):
+    """ Decode (possibly non-ascii) headers """
+
+    def decode(str, fragment):
+        (frag_str, frag_encoding) = fragment
+        if frag_encoding:
+            return str + frag_str.decode(frag_encoding)
+        return str + frag_str.decode()
+
+    return reduce(decode, decode_header(header), u'').strip()
+
 def find_project(mail):
     project = None
     listid_re = re.compile('.*<([^>]+)>.*', re.S)
@@ -60,7 +71,7 @@ def find_project(mail):
 
 def find_author(mail):
 
-    from_header = mail.get('From').strip()
+    from_header = clean_header(mail.get('From'))
     (name, email) = (None, None)
 
     # tuple of (regex, fn)
index 2e207bf7e9ec781a1ebb88f3b2b99146e86d503f..6fe7968bfafa32aa12eef70c26412f72ceb21e70 100644 (file)
@@ -21,7 +21,8 @@ import unittest
 import os
 from email.mime.text import MIMEText
 from email.mime.multipart import MIMEMultipart
-from patchwork.models import Project
+from email import message_from_string
+from patchwork.models import Project, Person
 
 test_mail_dir  = 'patchwork/tests/mail'
 test_patch_dir = 'patchwork/tests/patches'
@@ -55,7 +56,7 @@ class PatchTest(unittest.TestCase):
         return file(os.path.join(test_patch_dir, filename)).read()
 
 
-from patchwork.bin.parsemail import find_content
+from patchwork.bin.parsemail import find_content, find_author
 
 class InlinePatchTest(PatchTest):
     patch_filename = '0001-add-line.patch'
@@ -126,3 +127,45 @@ class UpdateSigCommentTest(SignatureCommentTest):
     """ Test for '---\nUpdate: v2' style comments to patches, with a sig """
     patch_filename = '0001-add-line.patch'
     test_comment = 'Test comment\nmore comment\n---\nUpdate: test update'
+
+class SenderEncodingTest(unittest.TestCase):
+    sender_name = u'example user'
+    sender_email = 'user@example.com'
+    from_header = 'example user <user@example.com>'
+
+    def setUp(self):
+        mail = 'From: %s\n' % self.from_header + \
+               'Subject: test\n\n' + \
+               'test'
+        self.email = message_from_string(mail)
+        (self.person, new) = find_author(self.email)
+        self.person.save()
+
+    def tearDown(self):
+        self.person.delete()
+
+    def testName(self):
+        self.assertEquals(self.person.name, self.sender_name)
+
+    def testEmail(self):
+        self.assertEquals(self.person.email, self.sender_email)
+
+    def testDBQueryName(self):
+        db_person = Person.objects.get(name = self.sender_name)
+        self.assertEquals(self.person, db_person)
+
+    def testDBQueryEmail(self):
+        db_person = Person.objects.get(email = self.sender_email)
+        self.assertEquals(self.person, db_person)
+
+
+class SenderUTF8QPEncodingTest(SenderEncodingTest):
+    sender_name = u'\xe9xample user'
+    from_header = '=?utf-8?q?=C3=A9xample=20user?= <user@example.com>'
+
+class SenderUTF8QPSplitEncodingTest(SenderEncodingTest):
+    sender_name = u'\xe9xample user'
+    from_header = '=?utf-8?q?=C3=A9xample=20?= user <user@example.com>'
+
+class SenderUTF8B64EncodingTest(SenderUTF8QPEncodingTest):
+    from_header = '=?utf-8?B?w6l4YW1wbGUgdXNlcg==?= <user@example.com>'