From 918fab011f24f22f9915674c104258e20a5fcf26 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr
Date: Sat, 20 Sep 2008 12:09:10 +1000
Subject: [PATCH] [parser] Decode From: headers

We're getting a few utf-8 encoded From: addresses, so decode before
saving to the DB.

Also, add tests.

Signed-off-by: Jeremy Kerr
---
 apps/patchwork/bin/parsemail.py     | 17 +++++++++--
 apps/patchwork/tests/patchparser.py | 47 +++++++++++++++++++++++++++--
 2 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index b0f1497..07554bc 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -26,11 +26,11 @@ import time
 import operator
 from email import message_from_file
 try:
-    from email.header import Header
+    from email.header import Header, decode_header
     from email.utils import parsedate_tz, mktime_tz
 except ImportError:
     # Python 2.4 compatibility
-    from email.Header import Header
+    from email.Header import Header, decode_header
     from email.Utils import parsedate_tz, mktime_tz
 
 from patchwork.parser import parse_patch
@@ -38,6 +38,17 @@ from patchwork.models import Patch, Project, Person, Comment
 
 list_id_headers = ['List-ID', 'X-Mailing-List']
 
+def clean_header(header):
+    """ Decode (possibly non-ascii) headers """
+
+    def decode(str, fragment):
+        (frag_str, frag_encoding) = fragment
+        if frag_encoding:
+            return str + frag_str.decode(frag_encoding)
+        return str + frag_str.decode()
+
+    return reduce(decode, decode_header(header), u'').strip()
+
 def find_project(mail):
     project = None
     listid_re = re.compile('.*<([^>]+)>.*', re.S)
@@ -60,7 +71,7 @@ def find_project(mail):
 
 def find_author(mail):
 
-    from_header = mail.get('From').strip()
+    from_header = clean_header(mail.get('From'))
     (name, email) = (None, None)
 
     # tuple of (regex, fn)
diff --git a/apps/patchwork/tests/patchparser.py b/apps/patchwork/tests/patchparser.py
index 2e207bf..6fe7968 100644
--- a/apps/patchwork/tests/patchparser.py
+++ b/apps/patchwork/tests/patchparser.py
@@ -21,7 +21,8 @@ import unittest
 import os
 from email.mime.text import MIMEText
 from email.mime.multipart import MIMEMultipart
-from patchwork.models import Project
+from email import message_from_string
+from patchwork.models import Project, Person
 
 test_mail_dir = 'patchwork/tests/mail'
 test_patch_dir = 'patchwork/tests/patches'
@@ -55,7 +56,7 @@ class PatchTest(unittest.TestCase):
         return file(os.path.join(test_patch_dir, filename)).read()
 
 
-from patchwork.bin.parsemail import find_content
+from patchwork.bin.parsemail import find_content, find_author
 
 class InlinePatchTest(PatchTest):
     patch_filename = '0001-add-line.patch'
@@ -126,3 +127,45 @@ class UpdateSigCommentTest(SignatureCommentTest):
     """ Test for '---\nUpdate: v2' style comments to patches, with a sig """
     patch_filename = '0001-add-line.patch'
     test_comment = 'Test comment\nmore comment\n---\nUpdate: test update'
+
+class SenderEncodingTest(unittest.TestCase):
+    sender_name = u'example user'
+    sender_email = 'user@example.com'
+    from_header = 'example user <user@example.com>'
+
+    def setUp(self):
+        mail = 'From: %s\n' % self.from_header + \
+               'Subject: test\n\n' + \
+               'test'
+        self.email = message_from_string(mail)
+        (self.person, new) = find_author(self.email)
+        self.person.save()
+
+    def tearDown(self):
+        self.person.delete()
+
+    def testName(self):
+        self.assertEquals(self.person.name, self.sender_name)
+
+    def testEmail(self):
+        self.assertEquals(self.person.email, self.sender_email)
+
+    def testDBQueryName(self):
+        db_person = Person.objects.get(name = self.sender_name)
+        self.assertEquals(self.person, db_person)
+
+    def testDBQueryEmail(self):
+        db_person = Person.objects.get(email = self.sender_email)
+        self.assertEquals(self.person, db_person)
+
+
+class SenderUTF8QPEncodingTest(SenderEncodingTest):
+    sender_name = u'\xe9xample user'
+    from_header = '=?utf-8?q?=C3=A9xample=20user?= <user@example.com>'
+
+class SenderUTF8QPSplitEncodingTest(SenderEncodingTest):
+    sender_name = u'\xe9xample user'
+    from_header = '=?utf-8?q?=C3=A9xample=20?= user <user@example.com>'
+
+class SenderUTF8B64EncodingTest(SenderUTF8QPEncodingTest):
+    from_header = '=?utf-8?B?w6l4YW1wbGUgdXNlcg==?= <user@example.com>'
-- 
2.39.2