X-Git-Url: https://git.ozlabs.org/?a=blobdiff_plain;ds=sidebyside;f=apps%2Fpatchwork%2Fbin%2Fparsemail.py;h=e2beeae1b752d8ce03d64ee77d839caf294cd09d;hb=07ded8bef209073969ca66049c91783ce59fc0f7;hp=2310ae8bc187fa52e73b9ffb7378feee0d38193f;hpb=96467db48884d72bc04fc23c8f957190fa004779;p=patchwork diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py index 2310ae8..e2beeae 100755 --- a/apps/patchwork/bin/parsemail.py +++ b/apps/patchwork/bin/parsemail.py @@ -26,11 +26,11 @@ import time import operator from email import message_from_file try: - from email.header import Header + from email.header import Header, decode_header from email.utils import parsedate_tz, mktime_tz except ImportError: # Python 2.4 compatibility - from email.Header import Header + from email.Header import Header, decode_header from email.Utils import parsedate_tz, mktime_tz from patchwork.parser import parse_patch @@ -38,6 +38,23 @@ from patchwork.models import Patch, Project, Person, Comment list_id_headers = ['List-ID', 'X-Mailing-List'] +whitespace_re = re.compile('\s+') +def normalise_space(str): + return whitespace_re.sub(' ', str).strip() + +def clean_header(header): + """ Decode (possibly non-ascii) headers """ + + def decode(fragment): + (frag_str, frag_encoding) = fragment + if frag_encoding: + return frag_str.decode(frag_encoding) + return frag_str.decode() + + fragments = map(decode, decode_header(header)) + + return normalise_space(u' '.join(fragments)) + def find_project(mail): project = None listid_re = re.compile('.*<([^>]+)>.*', re.S) @@ -60,7 +77,7 @@ def find_project(mail): def find_author(mail): - from_header = mail.get('From').strip() + from_header = clean_header(mail.get('From')) (name, email) = (None, None) # tuple of (regex, fn) @@ -93,7 +110,7 @@ def find_author(mail): new_person = False try: - person = Person.objects.get(email = email) + person = Person.objects.get(email__iexact = email) except Person.DoesNotExist: person = Person(name = name, email = email) new_person = True @@ -103,7 +120,6 @@ def find_author(mail): def mail_date(mail): t = parsedate_tz(mail.get('Date', '')) if not t: - print "using now()" return datetime.datetime.utcnow() return datetime.datetime.utcfromtimestamp(mktime_tz(t)) @@ -149,9 +165,9 @@ def find_content(project, mail): if patchbuf: mail_headers(mail) - patch = Patch(name = clean_subject(mail.get('Subject')), - content = patchbuf, date = mail_date(mail), - headers = mail_headers(mail)) + name = clean_subject(mail.get('Subject'), [project.linkname]) + patch = Patch(name = name, content = patchbuf, + date = mail_date(mail), headers = mail_headers(mail)) if commentbuf: if patch: @@ -224,7 +240,6 @@ def split_prefixes(prefix): re_re = re.compile('^(re|fwd?)[:\s]\s*', re.I) prefix_re = re.compile('^\[([^\]]*)\]\s*(.*)$') -whitespace_re = re.compile('\s+') def clean_subject(subject, drop_prefixes = None): """ Clean a Subject: header from an incoming patch. @@ -240,6 +255,8 @@ def clean_subject(subject, drop_prefixes = None): 'meep' >>> clean_subject('[PATCH] meep') 'meep' + >>> clean_subject('[PATCH] meep \\n meep') + 'meep meep' >>> clean_subject('[PATCH RFC] meep') '[RFC] meep' >>> clean_subject('[PATCH,RFC] meep') @@ -274,6 +291,8 @@ def clean_subject(subject, drop_prefixes = None): # remove Re:, Fwd:, etc subject = re_re.sub(' ', subject) + subject = normalise_space(subject) + prefixes = [] match = prefix_re.match(subject) @@ -286,7 +305,7 @@ def clean_subject(subject, drop_prefixes = None): subject = match.group(2) match = prefix_re.match(subject) - subject = whitespace_re.sub(' ', subject) + subject = normalise_space(subject) subject = subject.strip() if prefixes: @@ -294,8 +313,9 @@ def clean_subject(subject, drop_prefixes = None): return subject -sig_re = re.compile('^(-{2,3} ?|_+)\n.*', re.S | re.M) +sig_re = re.compile('^(-- |_+)\n.*', re.S | re.M) def clean_content(str): + """ Try to remove signature (-- ) and list footer (_____) cruft """ str = sig_re.sub('', str) return str.strip()