X-Git-Url: https://git.ozlabs.org/?a=blobdiff_plain;f=apps%2Fpatchwork%2Fbin%2Fparsemail.py;h=b0f1497656ab8ac4170c93f34975454b4f9b0fad;hb=5787cddc0bde4514cba96a360f89841e13d2e506;hp=d41bd92b04048bd0d2db2da2adb2629f5a50712a;hpb=c561ebe710d6e6a43aa4afc6c2036a215378ce87;p=patchwork diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py index d41bd92..b0f1497 100755 --- a/apps/patchwork/bin/parsemail.py +++ b/apps/patchwork/bin/parsemail.py @@ -25,10 +25,15 @@ import datetime import time import operator from email import message_from_file -from email.header import Header -from email.utils import parsedate_tz, mktime_tz - -from patchparser import parse_patch +try: + from email.header import Header + from email.utils import parsedate_tz, mktime_tz +except ImportError: + # Python 2.4 compatibility + from email.Header import Header + from email.Utils import parsedate_tz, mktime_tz + +from patchwork.parser import parse_patch from patchwork.models import Patch, Project, Person, Comment list_id_headers = ['List-ID', 'X-Mailing-List'] @@ -85,17 +90,19 @@ def find_author(mail): if name is not None: name = name.strip() + new_person = False + try: person = Person.objects.get(email = email) except Person.DoesNotExist: person = Person(name = name, email = email) + new_person = True - return person + return (person, new_person) def mail_date(mail): t = parsedate_tz(mail.get('Date', '')) if not t: - print "using now()" return datetime.datetime.utcnow() return datetime.datetime.utcfromtimestamp(mktime_tz(t)) @@ -141,14 +148,14 @@ def find_content(project, mail): if patchbuf: mail_headers(mail) - patch = Patch(name = clean_subject(mail.get('Subject')), - content = patchbuf, date = mail_date(mail), - headers = mail_headers(mail)) + name = clean_subject(mail.get('Subject'), [project.linkname]) + patch = Patch(name = name, content = patchbuf, + date = mail_date(mail), headers = mail_headers(mail)) if commentbuf: if patch: - cpatch = patch - else: + cpatch = patch + else: cpatch = find_patch_for_comment(mail) if not cpatch: return (None, None) @@ -191,18 +198,109 @@ def find_patch_for_comment(mail): return None +split_re = re.compile('[,\s]+') + +def split_prefixes(prefix): + """ Turn a prefix string into a list of prefix tokens + + >>> split_prefixes('PATCH') + ['PATCH'] + >>> split_prefixes('PATCH,RFC') + ['PATCH', 'RFC'] + >>> split_prefixes('') + [] + >>> split_prefixes('PATCH,') + ['PATCH'] + >>> split_prefixes('PATCH ') + ['PATCH'] + >>> split_prefixes('PATCH,RFC') + ['PATCH', 'RFC'] + >>> split_prefixes('PATCH 1/2') + ['PATCH', '1/2'] + """ + matches = split_re.split(prefix) + return [ s for s in matches if s != '' ] + re_re = re.compile('^(re|fwd?)[:\s]\s*', re.I) -prefix_re = re.compile('^\[.*\]\s*') +prefix_re = re.compile('^\[([^\]]*)\]\s*(.*)$') whitespace_re = re.compile('\s+') -def clean_subject(subject): +def clean_subject(subject, drop_prefixes = None): + """ Clean a Subject: header from an incoming patch. + + Removes Re: and Fwd: strings, as well as [PATCH]-style prefixes. By + default, only [PATCH] is removed, and we keep any other bracketed data + in the subject. If drop_prefixes is provided, remove those too, + comparing case-insensitively. + + >>> clean_subject('meep') + 'meep' + >>> clean_subject('Re: meep') + 'meep' + >>> clean_subject('[PATCH] meep') + 'meep' + >>> clean_subject('[PATCH] meep \\n meep') + 'meep meep' + >>> clean_subject('[PATCH RFC] meep') + '[RFC] meep' + >>> clean_subject('[PATCH,RFC] meep') + '[RFC] meep' + >>> clean_subject('[PATCH,1/2] meep') + '[1/2] meep' + >>> clean_subject('[PATCH RFC 1/2] meep') + '[RFC,1/2] meep' + >>> clean_subject('[PATCH] [RFC] meep') + '[RFC] meep' + >>> clean_subject('[PATCH] [RFC,1/2] meep') + '[RFC,1/2] meep' + >>> clean_subject('[PATCH] [RFC] [1/2] meep') + '[RFC,1/2] meep' + >>> clean_subject('[PATCH] rewrite [a-z] regexes') + 'rewrite [a-z] regexes' + >>> clean_subject('[PATCH] [RFC] rewrite [a-z] regexes') + '[RFC] rewrite [a-z] regexes' + >>> clean_subject('[foo] [bar] meep', ['foo']) + '[bar] meep' + >>> clean_subject('[FOO] [bar] meep', ['foo']) + '[bar] meep' + """ + + if drop_prefixes is None: + drop_prefixes = [] + else: + drop_prefixes = [ s.lower() for s in drop_prefixes ] + + drop_prefixes.append('patch') + + # remove Re:, Fwd:, etc subject = re_re.sub(' ', subject) - subject = prefix_re.sub('', subject) + + # normalise whitespace subject = whitespace_re.sub(' ', subject) - return subject.strip() -sig_re = re.compile('^(-{2,3} ?|_+)\n.*', re.S | re.M) + prefixes = [] + + match = prefix_re.match(subject) + + while match: + prefix_str = match.group(1) + prefixes += [ p for p in split_prefixes(prefix_str) \ + if p.lower() not in drop_prefixes] + + subject = match.group(2) + match = prefix_re.match(subject) + + subject = whitespace_re.sub(' ', subject) + + subject = subject.strip() + if prefixes: + subject = '[%s] %s' % (','.join(prefixes), subject) + + return subject + +sig_re = re.compile('^(-- |_+)\n.*', re.S | re.M) def clean_content(str): + """ Try to remove signature (-- ) and list footer (_____) cruft """ str = sig_re.sub('', str) return str.strip() @@ -230,22 +328,26 @@ def main(args): msgid = mail.get('Message-Id').strip() - author = find_author(mail) + (author, save_required) = find_author(mail) (patch, comment) = find_content(project, mail) if patch: - author.save() + # we delay the saving until we know we have a patch. + if save_required: + author.save() + save_required = False patch.submitter = author patch.msgid = msgid patch.project = project try: patch.save() except Exception, ex: - print ex.message + print str(ex) if comment: - author.save() + if save_required: + author.save() # looks like the original constructor for Comment takes the pk # when the Comment is created. reset it here. if patch: @@ -255,7 +357,7 @@ def main(args): try: comment.save() except Exception, ex: - print ex.message + print str(ex) return 0