3 # Patchwork - automated patch tracking system
4 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
6 # This file is part of the Patchwork package.
8 # Patchwork is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # Patchwork is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Patchwork; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 from email import message_from_file
29 from email.header import Header, decode_header
30 from email.utils import parsedate_tz, mktime_tz
32 # Python 2.4 compatibility
33 from email.Header import Header, decode_header
34 from email.Utils import parsedate_tz, mktime_tz
36 from patchwork.parser import parse_patch
37 from patchwork.models import Patch, Project, Person, Comment, State
38 from django.contrib.auth.models import User
# Name of the State that get_state() falls back to when the state requested
# by the mail cannot be found.
default_patch_state = 'New'
# Mail headers that find_project() checks, in this order, when extracting the
# list id used to look up the Project.
list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list']
# Raw string: '\s' is not a valid string escape, so the pattern must not rely
# on the backslash surviving ordinary string-literal processing.
whitespace_re = re.compile(r'\s+')
def normalise_space(str):
    """ Collapse whitespace runs to single spaces and trim both ends.

    Every run of whitespace (spaces, tabs, newlines, ...) in ``str`` is
    replaced by one space, then leading and trailing whitespace is removed.
    The parameter keeps its original name ``str`` so existing keyword callers
    are unaffected, even though it shadows the builtin inside this function.

    >>> normalise_space('  a \\n\\t b  ')
    'a b'
    """
    return whitespace_re.sub(' ', str).strip()
47 def clean_header(header):
48 """ Decode (possibly non-ascii) headers """
51 (frag_str, frag_encoding) = fragment
53 return frag_str.decode(frag_encoding)
54 return frag_str.decode()
56 fragments = map(decode, decode_header(header))
58 return normalise_space(u' '.join(fragments))
60 def find_project(mail):
62 listid_res = [re.compile('.*<([^>]+)>.*', re.S),
63 re.compile('^([\S]+)$', re.S)]
65 for header in list_id_headers:
68 for listid_re in listid_res:
69 match = listid_re.match(mail.get(header))
76 listid = match.group(1)
79 project = Project.objects.get(listid = listid)
86 def find_author(mail):
88 from_header = clean_header(mail.get('From'))
89 (name, email) = (None, None)
91 # tuple of (regex, fn)
92 # - where fn returns a (name, email) tuple from the match groups resulting
93 # from re.match().groups()
95 # for "Firstname Lastname" <example@example.com> style addresses
96 (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))),
98 # for example@example.com (Firstname Lastname) style addresses
99 (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))),
102 (re.compile('(.*)'), (lambda g: (None, g[0]))),
105 for regex, fn in from_res:
106 match = regex.match(from_header)
108 (name, email) = fn(match.groups())
112 raise Exception("Could not parse From: header")
114 email = email.strip()
121 person = Person.objects.get(email__iexact = email)
122 except Person.DoesNotExist:
123 person = Person(name = name, email = email)
126 return (person, new_person)
129 t = parsedate_tz(mail.get('Date', ''))
131 return datetime.datetime.utcnow()
132 return datetime.datetime.utcfromtimestamp(mktime_tz(t))
def mail_headers(mail):
    """ Return all headers of ``mail`` as one block of 'Name: value' lines.

    Each (name, value) pair from ``mail.items()`` is rendered as
    ``'<name>: <encoded value>\\n'``, where the value is passed through
    ``Header(value, header_name=name, continuation_ws='\\t').encode()``.
    The rendered lines are concatenated in the order ``mail.items()`` yields
    them.

    Returns an empty string when the message has no headers; the previous
    reduce()-based concatenation raised TypeError on an empty header list.
    """
    return ''.join(['%s: %s\n' % (k, Header(v, header_name = k,
                                           continuation_ws = '\t').encode())
                    for (k, v) in mail.items()])
# Compiled once at import time, like the other module-level patterns in this
# file. DOTALL lets '.*' span the lines between the two marker sentences;
# MULTILINE lets '^' and '$' anchor on individual lines of the mail body.
_pull_request_re = re.compile(
    r'^The following changes since commit.*'
    r'^are available in the git repository at:\n'
    r'^\s*(git://[^\n]+)$',
    re.DOTALL | re.MULTILINE)

def find_pull_request(content):
    """ Extract the git URL line from a pull-request style mail body.

    Looks for a line starting with "The following changes since commit",
    followed later by the line "are available in the git repository at:",
    and returns the rest of the next line starting at "git://" (this includes
    anything that follows the URL on that line, such as a branch name).

    Returns None when ``content`` does not contain such a request.
    """
    match = _pull_request_re.search(content)
    # search() returns None when nothing matches; do not dereference it.
    if match is None:
        return None
    return match.group(1)
150 def find_content(project, mail):
155 for part in mail.walk():
156 if part.get_content_maintype() != 'text':
159 payload = part.get_payload(decode=True)
160 charset = part.get_content_charset()
161 subtype = part.get_content_subtype()
163 # if we don't have a charset, assume utf-8
167 if not isinstance(payload, unicode):
168 payload = unicode(payload, charset)
170 if subtype in ['x-patch', 'x-diff']:
173 elif subtype == 'plain':
177 (patchbuf, c) = parse_patch(payload)
180 pullurl = find_pull_request(payload)
183 commentbuf += c.strip() + '\n'
188 if pullurl or patchbuf:
189 name = clean_subject(mail.get('Subject'), [project.linkname])
190 patch = Patch(name = name, pull_url = pullurl, content = patchbuf,
191 date = mail_date(mail), headers = mail_headers(mail))
197 cpatch = find_patch_for_comment(project, mail)
200 comment = Comment(patch = cpatch, date = mail_date(mail),
201 content = clean_content(commentbuf),
202 headers = mail_headers(mail))
204 return (patch, comment)
206 def find_patch_for_comment(project, mail):
207 # construct a list of possible reply message ids
209 if 'In-Reply-To' in mail:
210 refs.append(mail.get('In-Reply-To'))
212 if 'References' in mail:
213 rs = mail.get('References').split()
222 # first, check for a direct reply
224 patch = Patch.objects.get(project = project, msgid = ref)
226 except Patch.DoesNotExist:
229 # see if we have comments that refer to a patch
231 comment = Comment.objects.get(patch__project = project, msgid = ref)
233 except Comment.DoesNotExist:
# Prefix tokens are separated by any run of commas and/or whitespace.
split_re = re.compile(r'[,\s]+')

def split_prefixes(prefix):
    """ Turn a prefix string into a list of prefix tokens

    The string is split on runs of commas and whitespace; empty tokens (from
    an empty input or from leading/trailing separators) are dropped.

    >>> split_prefixes('PATCH')
    ['PATCH']
    >>> split_prefixes('PATCH,RFC')
    ['PATCH', 'RFC']
    >>> split_prefixes('')
    []
    >>> split_prefixes('PATCH,')
    ['PATCH']
    >>> split_prefixes('PATCH ')
    ['PATCH']
    >>> split_prefixes('PATCH, RFC')
    ['PATCH', 'RFC']
    >>> split_prefixes('PATCH 1/2')
    ['PATCH', '1/2']
    """
    return [ s for s in split_re.split(prefix) if s ]
# Matches one leading reply/forward marker ("re", "fw" or "fwd", in any case)
# that is followed by a colon or whitespace, plus any whitespace after it.
re_re = re.compile(r'^(re|fwd?)[:\s]\s*', re.I)
# Matches a leading "[...]" tag: group 1 is the text inside the brackets,
# group 2 is the remainder of the line after optional whitespace.
prefix_re = re.compile(r'^\[([^\]]*)\]\s*(.*)$')
265 def clean_subject(subject, drop_prefixes = None):
266 """ Clean a Subject: header from an incoming patch.
268 Removes Re: and Fwd: strings, as well as [PATCH]-style prefixes. By
269 default, only [PATCH] is removed, and we keep any other bracketed data
270 in the subject. If drop_prefixes is provided, remove those too,
271 comparing case-insensitively.
273 >>> clean_subject('meep')
275 >>> clean_subject('Re: meep')
277 >>> clean_subject('[PATCH] meep')
279 >>> clean_subject('[PATCH] meep \\n meep')
281 >>> clean_subject('[PATCH RFC] meep')
283 >>> clean_subject('[PATCH,RFC] meep')
285 >>> clean_subject('[PATCH,1/2] meep')
287 >>> clean_subject('[PATCH RFC 1/2] meep')
289 >>> clean_subject('[PATCH] [RFC] meep')
291 >>> clean_subject('[PATCH] [RFC,1/2] meep')
293 >>> clean_subject('[PATCH] [RFC] [1/2] meep')
295 >>> clean_subject('[PATCH] rewrite [a-z] regexes')
296 'rewrite [a-z] regexes'
297 >>> clean_subject('[PATCH] [RFC] rewrite [a-z] regexes')
298 '[RFC] rewrite [a-z] regexes'
299 >>> clean_subject('[foo] [bar] meep', ['foo'])
301 >>> clean_subject('[FOO] [bar] meep', ['foo'])
305 subject = clean_header(subject)
307 if drop_prefixes is None:
310 drop_prefixes = [ s.lower() for s in drop_prefixes ]
312 drop_prefixes.append('patch')
314 # remove Re:, Fwd:, etc
315 subject = re_re.sub(' ', subject)
317 subject = normalise_space(subject)
321 match = prefix_re.match(subject)
324 prefix_str = match.group(1)
325 prefixes += [ p for p in split_prefixes(prefix_str) \
326 if p.lower() not in drop_prefixes]
328 subject = match.group(2)
329 match = prefix_re.match(subject)
331 subject = normalise_space(subject)
333 subject = subject.strip()
335 subject = '[%s] %s' % (','.join(prefixes), subject)
339 sig_re = re.compile('^(-- |_+)\n.*', re.S | re.M)
340 def clean_content(str):
341 """ Try to remove signature (-- ) and list footer (_____) cruft """
342 str = sig_re.sub('', str)
345 def get_state(state_name):
346 """ Return the state with the given name or the default State """
349 return State.objects.get(name__iexact=state_name)
350 except State.DoesNotExist:
352 return State.objects.get(name=default_patch_state)
354 def get_delegate(delegate_email):
355 """ Return the delegate with the given email or None """
358 return User.objects.get(email__iexact=delegate_email)
359 except User.DoesNotExist:
363 def parse_mail(mail):
365 # some basic sanity checks
366 if 'From' not in mail:
369 if 'Subject' not in mail:
372 if 'Message-Id' not in mail:
375 hint = mail.get('X-Patchwork-Hint', '').lower()
379 project = find_project(mail)
381 print "no project found"
384 msgid = mail.get('Message-Id').strip()
386 (author, save_required) = find_author(mail)
388 (patch, comment) = find_content(project, mail)
391 # we delay the saving until we know we have a patch.
394 save_required = False
395 patch.submitter = author
397 patch.project = project
398 patch.state = get_state(mail.get('X-Patchwork-State', '').strip())
399 patch.delegate = get_delegate(
400 mail.get('X-Patchwork-Delegate', '').strip())
403 except Exception, ex:
409 # looks like the original constructor for Comment takes the pk
410 # when the Comment is created. reset it here.
412 comment.patch = patch
413 comment.submitter = author
414 comment.msgid = msgid
417 except Exception, ex:
423 mail = message_from_file(sys.stdin)
424 return parse_mail(mail)
426 if __name__ == '__main__':
427 sys.exit(main(sys.argv))