3 # Patchwork - automated patch tracking system
4 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
6 # This file is part of the Patchwork package.
8 # Patchwork is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # Patchwork is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Patchwork; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 from email import message_from_file
29 from email.header import Header, decode_header
30 from email.utils import parsedate_tz, mktime_tz
32 # Python 2.4 compatibility
33 from email.Header import Header, decode_header
34 from email.Utils import parsedate_tz, mktime_tz
36 from patchwork.parser import parse_patch
37 from patchwork.models import Patch, Project, Person, Comment, State
38 from django.contrib.auth.models import User
# Name of the State looked up for a patch when the requested state name
# does not identify an existing State.
default_patch_state = 'New'

# Mail headers consulted, in this order, when looking for a list id.
list_id_headers = [
    'List-ID',
    'X-Mailing-List',
    'X-list',
]
# One or more consecutive whitespace characters (spaces, tabs, newlines).
whitespace_re = re.compile(r'\s+')


def normalise_space(str):
    """ Collapse each run of whitespace in str into a single space and
    remove leading and trailing whitespace.

    >>> normalise_space('  a \\n\\t b  ')
    'a b'
    >>> normalise_space('')
    ''
    """
    # the parameter keeps its original name (which shadows the builtin)
    # so that any caller passing it by keyword continues to work
    return whitespace_re.sub(' ', str).strip()
47 def clean_header(header):
48 """ Decode (possibly non-ascii) headers """
51 (frag_str, frag_encoding) = fragment
53 return frag_str.decode(frag_encoding)
54 return frag_str.decode()
56 fragments = map(decode, decode_header(header))
58 return normalise_space(u' '.join(fragments))
60 def find_project(mail):
62 listid_res = [re.compile('.*<([^>]+)>.*', re.S),
63 re.compile('^([\S]+)$', re.S)]
65 for header in list_id_headers:
68 for listid_re in listid_res:
69 match = listid_re.match(mail.get(header))
76 listid = match.group(1)
79 project = Project.objects.get(listid = listid)
86 def find_author(mail):
88 from_header = clean_header(mail.get('From'))
89 (name, email) = (None, None)
91 # tuple of (regex, fn)
92 # - where fn returns a (name, email) tuple from the match groups resulting
93 # from re.match().groups()
95 # for "Firstname Lastname" <example@example.com> style addresses
96 (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))),
98 # for example@example.com (Firstname Lastname) style addresses
99 (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))),
102 (re.compile('(.*)'), (lambda g: (None, g[0]))),
105 for regex, fn in from_res:
106 match = regex.match(from_header)
108 (name, email) = fn(match.groups())
112 raise Exception("Could not parse From: header")
114 email = email.strip()
121 person = Person.objects.get(email__iexact = email)
122 except Person.DoesNotExist:
123 person = Person(name = name, email = email)
126 return (person, new_person)
129 t = parsedate_tz(mail.get('Date', ''))
131 return datetime.datetime.utcnow()
132 return datetime.datetime.utcfromtimestamp(mktime_tz(t))
def mail_headers(mail):
    """ Render all headers of mail as a single string.

    Each header becomes a 'Name: value' entry terminated by a newline.
    The value is passed through email's Header class, using a tab as the
    continuation whitespace, before being formatted.

    mail is any object whose items() yields (name, value) pairs, such as
    an email Message. Returns the concatenated entries in items() order,
    or the empty string if there are no headers.
    """
    entries = []
    for (name, value) in mail.items():
        encoded = Header(value, header_name=name,
                         continuation_ws='\t').encode()
        entries.append('%s: %s\n' % (name, encoded))

    # join() copes with an empty header list, where reduce() without an
    # initial value raises TypeError, and avoids repeated concatenation
    return ''.join(entries)
def find_pull_request(content):
    """ Look for a git pull request in a message body.

    A pull request is recognised by a line starting with 'The following
    changes since commit', later followed by the line 'are available in
    the git repository at:' and then a line holding a git:// URL.

    Returns the text from 'git://' to the end of that line (so anything
    following the URL on the same line is included), or None if content
    contains no such request.
    """
    git_re = re.compile(r'^The following changes since commit.*'
                        r'^are available in the git repository at:\n'
                        r'^\s*(git://[^\n]+)$',
                        re.DOTALL | re.MULTILINE)
    match = git_re.search(content)
    # search() returns None when nothing matches; do not dereference it
    if match is None:
        return None
    return match.group(1)
150 def find_content(project, mail):
155 for part in mail.walk():
156 if part.get_content_maintype() != 'text':
159 payload = part.get_payload(decode=True)
160 charset = part.get_content_charset()
161 subtype = part.get_content_subtype()
163 # if we don't have a charset, assume utf-8
167 if not isinstance(payload, unicode):
168 payload = unicode(payload, charset)
170 if subtype in ['x-patch', 'x-diff']:
173 elif subtype == 'plain':
177 (patchbuf, c) = parse_patch(payload)
180 pullurl = find_pull_request(payload)
183 commentbuf += c.strip() + '\n'
190 name = clean_subject(mail.get('Subject'), [project.linkname])
191 patch = Patch(name = name, content = patchbuf,
192 date = mail_date(mail), headers = mail_headers(mail))
195 name = clean_subject(mail.get('Subject'), [project.linkname])
196 patch = Patch(name = name, pull_url = pullurl,
197 date = mail_date(mail), headers = mail_headers(mail))
203 cpatch = find_patch_for_comment(project, mail)
206 comment = Comment(patch = cpatch, date = mail_date(mail),
207 content = clean_content(commentbuf),
208 headers = mail_headers(mail))
210 return (patch, comment)
212 def find_patch_for_comment(project, mail):
213 # construct a list of possible reply message ids
215 if 'In-Reply-To' in mail:
216 refs.append(mail.get('In-Reply-To'))
218 if 'References' in mail:
219 rs = mail.get('References').split()
228 # first, check for a direct reply
230 patch = Patch.objects.get(project = project, msgid = ref)
232 except Patch.DoesNotExist:
235 # see if we have comments that refer to a patch
237 comment = Comment.objects.get(patch__project = project, msgid = ref)
239 except Comment.DoesNotExist:
# Prefix tokens are separated by commas and/or whitespace.
split_re = re.compile(r'[,\s]+')


def split_prefixes(prefix):
    """ Turn a prefix string into a list of prefix tokens

    >>> split_prefixes('PATCH')
    ['PATCH']
    >>> split_prefixes('PATCH,RFC')
    ['PATCH', 'RFC']
    >>> split_prefixes('')
    []
    >>> split_prefixes('PATCH,')
    ['PATCH']
    >>> split_prefixes('PATCH ')
    ['PATCH']
    >>> split_prefixes('PATCH,RFC')
    ['PATCH', 'RFC']
    >>> split_prefixes('PATCH 1/2')
    ['PATCH', '1/2']
    """
    # split() yields empty strings for leading/trailing separators and for
    # an empty input; those are not tokens, so drop them
    return [token for token in split_re.split(prefix) if token != '']
268 re_re = re.compile('^(re|fwd?)[:\s]\s*', re.I)
269 prefix_re = re.compile('^\[([^\]]*)\]\s*(.*)$')
271 def clean_subject(subject, drop_prefixes = None):
272 """ Clean a Subject: header from an incoming patch.
274 Removes Re: and Fwd: strings, as well as [PATCH]-style prefixes. By
275 default, only [PATCH] is removed, and we keep any other bracketed data
276 in the subject. If drop_prefixes is provided, remove those too,
277 comparing case-insensitively.
279 >>> clean_subject('meep')
281 >>> clean_subject('Re: meep')
283 >>> clean_subject('[PATCH] meep')
285 >>> clean_subject('[PATCH] meep \\n meep')
287 >>> clean_subject('[PATCH RFC] meep')
289 >>> clean_subject('[PATCH,RFC] meep')
291 >>> clean_subject('[PATCH,1/2] meep')
293 >>> clean_subject('[PATCH RFC 1/2] meep')
295 >>> clean_subject('[PATCH] [RFC] meep')
297 >>> clean_subject('[PATCH] [RFC,1/2] meep')
299 >>> clean_subject('[PATCH] [RFC] [1/2] meep')
301 >>> clean_subject('[PATCH] rewrite [a-z] regexes')
302 'rewrite [a-z] regexes'
303 >>> clean_subject('[PATCH] [RFC] rewrite [a-z] regexes')
304 '[RFC] rewrite [a-z] regexes'
305 >>> clean_subject('[foo] [bar] meep', ['foo'])
307 >>> clean_subject('[FOO] [bar] meep', ['foo'])
311 subject = clean_header(subject)
313 if drop_prefixes is None:
316 drop_prefixes = [ s.lower() for s in drop_prefixes ]
318 drop_prefixes.append('patch')
320 # remove Re:, Fwd:, etc
321 subject = re_re.sub(' ', subject)
323 subject = normalise_space(subject)
327 match = prefix_re.match(subject)
330 prefix_str = match.group(1)
331 prefixes += [ p for p in split_prefixes(prefix_str) \
332 if p.lower() not in drop_prefixes]
334 subject = match.group(2)
335 match = prefix_re.match(subject)
337 subject = normalise_space(subject)
339 subject = subject.strip()
341 subject = '[%s] %s' % (','.join(prefixes), subject)
345 sig_re = re.compile('^(-- |_+)\n.*', re.S | re.M)
346 def clean_content(str):
347 """ Try to remove signature (-- ) and list footer (_____) cruft """
348 str = sig_re.sub('', str)
351 def get_state(state_name):
352 """ Return the state with the given name or the default State """
355 return State.objects.get(name__iexact=state_name)
356 except State.DoesNotExist:
358 return State.objects.get(name=default_patch_state)
360 def get_delegate(delegate_email):
361 """ Return the delegate with the given email or None """
364 return User.objects.get(email__iexact=delegate_email)
365 except User.DoesNotExist:
369 def parse_mail(mail):
371 # some basic sanity checks
372 if 'From' not in mail:
375 if 'Subject' not in mail:
378 if 'Message-Id' not in mail:
381 hint = mail.get('X-Patchwork-Hint', '').lower()
385 project = find_project(mail)
387 print "no project found"
390 msgid = mail.get('Message-Id').strip()
392 (author, save_required) = find_author(mail)
394 (patch, comment) = find_content(project, mail)
397 # we delay the saving until we know we have a patch.
400 save_required = False
401 patch.submitter = author
403 patch.project = project
404 patch.state = get_state(mail.get('X-Patchwork-State', '').strip())
405 patch.delegate = get_delegate(
406 mail.get('X-Patchwork-Delegate', '').strip())
409 except Exception, ex:
415 # looks like the original constructor for Comment takes the pk
416 # when the Comment is created. reset it here.
418 comment.patch = patch
419 comment.submitter = author
420 comment.msgid = msgid
423 except Exception, ex:
429 mail = message_from_file(sys.stdin)
430 return parse_mail(mail)
# Script entry point: hand the command-line arguments to main() and use
# whatever it returns as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)