git.ozlabs.org Git - patchwork/blob - patchwork/parser.py

   1 #!/usr/bin/env python
   2 #
   3 # Patchwork - automated patch tracking system
   4 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
   5 #
   6 # This file is part of the Patchwork package.
   7 #
   8 # Patchwork is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # Patchwork is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with Patchwork; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22
  23 import re
  24
  25 try:
  26     import hashlib
  27     sha1_hash = hashlib.sha1
  28 except ImportError:
  29     import sha
  30     sha1_hash = sha.sha
  31
  32 _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
  33 _filename_re = re.compile('^(---|\+\+\+) (\S+)')
  34
  35 def parse_patch(text):
  36     patchbuf = ''
  37     commentbuf = ''
  38     buf = ''
  39
  40     # state specified the line we just saw, and what to expect next
  41     state = 0
  42     # 0: text
  43     # 1: suspected patch header (diff, ====, Index:)
  44     # 2: patch header line 1 (---)
  45     # 3: patch header line 2 (+++)
  46     # 4: patch hunk header line (@@ line)
  47     # 5: patch hunk content
  48     # 6: patch meta header (rename from/rename to)
  49     #
  50     # valid transitions:
  51     #  0 -> 1 (diff, ===, Index:)
  52     #  0 -> 2 (---)
  53     #  1 -> 2 (---)
  54     #  2 -> 3 (+++)
  55     #  3 -> 4 (@@ line)
  56     #  4 -> 5 (patch content)
  57     #  5 -> 1 (run out of lines from @@-specifed count)
  58     #  1 -> 6 (rename from / rename to)
  59     #  6 -> 2 (---)
  60     #  6 -> 1 (other text)
  61     #
  62     # Suspected patch header is stored into buf, and appended to
  63     # patchbuf if we find a following hunk. Otherwise, append to
  64     # comment after parsing.
  65
  66     # line counts while parsing a patch hunk
  67     lc = (0, 0)
  68     hunk = 0
  69
  70
  71     for line in text.split('\n'):
  72         line += '\n'
  73
  74         if state == 0:
  75             if line.startswith('diff ') or line.startswith('===') \
  76                     or line.startswith('Index: '):
  77                 state = 1
  78                 buf += line
  79
  80             elif line.startswith('--- '):
  81                 state = 2
  82                 buf += line
  83
  84             else:
  85                 commentbuf += line
  86
  87         elif state == 1:
  88             buf += line
  89             if line.startswith('--- '):
  90                 state = 2
  91
  92             if line.startswith('rename from ') or line.startswith('rename to '):
  93                 state = 6
  94
  95         elif state == 2:
  96             if line.startswith('+++ '):
  97                 state = 3
  98                 buf += line
  99
 100             elif hunk:
 101                 state = 1
 102                 buf += line
 103
 104             else:
 105                 state = 0
 106                 commentbuf += buf + line
 107                 buf = ''
 108
 109         elif state == 3:
 110             match = _hunk_re.match(line)
 111             if match:
 112
 113                 def fn(x):
 114                     if not x:
 115                         return 1
 116                     return int(x)
 117
 118                 lc = map(fn, match.groups())
 119
 120                 state = 4
 121                 patchbuf += buf + line
 122                 buf = ''
 123
 124             elif line.startswith('--- '):
 125                 patchbuf += buf + line
 126                 buf = ''
 127                 state = 2
 128
 129             elif hunk and line.startswith('\ No newline at end of file'):
 130                 # If we had a hunk and now we see this, it's part of the patch,
 131                 # and we're still expecting another @@ line.
 132                 patchbuf += line
 133
 134             elif hunk:
 135                 state = 1
 136                 buf += line
 137
 138             else:
 139                 state = 0
 140                 commentbuf += buf + line
 141                 buf = ''
 142
 143         elif state == 4 or state == 5:
 144             if line.startswith('-'):
 145                 lc[0] -= 1
 146             elif line.startswith('+'):
 147                 lc[1] -= 1
 148             elif line.startswith('\ No newline at end of file'):
 149                 # Special case: Not included as part of the hunk's line count
 150                 pass
 151             else:
 152                 lc[0] -= 1
 153                 lc[1] -= 1
 154
 155             patchbuf += line
 156
 157             if lc[0] <= 0 and lc[1] <= 0:
 158                 state = 3
 159                 hunk += 1
 160             else:
 161                 state = 5
 162
 163         elif state == 6:
 164             if line.startswith('rename to ') or line.startswith('rename from '):
 165                 patchbuf += buf + line
 166                 buf = ''
 167
 168             elif line.startswith('--- '):
 169                 patchbuf += buf + line
 170                 buf = ''
 171                 state = 2
 172
 173             else:
 174                 buf += line
 175                 state = 1
 176
 177         else:
 178             raise Exception("Unknown state %d! (line '%s')" % (state, line))
 179
 180     commentbuf += buf
 181
 182     if patchbuf == '':
 183         patchbuf = None
 184
 185     if commentbuf == '':
 186         commentbuf = None
 187
 188     return (patchbuf, commentbuf)
 189
 190 def hash_patch(str):
 191     # normalise spaces
 192     str = str.replace('\r', '')
 193     str = str.strip() + '\n'
 194
 195     prefixes = ['-', '+', ' ']
 196     hash = sha1_hash()
 197
 198     for line in str.split('\n'):
 199
 200         if len(line) <= 0:
 201             continue
 202
 203         hunk_match = _hunk_re.match(line)
 204         filename_match = _filename_re.match(line)
 205
 206         if filename_match:
 207             # normalise -p1 top-directories
 208             if filename_match.group(1) == '---':
 209                 filename = 'a/'
 210             else:
 211                 filename = 'b/'
 212             filename += '/'.join(filename_match.group(2).split('/')[1:])
 213
 214             line = filename_match.group(1) + ' ' + filename
 215
 216         elif hunk_match:
 217             # remove line numbers, but leave line counts
 218             def fn(x):
 219                 if not x:
 220                     return 1
 221                 return int(x)
 222             line_nos = map(fn, hunk_match.groups())
 223             line = '@@ -%d +%d @@' % tuple(line_nos)
 224
 225         elif line[0] in prefixes:
 226             # if we have a +, - or context line, leave as-is
 227             pass
 228
 229         else:
 230             # other lines are ignored
 231             continue
 232
 233         hash.update(line.encode('utf-8') + '\n')
 234
 235     return hash
 236
 237
 238 def main(args):
 239     from optparse import OptionParser
 240
 241     parser = OptionParser()
 242     parser.add_option('-p', '--patch', action = 'store_true',
 243             dest = 'print_patch', help = 'print parsed patch')
 244     parser.add_option('-c', '--comment', action = 'store_true',
 245             dest = 'print_comment', help = 'print parsed comment')
 246     parser.add_option('-#', '--hash', action = 'store_true',
 247             dest = 'print_hash', help = 'print patch hash')
 248
 249     (options, args) = parser.parse_args()
 250
 251     # decode from (assumed) UTF-8
 252     content = sys.stdin.read().decode('utf-8')
 253
 254     (patch, comment) = parse_patch(content)
 255
 256     if options.print_hash and patch:
 257         print hash_patch(patch).hexdigest()
 258
 259     if options.print_patch and patch:
 260         print "Patch: ------\n" + patch
 261
 262     if options.print_comment and comment:
 263         print "Comment: ----\n" + comment
 264
 265 if __name__ == '__main__':
 266     import sys
 267     sys.exit(main(sys.argv))