git.ozlabs.org Git - patchwork/blob - patchwork/parser.py

   1 #!/usr/bin/env python
   2 #
   3 # Patchwork - automated patch tracking system
   4 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
   5 #
   6 # This file is part of the Patchwork package.
   7 #
   8 # Patchwork is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # Patchwork is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with Patchwork; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22
  23 import re
  24 from collections import Counter
  25
  26 try:
  27     import hashlib
  28     sha1_hash = hashlib.sha1
  29 except ImportError:
  30     import sha
  31     sha1_hash = sha.sha
  32
  33 _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
  34 _filename_re = re.compile('^(---|\+\+\+) (\S+)')
  35
  36 def parse_patch(text):
  37     patchbuf = ''
  38     commentbuf = ''
  39     buf = ''
  40
  41     # state specified the line we just saw, and what to expect next
  42     state = 0
  43     # 0: text
  44     # 1: suspected patch header (diff, ====, Index:)
  45     # 2: patch header line 1 (---)
  46     # 3: patch header line 2 (+++)
  47     # 4: patch hunk header line (@@ line)
  48     # 5: patch hunk content
  49     # 6: patch meta header (rename from/rename to)
  50     #
  51     # valid transitions:
  52     #  0 -> 1 (diff, ===, Index:)
  53     #  0 -> 2 (---)
  54     #  1 -> 2 (---)
  55     #  2 -> 3 (+++)
  56     #  3 -> 4 (@@ line)
  57     #  4 -> 5 (patch content)
  58     #  5 -> 1 (run out of lines from @@-specifed count)
  59     #  1 -> 6 (rename from / rename to)
  60     #  6 -> 2 (---)
  61     #  6 -> 1 (other text)
  62     #
  63     # Suspected patch header is stored into buf, and appended to
  64     # patchbuf if we find a following hunk. Otherwise, append to
  65     # comment after parsing.
  66
  67     # line counts while parsing a patch hunk
  68     lc = (0, 0)
  69     hunk = 0
  70
  71
  72     for line in text.split('\n'):
  73         line += '\n'
  74
  75         if state == 0:
  76             if line.startswith('diff ') or line.startswith('===') \
  77                     or line.startswith('Index: '):
  78                 state = 1
  79                 buf += line
  80
  81             elif line.startswith('--- '):
  82                 state = 2
  83                 buf += line
  84
  85             else:
  86                 commentbuf += line
  87
  88         elif state == 1:
  89             buf += line
  90             if line.startswith('--- '):
  91                 state = 2
  92
  93             if line.startswith('rename from ') or line.startswith('rename to '):
  94                 state = 6
  95
  96         elif state == 2:
  97             if line.startswith('+++ '):
  98                 state = 3
  99                 buf += line
 100
 101             elif hunk:
 102                 state = 1
 103                 buf += line
 104
 105             else:
 106                 state = 0
 107                 commentbuf += buf + line
 108                 buf = ''
 109
 110         elif state == 3:
 111             match = _hunk_re.match(line)
 112             if match:
 113
 114                 def fn(x):
 115                     if not x:
 116                         return 1
 117                     return int(x)
 118
 119                 lc = map(fn, match.groups())
 120
 121                 state = 4
 122                 patchbuf += buf + line
 123                 buf = ''
 124
 125             elif line.startswith('--- '):
 126                 patchbuf += buf + line
 127                 buf = ''
 128                 state = 2
 129
 130             elif hunk and line.startswith('\ No newline at end of file'):
 131                 # If we had a hunk and now we see this, it's part of the patch,
 132                 # and we're still expecting another @@ line.
 133                 patchbuf += line
 134
 135             elif hunk:
 136                 state = 1
 137                 buf += line
 138
 139             else:
 140                 state = 0
 141                 commentbuf += buf + line
 142                 buf = ''
 143
 144         elif state == 4 or state == 5:
 145             if line.startswith('-'):
 146                 lc[0] -= 1
 147             elif line.startswith('+'):
 148                 lc[1] -= 1
 149             elif line.startswith('\ No newline at end of file'):
 150                 # Special case: Not included as part of the hunk's line count
 151                 pass
 152             else:
 153                 lc[0] -= 1
 154                 lc[1] -= 1
 155
 156             patchbuf += line
 157
 158             if lc[0] <= 0 and lc[1] <= 0:
 159                 state = 3
 160                 hunk += 1
 161             else:
 162                 state = 5
 163
 164         elif state == 6:
 165             if line.startswith('rename to ') or line.startswith('rename from '):
 166                 patchbuf += buf + line
 167                 buf = ''
 168
 169             elif line.startswith('--- '):
 170                 patchbuf += buf + line
 171                 buf = ''
 172                 state = 2
 173
 174             else:
 175                 buf += line
 176                 state = 1
 177
 178         else:
 179             raise Exception("Unknown state %d! (line '%s')" % (state, line))
 180
 181     commentbuf += buf
 182
 183     if patchbuf == '':
 184         patchbuf = None
 185
 186     if commentbuf == '':
 187         commentbuf = None
 188
 189     return (patchbuf, commentbuf)
 190
 191 def hash_patch(str):
 192     # normalise spaces
 193     str = str.replace('\r', '')
 194     str = str.strip() + '\n'
 195
 196     prefixes = ['-', '+', ' ']
 197     hash = sha1_hash()
 198
 199     for line in str.split('\n'):
 200
 201         if len(line) <= 0:
 202             continue
 203
 204         hunk_match = _hunk_re.match(line)
 205         filename_match = _filename_re.match(line)
 206
 207         if filename_match:
 208             # normalise -p1 top-directories
 209             if filename_match.group(1) == '---':
 210                 filename = 'a/'
 211             else:
 212                 filename = 'b/'
 213             filename += '/'.join(filename_match.group(2).split('/')[1:])
 214
 215             line = filename_match.group(1) + ' ' + filename
 216
 217         elif hunk_match:
 218             # remove line numbers, but leave line counts
 219             def fn(x):
 220                 if not x:
 221                     return 1
 222                 return int(x)
 223             line_nos = map(fn, hunk_match.groups())
 224             line = '@@ -%d +%d @@' % tuple(line_nos)
 225
 226         elif line[0] in prefixes:
 227             # if we have a +, - or context line, leave as-is
 228             pass
 229
 230         else:
 231             # other lines are ignored
 232             continue
 233
 234         hash.update(line.encode('utf-8') + '\n')
 235
 236     return hash
 237
 238 def extract_tags(content, tags):
 239     counts = Counter()
 240
 241     for tag in tags:
 242         regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
 243         counts[tag] = len(regex.findall(content))
 244
 245     return counts
 246
 247 def main(args):
 248     from optparse import OptionParser
 249
 250     parser = OptionParser()
 251     parser.add_option('-p', '--patch', action = 'store_true',
 252             dest = 'print_patch', help = 'print parsed patch')
 253     parser.add_option('-c', '--comment', action = 'store_true',
 254             dest = 'print_comment', help = 'print parsed comment')
 255     parser.add_option('-#', '--hash', action = 'store_true',
 256             dest = 'print_hash', help = 'print patch hash')
 257
 258     (options, args) = parser.parse_args()
 259
 260     # decode from (assumed) UTF-8
 261     content = sys.stdin.read().decode('utf-8')
 262
 263     (patch, comment) = parse_patch(content)
 264
 265     if options.print_hash and patch:
 266         print hash_patch(patch).hexdigest()
 267
 268     if options.print_patch and patch:
 269         print "Patch: ------\n" + patch
 270
 271     if options.print_comment and comment:
 272         print "Comment: ----\n" + comment
 273
 274 if __name__ == '__main__':
 275     import sys
 276     sys.exit(main(sys.argv))