git.ozlabs.org Git - patchwork/blob - patchwork/parser.py

   1 #!/usr/bin/env python
   2 #
   3 # Patchwork - automated patch tracking system
   4 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
   5 #
   6 # This file is part of the Patchwork package.
   7 #
   8 # Patchwork is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # Patchwork is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with Patchwork; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22
  23 import hashlib
  24 import re
  25 from collections import Counter
  26
  27 _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
  28 _filename_re = re.compile('^(---|\+\+\+) (\S+)')
  29
  30 def parse_patch(text):
  31     patchbuf = ''
  32     commentbuf = ''
  33     buf = ''
  34
  35     # state specified the line we just saw, and what to expect next
  36     state = 0
  37     # 0: text
  38     # 1: suspected patch header (diff, ====, Index:)
  39     # 2: patch header line 1 (---)
  40     # 3: patch header line 2 (+++)
  41     # 4: patch hunk header line (@@ line)
  42     # 5: patch hunk content
  43     # 6: patch meta header (rename from/rename to)
  44     #
  45     # valid transitions:
  46     #  0 -> 1 (diff, ===, Index:)
  47     #  0 -> 2 (---)
  48     #  1 -> 2 (---)
  49     #  2 -> 3 (+++)
  50     #  3 -> 4 (@@ line)
  51     #  4 -> 5 (patch content)
  52     #  5 -> 1 (run out of lines from @@-specifed count)
  53     #  1 -> 6 (rename from / rename to)
  54     #  6 -> 2 (---)
  55     #  6 -> 1 (other text)
  56     #
  57     # Suspected patch header is stored into buf, and appended to
  58     # patchbuf if we find a following hunk. Otherwise, append to
  59     # comment after parsing.
  60
  61     # line counts while parsing a patch hunk
  62     lc = (0, 0)
  63     hunk = 0
  64
  65
  66     for line in text.split('\n'):
  67         line += '\n'
  68
  69         if state == 0:
  70             if line.startswith('diff ') or line.startswith('===') \
  71                     or line.startswith('Index: '):
  72                 state = 1
  73                 buf += line
  74
  75             elif line.startswith('--- '):
  76                 state = 2
  77                 buf += line
  78
  79             else:
  80                 commentbuf += line
  81
  82         elif state == 1:
  83             buf += line
  84             if line.startswith('--- '):
  85                 state = 2
  86
  87             if line.startswith('rename from ') or line.startswith('rename to '):
  88                 state = 6
  89
  90         elif state == 2:
  91             if line.startswith('+++ '):
  92                 state = 3
  93                 buf += line
  94
  95             elif hunk:
  96                 state = 1
  97                 buf += line
  98
  99             else:
 100                 state = 0
 101                 commentbuf += buf + line
 102                 buf = ''
 103
 104         elif state == 3:
 105             match = _hunk_re.match(line)
 106             if match:
 107
 108                 def fn(x):
 109                     if not x:
 110                         return 1
 111                     return int(x)
 112
 113                 lc = map(fn, match.groups())
 114
 115                 state = 4
 116                 patchbuf += buf + line
 117                 buf = ''
 118
 119             elif line.startswith('--- '):
 120                 patchbuf += buf + line
 121                 buf = ''
 122                 state = 2
 123
 124             elif hunk and line.startswith('\ No newline at end of file'):
 125                 # If we had a hunk and now we see this, it's part of the patch,
 126                 # and we're still expecting another @@ line.
 127                 patchbuf += line
 128
 129             elif hunk:
 130                 state = 1
 131                 buf += line
 132
 133             else:
 134                 state = 0
 135                 commentbuf += buf + line
 136                 buf = ''
 137
 138         elif state == 4 or state == 5:
 139             if line.startswith('-'):
 140                 lc[0] -= 1
 141             elif line.startswith('+'):
 142                 lc[1] -= 1
 143             elif line.startswith('\ No newline at end of file'):
 144                 # Special case: Not included as part of the hunk's line count
 145                 pass
 146             else:
 147                 lc[0] -= 1
 148                 lc[1] -= 1
 149
 150             patchbuf += line
 151
 152             if lc[0] <= 0 and lc[1] <= 0:
 153                 state = 3
 154                 hunk += 1
 155             else:
 156                 state = 5
 157
 158         elif state == 6:
 159             if line.startswith('rename to ') or line.startswith('rename from '):
 160                 patchbuf += buf + line
 161                 buf = ''
 162
 163             elif line.startswith('--- '):
 164                 patchbuf += buf + line
 165                 buf = ''
 166                 state = 2
 167
 168             else:
 169                 buf += line
 170                 state = 1
 171
 172         else:
 173             raise Exception("Unknown state %d! (line '%s')" % (state, line))
 174
 175     commentbuf += buf
 176
 177     if patchbuf == '':
 178         patchbuf = None
 179
 180     if commentbuf == '':
 181         commentbuf = None
 182
 183     return (patchbuf, commentbuf)
 184
 185 def hash_patch(str):
 186     # normalise spaces
 187     str = str.replace('\r', '')
 188     str = str.strip() + '\n'
 189
 190     prefixes = ['-', '+', ' ']
 191     hash = hashlib.sha1()
 192
 193     for line in str.split('\n'):
 194
 195         if len(line) <= 0:
 196             continue
 197
 198         hunk_match = _hunk_re.match(line)
 199         filename_match = _filename_re.match(line)
 200
 201         if filename_match:
 202             # normalise -p1 top-directories
 203             if filename_match.group(1) == '---':
 204                 filename = 'a/'
 205             else:
 206                 filename = 'b/'
 207             filename += '/'.join(filename_match.group(2).split('/')[1:])
 208
 209             line = filename_match.group(1) + ' ' + filename
 210
 211         elif hunk_match:
 212             # remove line numbers, but leave line counts
 213             def fn(x):
 214                 if not x:
 215                     return 1
 216                 return int(x)
 217             line_nos = map(fn, hunk_match.groups())
 218             line = '@@ -%d +%d @@' % tuple(line_nos)
 219
 220         elif line[0] in prefixes:
 221             # if we have a +, - or context line, leave as-is
 222             pass
 223
 224         else:
 225             # other lines are ignored
 226             continue
 227
 228         hash.update(line.encode('utf-8') + '\n')
 229
 230     return hash
 231
 232 def extract_tags(content, tags):
 233     counts = Counter()
 234
 235     for tag in tags:
 236         regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
 237         counts[tag] = len(regex.findall(content))
 238
 239     return counts
 240
 241 def main(args):
 242     from optparse import OptionParser
 243
 244     parser = OptionParser()
 245     parser.add_option('-p', '--patch', action = 'store_true',
 246             dest = 'print_patch', help = 'print parsed patch')
 247     parser.add_option('-c', '--comment', action = 'store_true',
 248             dest = 'print_comment', help = 'print parsed comment')
 249     parser.add_option('-#', '--hash', action = 'store_true',
 250             dest = 'print_hash', help = 'print patch hash')
 251
 252     (options, args) = parser.parse_args()
 253
 254     # decode from (assumed) UTF-8
 255     content = sys.stdin.read().decode('utf-8')
 256
 257     (patch, comment) = parse_patch(content)
 258
 259     if options.print_hash and patch:
 260         print hash_patch(patch).hexdigest()
 261
 262     if options.print_patch and patch:
 263         print "Patch: ------\n" + patch
 264
 265     if options.print_comment and comment:
 266         print "Comment: ----\n" + comment
 267
 268 if __name__ == '__main__':
 269     import sys
 270     sys.exit(main(sys.argv))