git.ozlabs.org Git - patchwork/blob - apps/patchwork/parser.py

   1 #!/usr/bin/python
   2 #
   3 # Patchwork - automated patch tracking system
   4 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
   5 #
   6 # This file is part of the Patchwork package.
   7 #
   8 # Patchwork is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # Patchwork is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with Patchwork; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22
  23 import re
  24 import hashlib
  25
  26 _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
  27 _filename_re = re.compile('^(---|\+\+\+) (\S+)')
  28
  29 def parse_patch(text):
  30     patchbuf = ''
  31     commentbuf = ''
  32     buf = ''
  33
  34     # state specified the line we just saw, and what to expect next
  35     state = 0
  36     # 0: text
  37     # 1: suspected patch header (diff, ====, Index:)
  38     # 2: patch header line 1 (---)
  39     # 3: patch header line 2 (+++)
  40     # 4: patch hunk header line (@@ line)
  41     # 5: patch hunk content
  42     #
  43     # valid transitions:
  44     #  0 -> 1 (diff, ===, Index:)
  45     #  0 -> 2 (---)
  46     #  1 -> 2 (---)
  47     #  2 -> 3 (+++)
  48     #  3 -> 4 (@@ line)
  49     #  4 -> 5 (patch content)
  50     #  5 -> 1 (run out of lines from @@-specifed count)
  51     #
  52     # Suspected patch header is stored into buf, and appended to
  53     # patchbuf if we find a following hunk. Otherwise, append to
  54     # comment after parsing.
  55
  56     # line counts while parsing a patch hunk
  57     lc = (0, 0)
  58     hunk = 0
  59
  60
  61     for line in text.split('\n'):
  62         line += '\n'
  63
  64         if state == 0:
  65             if line.startswith('diff') or line.startswith('===') \
  66                     or line.startswith('Index: '):
  67                 state = 1
  68                 buf += line
  69
  70             elif line.startswith('--- '):
  71                 state = 2
  72                 buf += line
  73
  74             else:
  75                 commentbuf += line
  76
  77         elif state == 1:
  78             buf += line
  79             if line.startswith('--- '):
  80                 state = 2
  81
  82         elif state == 2:
  83             if line.startswith('+++ '):
  84                 state = 3
  85                 buf += line
  86
  87             elif hunk:
  88                 state = 1
  89                 buf += line
  90
  91             else:
  92                 state = 0
  93                 commentbuf += buf + line
  94                 buf = ''
  95
  96         elif state == 3:
  97             match = _hunk_re.match(line)
  98             if match:
  99
 100                 def fn(x):
 101                     if not x:
 102                         return 1
 103                     return int(x)
 104
 105                 lc = map(fn, match.groups())
 106
 107                 state = 4
 108                 patchbuf += buf + line
 109                 buf = ''
 110
 111             elif line.startswith('--- '):
 112                 patchbuf += buf + line
 113                 buf = ''
 114                 state = 2
 115
 116             elif hunk:
 117                 state = 1
 118                 buf += line
 119
 120             else:
 121                 state = 0
 122                 commentbuf += buf + line
 123                 buf = ''
 124
 125         elif state == 4 or state == 5:
 126             if line.startswith('-'):
 127                 lc[0] -= 1
 128             elif line.startswith('+'):
 129                 lc[1] -= 1
 130             else:
 131                 lc[0] -= 1
 132                 lc[1] -= 1
 133
 134             patchbuf += line
 135
 136             if lc[0] <= 0 and lc[1] <= 0:
 137                 state = 3
 138                 hunk += 1
 139             else:
 140                 state = 5
 141
 142         else:
 143             raise Exception("Unknown state %d! (line '%s')" % (state, line))
 144
 145     commentbuf += buf
 146
 147     if patchbuf == '':
 148         patchbuf = None
 149
 150     if commentbuf == '':
 151         commentbuf = None
 152
 153     return (patchbuf, commentbuf)
 154
 155 def patch_hash(str):
 156     str = str.replace('\r', '')
 157     str = str.strip() + '\n'
 158     lines = str.split('\n')
 159
 160     prefixes = ['-', '+', ' ']
 161     hash = hashlib.sha1()
 162
 163     for line in str.split('\n'):
 164
 165         if len(line) <= 0:
 166             continue
 167
 168         hunk_match = _hunk_re.match(line)
 169         filename_match = _filename_re.match(line)
 170
 171         if filename_match:
 172             # normalise -p1 top-directories
 173             if filename_match.group(1) == '---':
 174                 filename = 'a/'
 175             else:
 176                 filename = 'b/'
 177             filename += '/'.join(filename_match.group(2).split('/')[1:])
 178
 179             line = filename_match.group(1) + ' ' + filename
 180
 181
 182         elif hunk_match:
 183             # remove line numbers
 184             def fn(x):
 185                 if not x:
 186                     return 1
 187                 return int(x)
 188             line_nos = map(fn, hunk_match.groups())
 189             line = '@@ -%d +%d @@' % tuple(line_nos)
 190
 191         elif line[0] in prefixes:
 192             pass
 193
 194         else:
 195             continue
 196
 197         hash.update(line + '\n')
 198
 199 if __name__ == '__main__':
 200     import sys
 201 #    (patch, comment) = parse_patch(sys.stdin.read())
 202 #    if patch:
 203 #        print "Patch: ------\n" + patch
 204 #    if comment:
 205 #        print "Comment: ----\n" + comment
 206     normalise_patch_content(sys.stdin.read())