X-Git-Url: https://git.ozlabs.org/?p=patchwork;a=blobdiff_plain;f=apps%2Fpatchwork%2Fparser.py;h=a51a7b609af0c1df36a85f0d9fac4670efd9322e;hp=16d1de4c6f8e03fdbed64231098d86a2774781e4;hb=f09e982f58384946111d4157fd2b7c2b31b78612;hpb=4ede11b48f3b056d655a2e4a74d3627292bfbb9f

diff --git a/apps/patchwork/parser.py b/apps/patchwork/parser.py
index 16d1de4..a51a7b6 100644
--- a/apps/patchwork/parser.py
+++ b/apps/patchwork/parser.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 #
 # Patchwork - automated patch tracking system
 # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
@@ -22,6 +22,16 @@
 
 import re
 
+try:
+    import hashlib
+    sha1_hash = hashlib.sha1
+except ImportError:  # no hashlib available: fall back to the older sha module
+    import sha
+    sha1_hash = sha.sha
+
+_hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
+_filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
+
 def parse_patch(text):
     patchbuf = ''
     commentbuf = ''
@@ -35,6 +45,7 @@ def parse_patch(text):
     # 3: patch header line 2 (+++)
     # 4: patch hunk header line (@@ line)
     # 5: patch hunk content
+    # 6: patch meta header (rename from/rename to)
     #
     # valid transitions:
     #  0 -> 1 (diff, ===, Index:)
@@ -44,6 +55,9 @@ def parse_patch(text):
     #  3 -> 4 (@@ line)
     #  4 -> 5 (patch content)
     #  5 -> 1 (run out of lines from @@-specifed count)
+    #  1 -> 6 (rename from / rename to)
+    #  6 -> 2 (---)
+    #  6 -> 1 (other text)
     #
     # Suspected patch header is stored into buf, and appended to
     # patchbuf if we find a following hunk. Otherwise, append to
@@ -53,13 +67,12 @@ def parse_patch(text):
     lc = (0, 0)
     hunk = 0
 
-    hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
 
     for line in text.split('\n'):
         line += '\n'
 
         if state == 0:
-            if line.startswith('diff') or line.startswith('===') \
+            if line.startswith('diff ') or line.startswith('===') \
                     or line.startswith('Index: '):
                 state = 1
                 buf += line
@@ -76,6 +89,9 @@ def parse_patch(text):
             if line.startswith('--- '):
                 state = 2
 
+            if line.startswith('rename from ') or line.startswith('rename to '):
+                state = 6
+
         elif state == 2:
             if line.startswith('+++ '):
                 state = 3
@@ -91,7 +107,7 @@ def parse_patch(text):
                 buf = ''
 
         elif state == 3:
-            match = hunk_re.match(line)
+            match = _hunk_re.match(line)
             if match:
 
                 def fn(x):
@@ -110,6 +126,11 @@ def parse_patch(text):
                 buf = ''
                 state = 2
 
+            elif hunk and line.startswith('\ No newline at end of file'):
+                # If we had a hunk and now we see this, it's part of the patch,
+                # and we're still expecting another @@ line.
+                patchbuf += line
+
             elif hunk:
                 state = 1
                 buf += line
@@ -124,6 +145,9 @@ def parse_patch(text):
                 lc[0] -= 1
             elif line.startswith('+'):
                 lc[1] -= 1
+            elif line.startswith('\ No newline at end of file'):
+                # Special case: Not included as part of the hunk's line count
+                pass
             else:
                 lc[0] -= 1
                 lc[1] -= 1
@@ -136,6 +160,20 @@ def parse_patch(text):
             else:
                 state = 5
 
+        elif state == 6:
+            if line.startswith('rename to ') or line.startswith('rename from '):
+                patchbuf += buf + line
+                buf = ''
+
+            elif line.startswith('--- '):
+                patchbuf += buf + line
+                buf = ''
+                state = 2
+
+            else:
+                buf += line
+                state = 1
+
         else:
             raise Exception("Unknown state %d! (line '%s')" % (state, line))
 
@@ -149,10 +187,81 @@ def parse_patch(text):
 
     return (patchbuf, commentbuf)
 
-if __name__ == '__main__':
-    import sys
-    (patch, comment) = parse_patch(sys.stdin.read())
-    if patch:
+def hash_patch(str):
+    """Return a SHA-1 hash object computed over a normalised form of a patch.
+
+    Carriage returns and surrounding whitespace are dropped, the first path
+    component of ---/+++ filenames is replaced by a/ or b/, hunk headers keep
+    only their line counts, and lines that are not +, - or context lines are
+    skipped.  Unicode text is hashed as UTF-8; byte strings are hashed as-is.
+    """
+    def count(x):
+        # an omitted count in a hunk header means one line
+        if not x:
+            return 1
+        return int(x)
+
+    # normalise spaces
+    text = str.replace('\r', '').strip() + '\n'
+    digest = sha1_hash()
+
+    for line in text.split('\n'):
+        if not line:
+            continue
+        filename_match = _filename_re.match(line)
+        hunk_match = _hunk_re.match(line)
+
+        if filename_match:
+            # normalise -p1 top-directories
+            if filename_match.group(1) == '---':
+                filename = 'a/'
+            else:
+                filename = 'b/'
+            filename += '/'.join(filename_match.group(2).split('/')[1:])
+            line = filename_match.group(1) + ' ' + filename
+        elif hunk_match:
+            # remove line numbers, but leave line counts
+            line = '@@ -%d +%d @@' % tuple(map(count, hunk_match.groups()))
+        elif line[0] not in ('-', '+', ' '):
+            # other lines are ignored
+            continue
+
+        line += '\n'
+        if isinstance(line, type(u'')):
+            # .encode() on a non-ASCII byte string would raise; pass bytes through
+            line = line.encode('utf-8')
+        digest.update(line)
+
+    return digest
+
+
+def main(args):
+    """Run parse_patch() on stdin; print what argv-style ``args`` selects."""
+    import sys
+    from optparse import OptionParser
+
+    parser = OptionParser()
+    parser.add_option('-p', '--patch', action = 'store_true',
+            dest = 'print_patch', help = 'print parsed patch')
+    parser.add_option('-c', '--comment', action = 'store_true',
+            dest = 'print_comment', help = 'print parsed comment')
+    parser.add_option('-#', '--hash', action = 'store_true',
+            dest = 'print_hash', help = 'print patch hash')
+
+    (options, args) = parser.parse_args(args[1:])  # args[0] is the program name
+    # decode from (assumed) UTF-8
+    content = sys.stdin.read().decode('utf-8')
+    (patch, comment) = parse_patch(content)
+
+    if options.print_hash and patch:
+        print hash_patch(patch).hexdigest()
+
+    if options.print_patch and patch:
         print "Patch: ------\n" + patch
-    if comment:
+
+    if options.print_comment and comment:
         print "Comment: ----\n" + comment
+
+if __name__ == '__main__':  # only when executed as a script, not on import
+    import sys
+    sys.exit(main(sys.argv))  # main() returns None, which sys.exit() treats as success