Always use #!/usr/bin/env python

[patchwork] / apps / patchwork / parser.py
diff --git a/apps/patchwork/parser.py b/apps/patchwork/parser.py

index 16d1de4c6f8e03fdbed64231098d86a2774781e4..76f409cd5b4aa66b9e22856717fed2bdba83ff68 100644 (file)
--- a/apps/patchwork/parser.py
+++ b/apps/patchwork/parser.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
  #
  # Patchwork - automated patch tracking system
  # Copyright (C) 2008 Jeremy Kerr <jk@ozlabs.org>
@@ -22,6 +22,16 @@
  
  import re
  
+try:
+    import hashlib
+    sha1_hash = hashlib.sha1
+except ImportError:
+    import sha  # legacy fallback for interpreters without hashlib
+    sha1_hash = sha.sha
+# raw regex literals; _hunk_re captures the optional old/new line counts
+_hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
+_filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
+
  def parse_patch(text):
      patchbuf = ''
      commentbuf = ''
@@ -35,6 +45,7 @@ def parse_patch(text):
      # 3: patch header line 2 (+++)
      # 4: patch hunk header line (@@ line)
      # 5: patch hunk content
+    # 6: patch meta header (rename from/rename to)
      #
      # valid transitions:
      #  0 -> 1 (diff, ===, Index:)
@@ -44,6 +55,9 @@ def parse_patch(text):
      #  3 -> 4 (@@ line)
      #  4 -> 5 (patch content)
      #  5 -> 1 (run out of lines from @@-specifed count)
+    #  1 -> 6 (rename from / rename to)
+    #  6 -> 2 (---)
+    #  6 -> 1 (other text)
      #
      # Suspected patch header is stored into buf, and appended to
      # patchbuf if we find a following hunk. Otherwise, append to
@@ -53,13 +67,12 @@ def parse_patch(text):
      lc = (0, 0)
      hunk = 0
  
-    hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
  
      for line in text.split('\n'):
          line += '\n'
  
          if state == 0:
-            if line.startswith('diff') or line.startswith('===') \
+            if line.startswith('diff ') or line.startswith('===') \
                      or line.startswith('Index: '):
                  state = 1
                  buf += line
@@ -76,6 +89,9 @@ def parse_patch(text):
              if line.startswith('--- '):
                  state = 2
  
+            if line.startswith('rename from ') or line.startswith('rename to '):
+                state = 6
+
          elif state == 2:
              if line.startswith('+++ '):
                  state = 3
@@ -91,7 +107,7 @@ def parse_patch(text):
                  buf = ''
  
          elif state == 3:
-            match = hunk_re.match(line)
+            match = _hunk_re.match(line)
              if match:
  
                  def fn(x):
@@ -124,6 +140,9 @@ def parse_patch(text):
                  lc[0] -= 1
              elif line.startswith('+'):
                  lc[1] -= 1
+            elif line.startswith('\ No newline at end of file'):
+                # Special case: Not included as part of the hunk's line count
+                pass
              else:
                  lc[0] -= 1
                  lc[1] -= 1
@@ -136,6 +155,20 @@ def parse_patch(text):
              else:
                  state = 5
  
+        elif state == 6:
+            if line.startswith('rename to ') or line.startswith('rename from '):
+                patchbuf += buf + line
+                buf = ''
+
+            elif line.startswith('--- '):
+                patchbuf += buf + line
+                buf = ''
+                state = 2
+
+            else:
+                buf += line
+                state = 1
+
          else:
              raise Exception("Unknown state %d! (line '%s')" % (state, line))
  
@@ -149,10 +182,81 @@ def parse_patch(text):
  
      return (patchbuf, commentbuf)
  
-if __name__ == '__main__':
-    import sys
-    (patch, comment) = parse_patch(sys.stdin.read())
-    if patch:
+def hash_patch(str):
+    """Return a SHA-1 hash object computed over a normalised patch.
+
+    Carriage returns and surrounding whitespace are dropped, the first
+    path component of ---/+++ filenames is replaced by a/ or b/, and
+    hunk headers keep only their line counts. Lines that are neither
+    filenames, hunk headers, nor +/-/context lines are ignored.
+
+    The parameter name `str` shadows the builtin, so it is read once and
+    rebound to `text` straight away. The hash object itself is returned;
+    callers use .hexdigest() to get the printable form.
+    """
+    text = str.replace('\r', '').strip() + '\n'
+    digest = sha1_hash()
+
+    for line in text.split('\n'):
+        if not line:
+            continue
+
+        filename_match = _filename_re.match(line)
+        hunk_match = _hunk_re.match(line)
+
+        if filename_match:
+            # normalise -p1 top-directories
+            marker, path = filename_match.groups()
+            if marker == '---':
+                top = 'a/'
+            else:
+                top = 'b/'
+            line = marker + ' ' + top + '/'.join(path.split('/')[1:])
+
+        elif hunk_match:
+            # remove line numbers, but leave line counts; an omitted
+            # count is treated as 1
+            counts = tuple([int(n or 1) for n in hunk_match.groups()])
+            line = '@@ -%d +%d @@' % counts
+
+        elif line[0] not in ('-', '+', ' '):
+            # anything that is not a +, - or context line is ignored
+            continue
+
+        # append the newline before encoding so that text and bytes
+        # are never concatenated
+        digest.update((line + '\n').encode('utf-8'))
+
+    return digest
+
+
+def main(args):
+    """Print the patch, comment and/or patch hash parsed from stdin."""
+    import sys  # local import: main() must not rely on the __main__ guard
+    from optparse import OptionParser
+
+    parser = OptionParser()
+    parser.add_option('-p', '--patch', action = 'store_true',
+            dest = 'print_patch', help = 'print parsed patch')
+    parser.add_option('-c', '--comment', action = 'store_true',
+            dest = 'print_comment', help = 'print parsed comment')
+    parser.add_option('-#', '--hash', action = 'store_true',
+            dest = 'print_hash', help = 'print patch hash')
+    # args is a full argv (program name first); parse it, not sys.argv
+    (options, args) = parser.parse_args(args[1:])
+
+    # decode from (assumed) UTF-8
+    (patch, comment) = parse_patch(sys.stdin.read().decode('utf-8'))
+
+    if options.print_hash and patch:
+        print hash_patch(patch).hexdigest()
+
+    if options.print_patch and patch:
          print "Patch: ------\n" + patch
-    if comment:
+
+    if options.print_comment and comment:
          print "Comment: ----\n" + comment
+
+if __name__ == '__main__':
+    import sys  # only imported when the module is run as a script
+    sys.exit(main(sys.argv))  # main() returns nothing, so exit status is 0