git.ozlabs.org Git - patchwork/commitdiff
Use hex strings for hash values
author Jeremy Kerr <jk@ozlabs.org>
Wed, 10 Sep 2008 01:21:19 +0000 (11:21 +1000)
committer Jeremy Kerr <jk@ozlabs.org>
Wed, 10 Sep 2008 01:21:19 +0000 (11:21 +1000)
Binary strings are too hard to manage in DB queries and XMLRPC methods,
as we get all kinds of encoding issues.

Change HashField to use a hex string, and add a migration script for db
updates. The patches should be rehashed after migration.

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
apps/patchwork/models.py
lib/sql/migration/001-hex-hash-types.sql [new file with mode: 0644]

index b4161c73e9044d963788013bc8317535414d7f19..e2b636ead6208c8a62f9d02f40f9a60d704c581c 100644 (file)
@@ -169,7 +169,7 @@ class State(models.Model):
     class Meta:
         ordering = ['ordering']
 
-class HashField(models.Field):
+class HashField(models.CharField):
     __metaclass__ = models.SubfieldBase
 
     def __init__(self, algorithm = 'sha1', *args, **kwargs):
@@ -177,6 +177,7 @@ class HashField(models.Field):
         try:
             import hashlib
             self.hashlib = True
+            n_bytes = len(hashlib.new(self.algorithm).hexdigest())
         except ImportError:
             self.hashlib = False
             if algorithm == 'sha1':
@@ -187,31 +188,18 @@ class HashField(models.Field):
                 self.hash_constructor = md5.new
             else:
                 raise NameError("Unknown algorithm '%s'" % algorithm)
-            
+            n_bytes = len(self.hash_constructor().hexdigest())
+
+        kwargs['max_length'] = n_bytes
         super(HashField, self).__init__(*args, **kwargs)
 
     def db_type(self):
         if self.hashlib:
             import hashlib
-            n_bytes = len(hashlib.new(self.algorithm).digest())
-        else:
-            n_bytes = len(self.hash_constructor().digest())
-        if settings.DATABASE_ENGINE.startswith('postgresql'):
-            return 'bytea'
-        elif settings.DATABASE_ENGINE == 'mysql':
-            return 'binary(%d)' % n_bytes
+            n_bytes = len(hashlib.new(self.algorithm).hexdigest())
         else:
-            raise Exception("Unknown database engine '%s'" % \
-                            settings.DATABASE_ENGINE)
-
-    def to_python(self, value):
-        return value
-
-    def get_db_prep_save(self, value):
-        return ''.join(map(lambda x: '\\%03o' % ord(x), value))
-
-    def get_manipulator_field_objs(self):
-        return [oldforms.TextField]
+            n_bytes = len(self.hash_constructor().hexdigest())
+        return 'char(%d)' % n_bytes
 
 class Patch(models.Model):
     project = models.ForeignKey(Project)
@@ -225,7 +213,7 @@ class Patch(models.Model):
     headers = models.TextField(blank = True)
     content = models.TextField()
     commit_ref = models.CharField(max_length=255, null = True, blank = True)
-    hash = HashField(null = True)
+    hash = HashField(null = True, db_index = True)
 
     def __str__(self):
         return self.name
@@ -240,7 +228,7 @@ class Patch(models.Model):
             self.state = State.objects.get(ordering =  0)
 
         if self.hash is None:
-            self.hash = hash_patch(self.content).digest()
+            self.hash = hash_patch(self.content).hexdigest()
 
         super(Patch, self).save()
 
diff --git a/lib/sql/migration/001-hex-hash-types.sql b/lib/sql/migration/001-hex-hash-types.sql
new file mode 100644 (file)
index 0000000..cef6b0b
--- /dev/null
@@ -0,0 +1,8 @@
+BEGIN;
+ALTER TABLE patchwork_patch ALTER COLUMN hash DROP NOT NULL;
+UPDATE patchwork_patch SET hash = NULL;
+COMMIT;
+BEGIN;
+ALTER TABLE patchwork_patch ALTER COLUMN hash TYPE CHAR(40);
+CREATE INDEX "patchwork_patch_hash" ON "patchwork_patch" ("hash");
+COMMIT;