X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fhash%2Fhash.c;h=cb4465405e2eff8022a014e0690583e6d16d9ac0;hp=886101a13ac3a76155fc33c05fe69c385579bd83;hb=7449ae0ff0ef5c96b7a76010986e00ebb28b2a65;hpb=0953f929bc024a9107869a40516b89932d5482e0

diff --git a/ccan/hash/hash.c b/ccan/hash/hash.c
index 886101a1..cb446540 100644
--- a/ccan/hash/hash.c
+++ b/ccan/hash/hash.c
@@ -42,7 +42,7 @@ on 1 byte), but shoehorning those bytes into integers efficiently is messy.
 #include <sys/param.h>  /* attempt to define endianness */
 #endif
 
-#include "hash/hash.h"
+#include "hash.h"
 #ifdef linux
 # include <endian.h>    /* attempt to define endianness */
 #endif
@@ -63,8 +63,7 @@ on 1 byte), but shoehorning those bytes into integers efficiently is messy.
 # define HASH_LITTLE_ENDIAN 0
 # define HASH_BIG_ENDIAN 1
 #else
-# define HASH_LITTLE_ENDIAN 0
-# define HASH_BIG_ENDIAN 0
+# error Unknown endian
 #endif
 
 #define hashsize(n) ((uint32_t)1<<(n))
@@ -209,63 +208,16 @@ uint32_t        initval)         /* the previous hash, or an arbitrary value */
   return c;
 }
 
-
-#if 0
-/*
---------------------------------------------------------------------
-hash_word2() -- same as hash_word(), but take two seeds and return two
-32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
-both be initialized with seeds.  If you pass in (*pb)==0, the output 
-(*pc) will be the same as the return value from hash_word().
---------------------------------------------------------------------
-*/
-void hash_word2 (
-const uint32_t *k,                   /* the key, an array of uint32_t values */
-size_t          length,               /* the length of the key, in uint32_ts */
-uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
-uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
-{
-  uint32_t a,b,c;
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
-  c += *pb;
-
-  /*------------------------------------------------- handle most of the key */
-  while (length > 3)
-  {
-    a += k[0];
-    b += k[1];
-    c += k[2];
-    mix(a,b,c);
-    length -= 3;
-    k += 3;
-  }
-
-  /*------------------------------------------- handle the last 3 uint32_t's */
-  switch(length)                     /* all the case statements fall through */
-  { 
-  case 3 : c+=k[2];
-  case 2 : b+=k[1];
-  case 1 : a+=k[0];
-    final(a,b,c);
-  case 0:     /* case 0: nothing left to add */
-    break;
-  }
-  /*------------------------------------------------------ report the result */
-  *pc=c; *pb=b;
-}
-#endif
-
 /*
 -------------------------------------------------------------------------------
 hashlittle() -- hash a variable-length key into a 32-bit value
   k       : the key (the unaligned variable-length array of bytes)
   length  : the length of the key, counting by bytes
-  initval : can be any 4-byte value
+  val2    : IN: can be any 4-byte value OUT: second 32 bit hash.
 Returns a 32-bit value.  Every bit of the key affects every bit of
 the return value.  Two keys differing by one or two bits will have
-totally different hash values.
+totally different hash values.  Note that the return value is better
+mixed than val2, so use that first.
 
 The best hash table sizes are powers of 2.  There is no need to do
 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
@@ -284,13 +236,13 @@ acceptable.  Do NOT use for cryptographic purposes.
 -------------------------------------------------------------------------------
 */
 
-static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
+static uint32_t hashlittle( const void *key, size_t length, uint32_t *val2 )
 {
   uint32_t a,b,c;                                          /* internal state */
   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
 
   /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2;
 
   u.ptr = key;
   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
@@ -452,209 +404,23 @@ static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
   }
 
   final(a,b,c);
+  *val2 = b;
   return c;
 }
 
-#if 0
-/*
- * hashlittle2: return 2 32-bit hash values
- *
- * This is identical to hashlittle(), except it returns two 32-bit hash
- * values instead of just one.  This is good enough for hash table
- * lookup with 2^^64 buckets, or if you want a second hash if you're not
- * happy with the first, or if you want a probably-unique 64-bit ID for
- * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
- * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
- */
-void hashlittle2( 
-  const void *key,       /* the key to hash */
-  size_t      length,    /* length of the key */
-  uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
-  uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
-{
-  uint32_t a,b,c;                                          /* internal state */
-  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
-
-  /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
-  c += *pb;
-
-  u.ptr = key;
-  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
-    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
-    const uint8_t  *k8;
-
-    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      b += k[1];
-      c += k[2];
-      mix(a,b,c);
-      length -= 12;
-      k += 3;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    /* 
-     * "k[2]&0xffffff" actually reads beyond the end of the string, but
-     * then masks off the part it's not allowed to read.  Because the
-     * string is aligned, the masked-off tail is in the same word as the
-     * rest of the string.  Every machine with memory protection I've seen
-     * does it on word boundaries, so is OK with this.  But VALGRIND will
-     * still catch it and complain.  The masking trick does make the hash
-     * noticably faster for short strings (like English words).
-     */
-#ifndef VALGRIND
-
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
-    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
-    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
-    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
-    case 5 : b+=k[1]&0xff; a+=k[0]; break;
-    case 4 : a+=k[0]; break;
-    case 3 : a+=k[0]&0xffffff; break;
-    case 2 : a+=k[0]&0xffff; break;
-    case 1 : a+=k[0]&0xff; break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-#else /* make valgrind happy */
-
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
-    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
-    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
-    case 9 : c+=k8[8];                   /* fall through */
-    case 8 : b+=k[1]; a+=k[0]; break;
-    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
-    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
-    case 5 : b+=k8[4];                   /* fall through */
-    case 4 : a+=k[0]; break;
-    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
-    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
-    case 1 : a+=k8[0]; break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-#endif /* !valgrind */
-
-  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
-    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
-    const uint8_t  *k8;
-
-    /*--------------- all but last block: aligned reads and different mixing */
-    while (length > 12)
-    {
-      a += k[0] + (((uint32_t)k[1])<<16);
-      b += k[2] + (((uint32_t)k[3])<<16);
-      c += k[4] + (((uint32_t)k[5])<<16);
-      mix(a,b,c);
-      length -= 12;
-      k += 6;
-    }
-
-    /*----------------------------- handle the last (probably partial) block */
-    k8 = (const uint8_t *)k;
-    switch(length)
-    {
-    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
-    case 10: c+=k[4];
-             b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 9 : c+=k8[8];                      /* fall through */
-    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
-    case 6 : b+=k[2];
-             a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 5 : b+=k8[4];                      /* fall through */
-    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
-             break;
-    case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
-    case 2 : a+=k[0];
-             break;
-    case 1 : a+=k8[0];
-             break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-
-  } else {                        /* need to read the key one byte at a time */
-    const uint8_t *k = (const uint8_t *)key;
-
-    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
-    while (length > 12)
-    {
-      a += k[0];
-      a += ((uint32_t)k[1])<<8;
-      a += ((uint32_t)k[2])<<16;
-      a += ((uint32_t)k[3])<<24;
-      b += k[4];
-      b += ((uint32_t)k[5])<<8;
-      b += ((uint32_t)k[6])<<16;
-      b += ((uint32_t)k[7])<<24;
-      c += k[8];
-      c += ((uint32_t)k[9])<<8;
-      c += ((uint32_t)k[10])<<16;
-      c += ((uint32_t)k[11])<<24;
-      mix(a,b,c);
-      length -= 12;
-      k += 12;
-    }
-
-    /*-------------------------------- last block: affect all 32 bits of (c) */
-    switch(length)                   /* all the case statements fall through */
-    {
-    case 12: c+=((uint32_t)k[11])<<24;
-    case 11: c+=((uint32_t)k[10])<<16;
-    case 10: c+=((uint32_t)k[9])<<8;
-    case 9 : c+=k[8];
-    case 8 : b+=((uint32_t)k[7])<<24;
-    case 7 : b+=((uint32_t)k[6])<<16;
-    case 6 : b+=((uint32_t)k[5])<<8;
-    case 5 : b+=k[4];
-    case 4 : a+=((uint32_t)k[3])<<24;
-    case 3 : a+=((uint32_t)k[2])<<16;
-    case 2 : a+=((uint32_t)k[1])<<8;
-    case 1 : a+=k[0];
-             break;
-    case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
-    }
-  }
-
-  final(a,b,c);
-  *pc=c; *pb=b;
-}
-#endif
-
-
 /*
  * hashbig():
  * This is the same as hash_word() on big-endian machines.  It is different
  * from hashlittle() on all machines.  hashbig() takes advantage of
  * big-endian byte ordering. 
  */
-static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
+static uint32_t hashbig( const void *key, size_t length, uint32_t *val2)
 {
   uint32_t a,b,c;
   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
 
   /* Set up the internal state */
-  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
+  a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2;
 
   u.ptr = key;
   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
@@ -769,23 +535,167 @@ static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
   }
 
   final(a,b,c);
+  *val2 = b;
   return c;
 }
 
-uint32_t hash_any_stable(const void *key, size_t length, uint32_t base)
+/* I basically use hashlittle here, but use native endian within each
+ * element.  This delivers least-surprise: hash such as "int arr[] = {
+ * 1, 2 }; hash_stable(arr, 2, 0);" will be the same on big and little
+ * endian machines, even though a bytewise hash wouldn't be. */
+uint64_t hash64_stable_64(const void *key, size_t n, uint32_t base)
+{
+	const uint64_t *k = key;
+	uint32_t a,b,c;
+
+	/* Set up the internal state */
+	a = b = c = 0xdeadbeef + ((uint32_t)n*8) + base;
+
+	while (n > 3) {
+		a += (uint32_t)k[0];
+		b += (uint32_t)(k[0] >> 32);
+		c += (uint32_t)k[1];
+		mix(a,b,c);
+		a += (uint32_t)(k[1] >> 32);
+		b += (uint32_t)k[2];
+		c += (uint32_t)(k[2] >> 32);
+		mix(a,b,c);
+		n -= 3;
+		k += 3;
+	}
+	switch (n) {
+	case 2:
+		a += (uint32_t)k[0];
+		b += (uint32_t)(k[0] >> 32);
+		c += (uint32_t)k[1];
+		mix(a,b,c);
+		a += (uint32_t)(k[1] >> 32);
+		break;
+	case 1:
+		a += (uint32_t)k[0];
+		b += (uint32_t)(k[0] >> 32);
+		break;
+	case 0:
+		return c;
+	}
+	final(a,b,c);
+	return ((uint64_t)b << 32) | c;
+}
+
+uint64_t hash64_stable_32(const void *key, size_t n, uint32_t base)
+{
+	const uint32_t *k = key;
+	uint32_t a,b,c;
+
+	/* Set up the internal state */
+	a = b = c = 0xdeadbeef + ((uint32_t)n*4) + base;
+
+	while (n > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		mix(a,b,c);
+
+		n -= 3;
+		k += 3;
+	}
+	switch (n) {
+	case 2:
+		b += (uint32_t)k[1];
+	case 1:
+		a += (uint32_t)k[0];
+ 		break;
+	case 0:
+		return c;
+	}
+	final(a,b,c);
+	return ((uint64_t)b << 32) | c;
+}
+
+uint64_t hash64_stable_16(const void *key, size_t n, uint32_t base)
+{
+	const uint16_t *k = key;
+	uint32_t a,b,c;
+
+	/* Set up the internal state */
+	a = b = c = 0xdeadbeef + ((uint32_t)n*2) + base;
+
+	while (n > 6) {
+		a += (uint32_t)k[0] + ((uint32_t)k[1] << 16);
+		b += (uint32_t)k[2] + ((uint32_t)k[3] << 16);
+		c += (uint32_t)k[4] + ((uint32_t)k[5] << 16);
+		mix(a,b,c);
+
+		n -= 6;
+		k += 6;
+	}
+
+	switch (n) {
+	case 5:
+		c += (uint32_t)k[4];
+	case 4:
+		b += ((uint32_t)k[3] << 16);
+	case 3:
+		b += (uint32_t)k[2];
+	case 2:
+		a += ((uint32_t)k[1] << 16);
+	case 1:
+		a += (uint32_t)k[0];
+		break;
+	case 0:
+		return c;
+	}
+	final(a,b,c);
+	return ((uint64_t)b << 32) | c;
+}
+	
+uint64_t hash64_stable_8(const void *key, size_t n, uint32_t base)
 {
-	/* We use hashlittle as our stable hash. */
-	return hashlittle(key, length, base);
+	uint32_t lower = hashlittle(key, n, &base);
+
+	return ((uint64_t)base << 32) | lower;	
 }
 
 uint32_t hash_any(const void *key, size_t length, uint32_t base)
 {
 	if (HASH_BIG_ENDIAN)
-		return hashbig(key, length, base);
+		return hashbig(key, length, &base);
 	else
-		/* We call hash_any_stable not hashlittle.  This way we know
-		 * that hashlittle will be inlined in hash_any_stable. */
-		return hash_any_stable(key, length, base);
+		return hashlittle(key, length, &base);
+}
+
+uint32_t hash_stable_64(const void *key, size_t n, uint32_t base)
+{
+	return hash64_stable_64(key, n, base);
+}
+
+uint32_t hash_stable_32(const void *key, size_t n, uint32_t base)
+{
+	return hash64_stable_32(key, n, base);
+}
+
+uint32_t hash_stable_16(const void *key, size_t n, uint32_t base)
+{
+	return hash64_stable_16(key, n, base);
+}
+
+uint32_t hash_stable_8(const void *key, size_t n, uint32_t base)
+{
+	return hashlittle(key, n, &base);
+}
+
+/* Jenkins' lookup8 is a 64 bit hash, but he says it's obsolete.  Use
+ * the plain one and recombine into 64 bits. */
+uint64_t hash64_any(const void *key, size_t length, uint32_t base)
+{
+	uint32_t lower;
+
+	if (HASH_BIG_ENDIAN)
+		lower = hashbig(key, length, &base);
+	else
+		lower = hashlittle(key, length, &base);
+
+	return ((uint64_t)base << 32) | lower;
 }
 
 #ifdef SELF_TEST