From: Rusty Russell Date: Mon, 16 Aug 2010 06:07:19 +0000 (+0930) Subject: hash: 64 bit variants. X-Git-Url: http://git.ozlabs.org/?p=ccan;a=commitdiff_plain;h=7449ae0ff0ef5c96b7a76010986e00ebb28b2a65 hash: 64 bit variants. --- diff --git a/ccan/hash/_info b/ccan/hash/_info index 80f52f15..399edbe8 100644 --- a/ccan/hash/_info +++ b/ccan/hash/_info @@ -1,4 +1,5 @@ #include +#include /** * hash - routines for hashing bytes @@ -22,6 +23,7 @@ int main(int argc, char *argv[]) return 1; if (strcmp(argv[1], "depends") == 0) { + printf("ccan/build_assert\n"); return 0; } diff --git a/ccan/hash/hash.c b/ccan/hash/hash.c index 5e106ad3..cb446540 100644 --- a/ccan/hash/hash.c +++ b/ccan/hash/hash.c @@ -208,63 +208,16 @@ uint32_t initval) /* the previous hash, or an arbitrary value */ return c; } - -#if 0 -/* --------------------------------------------------------------------- -hash_word2() -- same as hash_word(), but take two seeds and return two -32-bit values. pc and pb must both be nonnull, and *pc and *pb must -both be initialized with seeds. If you pass in (*pb)==0, the output -(*pc) will be the same as the return value from hash_word(). 
--------------------------------------------------------------------- -*/ -void hash_word2 ( -const uint32_t *k, /* the key, an array of uint32_t values */ -size_t length, /* the length of the key, in uint32_ts */ -uint32_t *pc, /* IN: seed OUT: primary hash value */ -uint32_t *pb) /* IN: more seed OUT: secondary hash value */ -{ - uint32_t a,b,c; - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; - c += *pb; - - /*------------------------------------------------- handle most of the key */ - while (length > 3) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 3; - k += 3; - } - - /*------------------------------------------- handle the last 3 uint32_t's */ - switch(length) /* all the case statements fall through */ - { - case 3 : c+=k[2]; - case 2 : b+=k[1]; - case 1 : a+=k[0]; - final(a,b,c); - case 0: /* case 0: nothing left to add */ - break; - } - /*------------------------------------------------------ report the result */ - *pc=c; *pb=b; -} -#endif - /* ------------------------------------------------------------------------------- hashlittle() -- hash a variable-length key into a 32-bit value k : the key (the unaligned variable-length array of bytes) length : the length of the key, counting by bytes - initval : can be any 4-byte value + val2 : IN: can be any 4-byte value OUT: second 32 bit hash. Returns a 32-bit value. Every bit of the key affects every bit of the return value. Two keys differing by one or two bits will have -totally different hash values. +totally different hash values. Note that the return value is better +mixed than val2, so use that first. The best hash table sizes are powers of 2. There is no need to do mod a prime (mod is sooo slow!). If you need less than 32 bits, @@ -283,13 +236,13 @@ acceptable. Do NOT use for cryptographic purposes. 
------------------------------------------------------------------------------- */ -static uint32_t hashlittle( const void *key, size_t length, uint32_t initval) +static uint32_t hashlittle( const void *key, size_t length, uint32_t *val2 ) { uint32_t a,b,c; /* internal state */ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2; u.ptr = key; if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { @@ -451,209 +404,23 @@ static uint32_t hashlittle( const void *key, size_t length, uint32_t initval) } final(a,b,c); + *val2 = b; return c; } -#if 0 -/* - * hashlittle2: return 2 32-bit hash values - * - * This is identical to hashlittle(), except it returns two 32-bit hash - * values instead of just one. This is good enough for hash table - * lookup with 2^^64 buckets, or if you want a second hash if you're not - * happy with the first, or if you want a probably-unique 64-bit ID for - * the key. *pc is better mixed than *pb, so use *pc first. If you want - * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". 
- */ -void hashlittle2( - const void *key, /* the key to hash */ - size_t length, /* length of the key */ - uint32_t *pc, /* IN: primary initval, OUT: primary hash */ - uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ -{ - uint32_t a,b,c; /* internal state */ - union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; - c += *pb; - - u.ptr = key; - if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { - const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ - const uint8_t *k8; - - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /*----------------------------- handle the last (probably partial) block */ - /* - * "k[2]&0xffffff" actually reads beyond the end of the string, but - * then masks off the part it's not allowed to read. Because the - * string is aligned, the masked-off tail is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). 
- */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; - case 6 : b+=k[1]&0xffff; a+=k[0]; break; - case 5 : b+=k[1]&0xff; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff; break; - case 2 : a+=k[0]&0xffff; break; - case 1 : a+=k[0]&0xff; break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ - case 1 : a+=k8[0]; break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - -#endif /* !valgrind */ - - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ - const uint8_t *k8; - - /*--------------- all but last block: aligned reads and different mixing */ - while (length > 12) - { - a += k[0] + (((uint32_t)k[1])<<16); - b += k[2] + (((uint32_t)k[3])<<16); - c += k[4] + (((uint32_t)k[5])<<16); - mix(a,b,c); - length -= 12; - k += 6; - } - - /*----------------------------- handle the last (probably partial) block */ - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[4]+(((uint32_t)k[5])<<16); - b+=k[2]+(((uint32_t)k[3])<<16); - 
a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=k[4]; - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=k[2]; - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=k[0]; - break; - case 1 : a+=k8[0]; - break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = (const uint8_t *)key; - - /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - a += ((uint32_t)k[1])<<8; - a += ((uint32_t)k[2])<<16; - a += ((uint32_t)k[3])<<24; - b += k[4]; - b += ((uint32_t)k[5])<<8; - b += ((uint32_t)k[6])<<16; - b += ((uint32_t)k[7])<<24; - c += k[8]; - c += ((uint32_t)k[9])<<8; - c += ((uint32_t)k[10])<<16; - c += ((uint32_t)k[11])<<24; - mix(a,b,c); - length -= 12; - k += 12; - } - - /*-------------------------------- last block: affect all 32 bits of (c) */ - switch(length) /* all the case statements fall through */ - { - case 12: c+=((uint32_t)k[11])<<24; - case 11: c+=((uint32_t)k[10])<<16; - case 10: c+=((uint32_t)k[9])<<8; - case 9 : c+=k[8]; - case 8 : b+=((uint32_t)k[7])<<24; - case 7 : b+=((uint32_t)k[6])<<16; - case 6 : b+=((uint32_t)k[5])<<8; - case 5 : b+=k[4]; - case 4 : a+=((uint32_t)k[3])<<24; - case 3 : a+=((uint32_t)k[2])<<16; - case 2 : a+=((uint32_t)k[1])<<8; - case 1 : a+=k[0]; - break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - } - - final(a,b,c); - *pc=c; *pb=b; -} -#endif - - /* * hashbig(): * This is the same as hash_word() 
on big-endian machines. It is different * from hashlittle() on all machines. hashbig() takes advantage of * big-endian byte ordering. */ -static uint32_t hashbig( const void *key, size_t length, uint32_t initval) +static uint32_t hashbig( const void *key, size_t length, uint32_t *val2) { uint32_t a,b,c; union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2; u.ptr = key; if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { @@ -768,6 +535,7 @@ static uint32_t hashbig( const void *key, size_t length, uint32_t initval) } final(a,b,c); + *val2 = b; return c; } @@ -775,7 +543,7 @@ static uint32_t hashbig( const void *key, size_t length, uint32_t initval) * element. This delivers least-surprise: hash such as "int arr[] = { * 1, 2 }; hash_stable(arr, 2, 0);" will be the same on big and little * endian machines, even though a bytewise hash wouldn't be. 
*/ -uint32_t hash_stable_64(const void *key, size_t n, uint32_t base) +uint64_t hash64_stable_64(const void *key, size_t n, uint32_t base) { const uint64_t *k = key; uint32_t a,b,c; @@ -811,10 +579,10 @@ uint32_t hash_stable_64(const void *key, size_t n, uint32_t base) return c; } final(a,b,c); - return c; + return ((uint64_t)b << 32) | c; } -uint32_t hash_stable_32(const void *key, size_t n, uint32_t base) +uint64_t hash64_stable_32(const void *key, size_t n, uint32_t base) { const uint32_t *k = key; uint32_t a,b,c; @@ -841,10 +609,10 @@ uint32_t hash_stable_32(const void *key, size_t n, uint32_t base) return c; } final(a,b,c); - return c; + return ((uint64_t)b << 32) | c; } -uint32_t hash_stable_16(const void *key, size_t n, uint32_t base) +uint64_t hash64_stable_16(const void *key, size_t n, uint32_t base) { const uint16_t *k = key; uint32_t a,b,c; @@ -878,20 +646,56 @@ uint32_t hash_stable_16(const void *key, size_t n, uint32_t base) return c; } final(a,b,c); - return c; + return ((uint64_t)b << 32) | c; } -uint32_t hash_stable_8(const void *key, size_t n, uint32_t base) +uint64_t hash64_stable_8(const void *key, size_t n, uint32_t base) { - return hashlittle(key, n, base); + uint32_t lower = hashlittle(key, n, &base); + + return ((uint64_t)base << 32) | lower; } uint32_t hash_any(const void *key, size_t length, uint32_t base) { if (HASH_BIG_ENDIAN) - return hashbig(key, length, base); + return hashbig(key, length, &base); else - return hashlittle(key, length, base); + return hashlittle(key, length, &base); +} + +uint32_t hash_stable_64(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_64(key, n, base); +} + +uint32_t hash_stable_32(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_32(key, n, base); +} + +uint32_t hash_stable_16(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_16(key, n, base); +} + +uint32_t hash_stable_8(const void *key, size_t n, uint32_t base) +{ + return hashlittle(key, n, &base); +} + 
+/* Jenkins' lookup8 is a 64 bit hash, but he says it's obsolete. Use + * the plain one and recombine into 64 bits. */ +uint64_t hash64_any(const void *key, size_t length, uint32_t base) +{ + uint32_t lower; + + if (HASH_BIG_ENDIAN) + lower = hashbig(key, length, &base); + else + lower = hashlittle(key, length, &base); + + return ((uint64_t)base << 32) | lower; } #ifdef SELF_TEST diff --git a/ccan/hash/hash.h b/ccan/hash/hash.h index fd09f674..1c531cf9 100644 --- a/ccan/hash/hash.h +++ b/ccan/hash/hash.h @@ -3,6 +3,7 @@ #include #include #include "config.h" +#include /* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain. * @@ -25,7 +26,7 @@ * It may also change with future versions: it could even detect at runtime * what the fastest hash to use is. * - * See also: hash_stable. + * See also: hash64, hash_stable. * * Example: * #include "hash/hash.h" @@ -69,6 +70,9 @@ * memory representations of integers depend on the machine * endianness. * + * See also: + * hash64_stable + * * Example: * #include "hash/hash.h" * #include @@ -85,11 +89,12 @@ * } */ #define hash_stable(p, num, base) \ - (sizeof(*(p)) == 8 ? hash_stable_64((p), (num), (base)) \ + (EXPR_BUILD_ASSERT(sizeof(*(p)) == 8 || sizeof(*(p)) == 4 \ + || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) + \ + sizeof(*(p)) == 8 ? hash_stable_64((p), (num), (base)) \ : sizeof(*(p)) == 4 ? hash_stable_32((p), (num), (base)) \ : sizeof(*(p)) == 2 ? hash_stable_16((p), (num), (base)) \ - : sizeof(*(p)) == 1 ? hash_stable_8((p), (num), (base)) \ - : hash_stable_fail((p), (num), sizeof(*(p)), (base))) + : hash_stable_8((p), (num), (base))) /** * hash_u32 - fast hash an array of 32-bit values for internal use @@ -107,6 +112,18 @@ */ uint32_t hash_u32(const uint32_t *key, size_t num, uint32_t base); +/* Our underlying operations. 
*/ +uint32_t hash_any(const void *key, size_t length, uint32_t base); +uint32_t hash_stable_64(const void *key, size_t n, uint32_t base); +uint32_t hash_stable_32(const void *key, size_t n, uint32_t base); +uint32_t hash_stable_16(const void *key, size_t n, uint32_t base); +uint32_t hash_stable_8(const void *key, size_t n, uint32_t base); +uint64_t hash64_any(const void *key, size_t length, uint32_t base); +uint64_t hash64_stable_64(const void *key, size_t n, uint32_t base); +uint64_t hash64_stable_32(const void *key, size_t n, uint32_t base); +uint64_t hash64_stable_16(const void *key, size_t n, uint32_t base); +uint64_t hash64_stable_8(const void *key, size_t n, uint32_t base); + /** * hash_string - very fast hash of an ascii string * @str: the nul-terminated string @@ -132,14 +149,6 @@ static inline uint32_t hash_string(const char *string) return ret; } -/* Our underlying operations. */ -uint32_t hash_any(const void *key, size_t length, uint32_t base); -uint32_t hash_stable_64(const void *key, size_t n, uint32_t base); -uint32_t hash_stable_32(const void *key, size_t n, uint32_t base); -uint32_t hash_stable_16(const void *key, size_t n, uint32_t base); -uint32_t hash_stable_8(const void *key, size_t n, uint32_t base); -uint32_t hash_stable_fail(const void *key, size_t n, size_t len, uint32_t base); - /** * hash_pointer - hash a pointer for internal use * @p: the pointer value to hash @@ -195,4 +204,106 @@ static inline uint32_t hash_pointer(const void *p, uint32_t base) } else return hash(&p, 1, base); } + +/** + * hash64 - fast 64-bit hash of an array for internal use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * The memory region pointed to by p is combined with the base to form + * a 64-bit hash. + * + * This hash will have different results on different machines, so is + * only useful for internal hashes (ie. 
not hashes sent across the + * network or saved to disk). + * + * It may also change with future versions: it could even detect at runtime + * what the fastest hash to use is. + * + * See also: hash. + * + * Example: + * #include + * #include + * #include + * + * // Simple demonstration: identical strings will have the same hash, but + * // two different strings will probably not. + * int main(int argc, char *argv[]) + * { + * uint64_t hash1, hash2; + * + * if (argc != 3) + * err(1, "Usage: %s ", argv[0]); + * + * hash1 = hash64(argv[1], strlen(argv[1]), 0); + * hash2 = hash64(argv[2], strlen(argv[2]), 0); + * printf("Hash is %s\n", hash1 == hash2 ? "same" : "different"); + * return 0; + * } + */ +#define hash64(p, num, base) hash64_any((p), (num)*sizeof(*(p)), (base)) + +/** + * hash64_stable - 64 bit hash of an array for external use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * The array of simple integer types pointed to by p is combined with + * the base to form a 64-bit hash. + * + * This hash will have the same results on different machines, so can + * be used for external hashes (ie. hashes sent across the network or + * saved to disk). The results will not change in future versions of + * this module. + * + * Note that it is only legal to hand an array of simple integer types + * to this hash (ie. char, uint16_t, int64_t, etc). In these cases, + * the same values will have the same hash result, even though the + * memory representations of integers depend on the machine + * endianness. 
+ * + * See also: + * hash_stable + * + * Example: + * #include + * #include + * #include + * + * int main(int argc, char *argv[]) + * { + * if (argc != 2) + * err(1, "Usage: %s ", argv[0]); + * + * printf("Hash stable result is %llu\n", + * (unsigned long long)hash64_stable(argv[1], strlen(argv[1]), 0)); + * return 0; + * } + */ +#define hash64_stable(p, num, base) \ + (EXPR_BUILD_ASSERT(sizeof(*(p)) == 8 || sizeof(*(p)) == 4 \ + || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) + \ + sizeof(*(p)) == 8 ? hash64_stable_64((p), (num), (base)) \ + : sizeof(*(p)) == 4 ? hash64_stable_32((p), (num), (base)) \ + : sizeof(*(p)) == 2 ? hash64_stable_16((p), (num), (base)) \ + : hash64_stable_8((p), (num), (base))) + + +/** + * hashl - fast 32/64-bit hash of an array for internal use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * This is either hash() or hash64(), on 32/64 bit long machines. + */ +#define hashl(p, num, base) \ + (EXPR_BUILD_ASSERT(sizeof(long) == sizeof(uint32_t) \ + || sizeof(long) == sizeof(uint64_t)) + \ + (sizeof(long) == sizeof(uint64_t) \ + ? hash64((p), (num), (base)) : hash((p), (num), (base)))) + #endif /* HASH_H */ diff --git a/ccan/hash/test/api-hash_stable.c b/ccan/hash/test/api-hash_stable.c index d07c4a93..bb58d16b 100644 --- a/ccan/hash/test/api-hash_stable.c +++ b/ccan/hash/test/api-hash_stable.c @@ -21,7 +21,7 @@ int main(int argc, char *argv[]) u64array[i] = i; } - plan_tests(132); + plan_tests(264); /* hash_stable is API-guaranteed. 
*/ ok1(hash_stable(u8array, ARRAY_WORDS, 0) == 0x1d4833cc); @@ -160,5 +160,141 @@ int main(int argc, char *argv[]) ok1(hash_stable(u64array, ARRAY_WORDS, 1073741824) == 0x1b346394); ok1(hash_stable(u64array, ARRAY_WORDS, 2147483648U) == 0x6c3a1592); + ok1(hash64_stable(u8array, ARRAY_WORDS, 0) == 16887282882572727244ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1) == 12032777473133454818ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2) == 18183407363221487738ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 4) == 17860764172704150171ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 8) == 18076051600675559233ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 16) == 9909361918431556721ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 32) == 12937969888744675813ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 64) == 5245669057381736951ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 128) == 4376874646406519665ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 256) == 14219974419871569521ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 512) == 2263415354134458951ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1024) == 4953859694526221685ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2048) == 3432228642067641593ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 4096) == 1219647244417697483ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 8192) == 7629939424585859553ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 16384) == 10041660531376789749ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 32768) == 13859885793922603927ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 65536) == 15069060338344675120ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 131072) == 818163430835601100ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 262144) == 14914314323019517069ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 524288) == 17518437749769352214ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1048576) == 14920048004901212706ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2097152) == 
8758567366332536138ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 4194304) == 6226655736088907885ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 8388608) == 13716650013685832100ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 16777216) == 305325651636315638ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 33554432) == 16784147606583781671ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 67108864) == 16509467555140798205ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 134217728) == 8717281234694060584ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 268435456) == 8098476701725660537ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 536870912) == 16345871539461094006ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1073741824) == 3755557000429964408ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2147483648U) == 15017348801959710081ULL); + + ok1(hash64_stable(u16array, ARRAY_WORDS, 0) == 1038028831307724039ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1) == 10155473272642627302ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 2) == 5714751190106841420ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 4) == 3923885607767527866ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 8) == 3931017318293995558ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 16) == 1469696588339313177ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 32) == 11522218526952715051ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 64) == 6953517591561958496ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 128) == 7406689491740052867ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 256) == 10101844489704093104ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 512) == 12511348870707245959ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1024) == 1614019938016861468ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 2048) == 5294796182374592721ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 4096) == 16089570706643716675ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 8192) == 1689302638424579464ULL); + 
ok1(hash64_stable(u16array, ARRAY_WORDS, 16384) == 1446340172370386893ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 32768) == 16535503506744393039ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 65536) == 3496794142527150328ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 131072) == 6568245367474548504ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 262144) == 9487676460765485949ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 524288) == 4519762130966530000ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1048576) == 15623412069215340610ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 2097152) == 544013388676438108ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 4194304) == 5594904760290840266ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 8388608) == 18098755780041592043ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 16777216) == 6389168672387330316ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 33554432) == 896986127732419381ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 67108864) == 13232626471143901354ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 134217728) == 53378562890493093ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 268435456) == 10072361400297824771ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 536870912) == 14511948118285144529ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1073741824) == 6981033484844447277ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 2147483648U) == 5619339091684126808ULL); + + ok1(hash64_stable(u32array, ARRAY_WORDS, 0) == 3037571077312110476ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1) == 14732398743825071988ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 2) == 14949132158206672071ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 4) == 1291370080511561429ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 8) == 10792665964172133092ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 16) == 14250138032054339435ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 32) == 17136741522078732741ULL); + 
ok1(hash64_stable(u32array, ARRAY_WORDS, 64) == 3260193403318236635ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 128) == 10526616652205653536ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 256) == 9019690373358576579ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 512) == 6997491436599677436ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1024) == 18302783371416533798ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 2048) == 10149320644446516025ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 4096) == 7073759949410623868ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 8192) == 17442399482223760073ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 16384) == 2983906194216281861ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 32768) == 4975845419129060524ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 65536) == 594019910205413268ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 131072) == 11903010186073691112ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 262144) == 7339636527154847008ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 524288) == 15243305400579108736ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1048576) == 16737926245392043198ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 2097152) == 15725083267699862972ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 4194304) == 12527834265678833794ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 8388608) == 13908436455987824848ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 16777216) == 9672773345173872588ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 33554432) == 2305314279896710501ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 67108864) == 1866733780381408751ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 134217728) == 11906263969465724709ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 268435456) == 5501594918093830069ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 536870912) == 15823785789276225477ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1073741824) == 17353000723889475410ULL); + 
ok1(hash64_stable(u32array, ARRAY_WORDS, 2147483648U) == 7494736910655503182ULL); + + ok1(hash64_stable(u64array, ARRAY_WORDS, 0) == 9765419389786481410ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1) == 11182806172127114246ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2) == 2559155171395472619ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 4) == 3311692033324815378ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 8) == 1297175419505333844ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 16) == 617896928653569210ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 32) == 1517398559958603553ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 64) == 4504821917445110758ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 128) == 1971743331114904452ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 256) == 6177667912354374306ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 512) == 15570521289777792458ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1024) == 9204559632415917331ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2048) == 9008982669760028237ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 4096) == 14803537660281700281ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 8192) == 2873966517448487327ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 16384) == 5859277625928363661ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 32768) == 15520461285618185970ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 65536) == 16746489793331175369ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 131072) == 514952025484227461ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 262144) == 10867212269810675249ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 524288) == 9822204377278314587ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1048576) == 3295088921987850465ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2097152) == 7559197431498053712ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 4194304) == 1667267269116771849ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 8388608) == 
2916804068951374862ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 16777216) == 14422558383125688561ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 33554432) == 10083112683694342602ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 67108864) == 7222777647078298513ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 134217728) == 18424513674048212529ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 268435456) == 14913668581101810784ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 536870912) == 14377721174297902048ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1073741824) == 6031715005667500948ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2147483648U) == 4827100319722378642ULL); + return exit_status(); } diff --git a/ccan/hash/test/run.c b/ccan/hash/test/run.c index 2aeec42d..31d4ecb5 100644 --- a/ccan/hash/test/run.c +++ b/ccan/hash/test/run.c @@ -17,7 +17,7 @@ int main(int argc, char *argv[]) for (i = 0; i < ARRAY_WORDS; i++) array[i] = i; - plan_tests(22); + plan_tests(39); /* Hash should be the same, indep of memory alignment. */ val = hash(array, sizeof(array), 0); for (i = 0; i < sizeof(uint32_t); i++) { @@ -51,6 +51,31 @@ int main(int argc, char *argv[]) diag("Byte %i, range %u-%u", i, lowest, highest); } + /* Hash of random values should have random distribution: + * check one byte at a time. 
*/ + for (i = 0; i < sizeof(uint64_t); i++) { + unsigned int lowest = -1U, highest = 0; + + memset(results, 0, sizeof(results)); + + for (j = 0; j < 256000; j++) { + for (k = 0; k < ARRAY_WORDS; k++) + array[k] = random(); + results[(hash64(array, sizeof(array), 0) >> i*8)&0xFF]++; + } + + for (j = 0; j < 256; j++) { + if (results[j] < lowest) + lowest = results[j]; + if (results[j] > highest) + highest = results[j]; + } + /* Expect within 20% */ + ok(lowest > 800, "Byte %i lowest %i", i, lowest); + ok(highest < 1200, "Byte %i highest %i", i, highest); + diag("Byte %i, range %u-%u", i, lowest, highest); + } + /* Hash of pointer values should also have random distribution. */ for (i = 0; i < sizeof(uint32_t); i++) { unsigned int lowest = -1U, highest = 0; @@ -75,6 +100,13 @@ int main(int argc, char *argv[]) diag("hash_pointer byte %i, range %u-%u", i, lowest, highest); } + if (sizeof(long) == sizeof(uint32_t)) + ok1(hashl(array, sizeof(array), 0) + == hash(array, sizeof(array), 0)); + else + ok1(hashl(array, sizeof(array), 0) + == hash64(array, sizeof(array), 0)); + /* String hash: weak, so only test bottom byte */ for (i = 0; i < 1; i++) { unsigned int num = 0, cursor, lowest = -1U, highest = 0;