X-Git-Url: https://git.ozlabs.org/?p=ccan;a=blobdiff_plain;f=ccan%2Fhash%2Fhash.c;h=9b3f58b2b20a649785b7f053c4c7828413610148;hp=104a2958ff26b7597827f5593a73412fe61e7da0;hb=c4c5fed020ba44b9930119672a36a1cb33aff090;hpb=a86b21063f9627872914961b651f97be0aadad12;ds=sidebyside diff --git a/ccan/hash/hash.c b/ccan/hash/hash.c index 104a2958..9b3f58b2 100644 --- a/ccan/hash/hash.c +++ b/ccan/hash/hash.c @@ -42,7 +42,7 @@ on 1 byte), but shoehorning those bytes into integers efficiently is messy. #include /* attempt to define endianness */ #endif -#include "hash/hash.h" +#include "hash.h" #ifdef linux # include /* attempt to define endianness */ #endif @@ -208,63 +208,16 @@ uint32_t initval) /* the previous hash, or an arbitrary value */ return c; } - -#if 0 -/* --------------------------------------------------------------------- -hash_word2() -- same as hash_word(), but take two seeds and return two -32-bit values. pc and pb must both be nonnull, and *pc and *pb must -both be initialized with seeds. If you pass in (*pb)==0, the output -(*pc) will be the same as the return value from hash_word(). --------------------------------------------------------------------- -*/ -void hash_word2 ( -const uint32_t *k, /* the key, an array of uint32_t values */ -size_t length, /* the length of the key, in uint32_ts */ -uint32_t *pc, /* IN: seed OUT: primary hash value */ -uint32_t *pb) /* IN: more seed OUT: secondary hash value */ -{ - uint32_t a,b,c; - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; - c += *pb; - - /*------------------------------------------------- handle most of the key */ - while (length > 3) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 3; - k += 3; - } - - /*------------------------------------------- handle the last 3 uint32_t's */ - switch(length) /* all the case statements fall through */ - { - case 3 : c+=k[2]; - case 2 : b+=k[1]; - case 1 : a+=k[0]; - final(a,b,c); - case 0: /* case 0: nothing left to add */ - break; - } - /*------------------------------------------------------ report the result */ - *pc=c; *pb=b; -} -#endif - /* ------------------------------------------------------------------------------- hashlittle() -- hash a variable-length key into a 32-bit value k : the key (the unaligned variable-length array of bytes) length : the length of the key, counting by bytes - initval : can be any 4-byte value + val2 : IN: can be any 4-byte value OUT: second 32 bit hash. Returns a 32-bit value. Every bit of the key affects every bit of the return value. Two keys differing by one or two bits will have -totally different hash values. +totally different hash values. Note that the return value is better +mixed than val2, so use that first. The best hash table sizes are powers of 2. There is no need to do mod a prime (mod is sooo slow!). If you need less than 32 bits, @@ -283,13 +236,13 @@ acceptable. Do NOT use for cryptographic purposes. ------------------------------------------------------------------------------- */ -static uint32_t hashlittle( const void *key, size_t length, uint32_t initval) +static uint32_t hashlittle( const void *key, size_t length, uint32_t *val2 ) { uint32_t a,b,c; /* internal state */ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2; u.ptr = key; if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { @@ -451,209 +404,23 @@ static uint32_t hashlittle( const void *key, size_t length, uint32_t initval) } final(a,b,c); + *val2 = b; return c; } -#if 0 -/* - * hashlittle2: return 2 32-bit hash values - * - * This is identical to hashlittle(), except it returns two 32-bit hash - * values instead of just one. This is good enough for hash table - * lookup with 2^^64 buckets, or if you want a second hash if you're not - * happy with the first, or if you want a probably-unique 64-bit ID for - * the key. *pc is better mixed than *pb, so use *pc first. If you want - * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". - */ -void hashlittle2( - const void *key, /* the key to hash */ - size_t length, /* length of the key */ - uint32_t *pc, /* IN: primary initval, OUT: primary hash */ - uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ -{ - uint32_t a,b,c; /* internal state */ - union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; - c += *pb; - - u.ptr = key; - if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { - const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ - const uint8_t *k8; - - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /*----------------------------- handle the last (probably partial) block */ - /* - * "k[2]&0xffffff" actually reads beyond the end of the string, but - * then masks off the part it's not allowed to read. Because the - * string is aligned, the masked-off tail is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). - */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; - case 6 : b+=k[1]&0xffff; a+=k[0]; break; - case 5 : b+=k[1]&0xff; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff; break; - case 2 : a+=k[0]&0xffff; break; - case 1 : a+=k[0]&0xff; break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ - case 1 : a+=k8[0]; break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - -#endif /* !valgrind */ - - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ - const uint8_t *k8; - - /*--------------- all but last block: aligned reads and different mixing */ - while (length > 12) - { - a += k[0] + (((uint32_t)k[1])<<16); - b += k[2] + (((uint32_t)k[3])<<16); - c += k[4] + (((uint32_t)k[5])<<16); - mix(a,b,c); - length -= 12; - k += 6; - } - - /*----------------------------- handle the last (probably partial) block */ - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[4]+(((uint32_t)k[5])<<16); - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=k[4]; - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=k[2]; - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=k[0]; - break; - case 1 : a+=k8[0]; - break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = (const uint8_t *)key; - - /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - a += ((uint32_t)k[1])<<8; - a += ((uint32_t)k[2])<<16; - a += ((uint32_t)k[3])<<24; - b += k[4]; - b += ((uint32_t)k[5])<<8; - b += ((uint32_t)k[6])<<16; - b += ((uint32_t)k[7])<<24; - c += k[8]; - c += ((uint32_t)k[9])<<8; - c += ((uint32_t)k[10])<<16; - c += ((uint32_t)k[11])<<24; - mix(a,b,c); - length -= 12; - k += 12; - } - - /*-------------------------------- last block: affect all 32 bits of (c) */ - switch(length) /* all the case statements fall through */ - { - case 12: c+=((uint32_t)k[11])<<24; - case 11: c+=((uint32_t)k[10])<<16; - case 10: c+=((uint32_t)k[9])<<8; - case 9 : c+=k[8]; - case 8 : b+=((uint32_t)k[7])<<24; - case 7 : b+=((uint32_t)k[6])<<16; - case 6 : b+=((uint32_t)k[5])<<8; - case 5 : b+=k[4]; - case 4 : a+=((uint32_t)k[3])<<24; - case 3 : a+=((uint32_t)k[2])<<16; - case 2 : a+=((uint32_t)k[1])<<8; - case 1 : a+=k[0]; - break; - case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ - } - } - - final(a,b,c); - *pc=c; *pb=b; -} -#endif - - /* * hashbig(): * This is the same as hash_word() on big-endian machines. It is different * from hashlittle() on all machines. hashbig() takes advantage of * big-endian byte ordering. */ -static uint32_t hashbig( const void *key, size_t length, uint32_t initval) +static uint32_t hashbig( const void *key, size_t length, uint32_t *val2) { uint32_t a,b,c; union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2; u.ptr = key; if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { @@ -768,6 +535,7 @@ static uint32_t hashbig( const void *key, size_t length, uint32_t initval) } final(a,b,c); + *val2 = b; return c; } @@ -775,13 +543,13 @@ static uint32_t hashbig( const void *key, size_t length, uint32_t initval) * element. This delivers least-surprise: hash such as "int arr[] = { * 1, 2 }; hash_stable(arr, 2, 0);" will be the same on big and little * endian machines, even though a bytewise hash wouldn't be. */ -uint32_t hash_stable_64(const void *key, size_t n, uint32_t base) +uint64_t hash64_stable_64(const void *key, size_t n, uint64_t base) { const uint64_t *k = key; uint32_t a,b,c; /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)n*8) + base; + a = b = c = 0xdeadbeef + ((uint32_t)n*8) + (base >> 32) + base; while (n > 3) { a += (uint32_t)k[0]; @@ -811,16 +579,16 @@ uint32_t hash_stable_64(const void *key, size_t n, uint32_t base) return c; } final(a,b,c); - return c; + return ((uint64_t)b << 32) | c; } -uint32_t hash_stable_32(const void *key, size_t n, uint32_t base) +uint64_t hash64_stable_32(const void *key, size_t n, uint64_t base) { const uint32_t *k = key; uint32_t a,b,c; /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)n*4) + base; + a = b = c = 0xdeadbeef + ((uint32_t)n*4) + (base >> 32) + base; while (n > 3) { a += k[0]; @@ -841,16 +609,16 @@ uint32_t hash_stable_32(const void *key, size_t n, uint32_t base) return c; } final(a,b,c); - return c; + return ((uint64_t)b << 32) | c; } -uint32_t hash_stable_16(const void *key, size_t n, uint32_t base) +uint64_t hash64_stable_16(const void *key, size_t n, uint64_t base) { const uint16_t *k = key; uint32_t a,b,c; /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)n*2) + base; + a = b = c = 0xdeadbeef + ((uint32_t)n*2) + (base >> 32) + base; while (n > 6) { a += (uint32_t)k[0] + ((uint32_t)k[1] << 16); @@ -878,20 +646,58 @@ uint32_t hash_stable_16(const void *key, size_t n, uint32_t base) return c; } final(a,b,c); - return c; + return ((uint64_t)b << 32) | c; } - -uint32_t hash_stable_8(const void *key, size_t n, uint32_t base) + +uint64_t hash64_stable_8(const void *key, size_t n, uint64_t base) { - return hashlittle(key, n, base); + uint32_t b32 = base + (base >> 32); + uint32_t lower = hashlittle(key, n, &b32); + + return ((uint64_t)b32 << 32) | lower; } uint32_t hash_any(const void *key, size_t length, uint32_t base) { if (HASH_BIG_ENDIAN) - return hashbig(key, length, base); + return hashbig(key, length, &base); else - return hashlittle(key, length, base); + return hashlittle(key, length, &base); +} + +uint32_t hash_stable_64(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_64(key, n, base); +} + +uint32_t hash_stable_32(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_32(key, n, base); +} + +uint32_t hash_stable_16(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_16(key, n, base); +} + +uint32_t hash_stable_8(const void *key, size_t n, uint32_t base) +{ + return hashlittle(key, n, &base); +} + +/* Jenkins' lookup8 is a 64 bit hash, but he says it's obsolete. Use + * the plain one and recombine into 64 bits. */ +uint64_t hash64_any(const void *key, size_t length, uint64_t base) +{ + uint32_t b32 = base + (base >> 32); + uint32_t lower; + + if (HASH_BIG_ENDIAN) + lower = hashbig(key, length, &b32); + else + lower = hashlittle(key, length, &b32); + + return ((uint64_t)b32 << 32) | lower; } #ifdef SELF_TEST