1 /* CC0 (Public domain) - see LICENSE file for details */
/*
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hash_word(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  Routines to test the hash are included
if SELF_TEST is defined.  You can use this free for any purpose.  It's in
the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
little-endian machines.  Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1; b = i2; c = i3;
  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hash_word().  If you have a byte array (like
a character string), use hashlittle().  If you have several byte arrays, or
a mix of things, see the comments above hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/
40 #include <stdio.h> /* defines printf for tests */
41 #include <time.h> /* defines time_t for timings in the test */
42 #include <stdint.h> /* defines uint32_t etc */
43 #include <sys/param.h> /* attempt to define endianness */
46 # include <endian.h> /* attempt to define endianness */
50 * My best guess at if you are big-endian or little-endian. This may
53 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
54 __BYTE_ORDER == __LITTLE_ENDIAN) || \
55 (defined(i386) || defined(__i386__) || defined(__i486__) || \
56 defined(__i586__) || defined(__i686__) || defined(__x86_64) || \
57 defined(vax) || defined(MIPSEL))
58 # define HASH_LITTLE_ENDIAN 1
59 # define HASH_BIG_ENDIAN 0
60 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
61 __BYTE_ORDER == __BIG_ENDIAN) || \
62 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
63 # define HASH_LITTLE_ENDIAN 0
64 # define HASH_BIG_ENDIAN 1
66 # error Unknown endian
68 #endif /* old hash.c headers. */
72 #if HAVE_LITTLE_ENDIAN
73 #define HASH_LITTLE_ENDIAN 1
74 #define HASH_BIG_ENDIAN 0
76 #define HASH_LITTLE_ENDIAN 0
77 #define HASH_BIG_ENDIAN 1
/* Number of buckets for an n-bit hash, and the mask that reduces a hash
 * to n bits. */
#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
/*
 * rot(x,k) -- rotate the 32-bit value x left by k bits, k in [0,31].
 * The right-shift count is masked with 31 so that k==0 no longer
 * right-shifts by 32 (undefined behaviour in C, 6.5.7); for k in 1..31
 * this is bit-identical to the classic (x<<k)|(x>>(32-k)) form, so no
 * hash value changes.
 */
#define rot(x,k) (((x)<<(k)) | ((x)>>((32-(k))&31)))
/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
    4  6  8 16 19  4
    9 15  3 18 27 15
   14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
/*
 * mix() -- mix 3 32-bit values reversibly (Bob Jenkins, lookup3).
 * The rotate constants 4 6 8 16 19 4 are Jenkins' published values;
 * changing them changes every hash value.  Wrapped in do { } while (0)
 * so that mix(a,b,c); is a single statement in every context (safe in
 * an unbraced if/else), with behaviour otherwise identical.
 */
#define mix(a,b,c) \
do { \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
} while (0)
/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) look like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

These constants passed:
 14 11 25 16 4 14 24
 12 14 25 16 4 14 24
and these came close:
  4  8 15 26 3 22 24
 10  8 15 26 3 22 24
 11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
/*
 * final() -- final mixing of 3 32-bit values (a,b,c) into c (Bob
 * Jenkins, lookup3).  Unlike mix() this is not reversible; after it,
 * c is the best-mixed of the three words, then b.  Wrapped in
 * do { } while (0) so final(a,b,c); is a single statement everywhere.
 */
#define final(a,b,c) \
do { \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
} while (0)
/*
--------------------------------------------------------------------
This works on all machines.  To be useful, it requires
-- that the key be an array of uint32_t's, and
-- that the length be the number of uint32_t's in the key

The function hash_word() is identical to hashlittle() on little-endian
machines, and identical to hashbig() on big-endian machines,
except that the length has to be measured in uint32_ts rather than in
bytes.  hashlittle() is more complicated than hash_word() only because
hashlittle() has to dance around fitting the key bytes into registers.
--------------------------------------------------------------------
*/
/* hash_word() -- hash an array of uint32_t into one 32-bit value.
 * NOTE(review): restored from the canonical lookup3 hashword();
 * the opening signature line was missing here.  Assumed static
 * (only used inside this file) -- confirm against the original. */
static uint32_t hash_word(
const uint32_t *k,                   /* the key, an array of uint32_t values */
size_t          length,               /* the length of the key, in uint32_ts */
uint32_t        initval)         /* the previous hash, or an arbitrary value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];
  case 2 : b+=k[1];
  case 1 : a+=k[0];
    final(a,b,c);
  case 0:     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  return c;
}
/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  val2    : IN: can be any 4-byte value OUT: second 32 bit hash.
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.  Note that the return value is better
mixed than val2, so use that first.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/
252 static uint32_t hashlittle( const void *key, size_t length, uint32_t *val2 )
254 uint32_t a,b,c; /* internal state */
255 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
257 /* Set up the internal state */
258 a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2;
261 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
262 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
265 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
276 /*----------------------------- handle the last (probably partial) block */
278 * "k[2]&0xffffff" actually reads beyond the end of the string, but
279 * then masks off the part it's not allowed to read. Because the
280 * string is aligned, the masked-off tail is in the same word as the
281 * rest of the string. Every machine with memory protection I've seen
282 * does it on word boundaries, so is OK with this. But VALGRIND will
283 * still catch it and complain. The masking trick does make the hash
284 * noticably faster for short strings (like English words).
286 * Not on my testing with gcc 4.5 on an intel i5 CPU, at least --RR.
291 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
292 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
293 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
294 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
295 case 8 : b+=k[1]; a+=k[0]; break;
296 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
297 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
298 case 5 : b+=k[1]&0xff; a+=k[0]; break;
299 case 4 : a+=k[0]; break;
300 case 3 : a+=k[0]&0xffffff; break;
301 case 2 : a+=k[0]&0xffff; break;
302 case 1 : a+=k[0]&0xff; break;
303 case 0 : return c; /* zero length strings require no mixing */
306 #else /* make valgrind happy */
308 k8 = (const uint8_t *)k;
311 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
312 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
313 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
314 case 9 : c+=k8[8]; /* fall through */
315 case 8 : b+=k[1]; a+=k[0]; break;
316 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
317 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
318 case 5 : b+=k8[4]; /* fall through */
319 case 4 : a+=k[0]; break;
320 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
321 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
322 case 1 : a+=k8[0]; break;
326 #endif /* !valgrind */
328 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
329 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
332 /*--------------- all but last block: aligned reads and different mixing */
335 a += k[0] + (((uint32_t)k[1])<<16);
336 b += k[2] + (((uint32_t)k[3])<<16);
337 c += k[4] + (((uint32_t)k[5])<<16);
343 /*----------------------------- handle the last (probably partial) block */
344 k8 = (const uint8_t *)k;
347 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
348 b+=k[2]+(((uint32_t)k[3])<<16);
349 a+=k[0]+(((uint32_t)k[1])<<16);
351 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
353 b+=k[2]+(((uint32_t)k[3])<<16);
354 a+=k[0]+(((uint32_t)k[1])<<16);
356 case 9 : c+=k8[8]; /* fall through */
357 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
358 a+=k[0]+(((uint32_t)k[1])<<16);
360 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
362 a+=k[0]+(((uint32_t)k[1])<<16);
364 case 5 : b+=k8[4]; /* fall through */
365 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
367 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
372 case 0 : return c; /* zero length requires no mixing */
375 } else { /* need to read the key one byte at a time */
376 const uint8_t *k = (const uint8_t *)key;
378 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
382 a += ((uint32_t)k[1])<<8;
383 a += ((uint32_t)k[2])<<16;
384 a += ((uint32_t)k[3])<<24;
386 b += ((uint32_t)k[5])<<8;
387 b += ((uint32_t)k[6])<<16;
388 b += ((uint32_t)k[7])<<24;
390 c += ((uint32_t)k[9])<<8;
391 c += ((uint32_t)k[10])<<16;
392 c += ((uint32_t)k[11])<<24;
398 /*-------------------------------- last block: affect all 32 bits of (c) */
399 switch(length) /* all the case statements fall through */
401 case 12: c+=((uint32_t)k[11])<<24;
402 case 11: c+=((uint32_t)k[10])<<16;
403 case 10: c+=((uint32_t)k[9])<<8;
405 case 8 : b+=((uint32_t)k[7])<<24;
406 case 7 : b+=((uint32_t)k[6])<<16;
407 case 6 : b+=((uint32_t)k[5])<<8;
409 case 4 : a+=((uint32_t)k[3])<<24;
410 case 3 : a+=((uint32_t)k[2])<<16;
411 case 2 : a+=((uint32_t)k[1])<<8;
/*
 * This is the same as hash_word() on big-endian machines.  It is different
 * from hashlittle() on all machines.  hashbig() takes advantage of
 * big-endian byte ordering.
 */
429 static uint32_t hashbig( const void *key, size_t length, uint32_t *val2)
432 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
434 /* Set up the internal state */
435 a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2;
438 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
439 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
442 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
453 /*----------------------------- handle the last (probably partial) block */
455 * "k[2]<<8" actually reads beyond the end of the string, but
456 * then shifts out the part it's not allowed to read. Because the
457 * string is aligned, the illegal read is in the same word as the
458 * rest of the string. Every machine with memory protection I've seen
459 * does it on word boundaries, so is OK with this. But VALGRIND will
460 * still catch it and complain. The masking trick does make the hash
461 * noticably faster for short strings (like English words).
463 * Not on my testing with gcc 4.5 on an intel i5 CPU, at least --RR.
468 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
469 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
470 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
471 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
472 case 8 : b+=k[1]; a+=k[0]; break;
473 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
474 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
475 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
476 case 4 : a+=k[0]; break;
477 case 3 : a+=k[0]&0xffffff00; break;
478 case 2 : a+=k[0]&0xffff0000; break;
479 case 1 : a+=k[0]&0xff000000; break;
480 case 0 : return c; /* zero length strings require no mixing */
483 #else /* make valgrind happy */
485 k8 = (const uint8_t *)k;
486 switch(length) /* all the case statements fall through */
488 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
489 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
490 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
491 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
492 case 8 : b+=k[1]; a+=k[0]; break;
493 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
494 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
495 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
496 case 4 : a+=k[0]; break;
497 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
498 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
499 case 1 : a+=((uint32_t)k8[0])<<24; break;
503 #endif /* !VALGRIND */
505 } else { /* need to read the key one byte at a time */
506 const uint8_t *k = (const uint8_t *)key;
508 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
511 a += ((uint32_t)k[0])<<24;
512 a += ((uint32_t)k[1])<<16;
513 a += ((uint32_t)k[2])<<8;
514 a += ((uint32_t)k[3]);
515 b += ((uint32_t)k[4])<<24;
516 b += ((uint32_t)k[5])<<16;
517 b += ((uint32_t)k[6])<<8;
518 b += ((uint32_t)k[7]);
519 c += ((uint32_t)k[8])<<24;
520 c += ((uint32_t)k[9])<<16;
521 c += ((uint32_t)k[10])<<8;
522 c += ((uint32_t)k[11]);
528 /*-------------------------------- last block: affect all 32 bits of (c) */
529 switch(length) /* all the case statements fall through */
532 case 11: c+=((uint32_t)k[10])<<8;
533 case 10: c+=((uint32_t)k[9])<<16;
534 case 9 : c+=((uint32_t)k[8])<<24;
536 case 7 : b+=((uint32_t)k[6])<<8;
537 case 6 : b+=((uint32_t)k[5])<<16;
538 case 5 : b+=((uint32_t)k[4])<<24;
540 case 3 : a+=((uint32_t)k[2])<<8;
541 case 2 : a+=((uint32_t)k[1])<<16;
542 case 1 : a+=((uint32_t)k[0])<<24;
553 /* I basically use hashlittle here, but use native endian within each
554 * element. This delivers least-surprise: hash such as "int arr[] = {
555 * 1, 2 }; hash_stable(arr, 2, 0);" will be the same on big and little
556 * endian machines, even though a bytewise hash wouldn't be. */
uint64_t hash64_stable_64(const void *key, size_t n, uint64_t base)
{
	const uint64_t *k = key;
	uint32_t a,b,c;

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)n*8) + (base >> 32) + base;

	/* Each 64-bit element feeds two 32-bit words (low half first),
	 * so the result is endian-independent for uint64_t arrays. */
	while (n > 2) {
		a += (uint32_t)k[0];
		b += (uint32_t)(k[0] >> 32);
		c += (uint32_t)k[1];
		mix(a,b,c);
		a += (uint32_t)(k[1] >> 32);
		b += (uint32_t)k[2];
		c += (uint32_t)(k[2] >> 32);
		mix(a,b,c);
		n -= 3;
		k += 3;
	}
	switch (n) {
	case 2:
		a += (uint32_t)k[0];
		b += (uint32_t)(k[0] >> 32);
		c += (uint32_t)k[1];
		mix(a,b,c);
		a += (uint32_t)(k[1] >> 32);
		break;
	case 1:
		a += (uint32_t)k[0];
		b += (uint32_t)(k[0] >> 32);
		break;
	case 0:
		/* zero length requires no mixing */
		return ((uint64_t)b << 32) | c;
	}
	final(a,b,c);
	return ((uint64_t)b << 32) | c;
}
/* Stable (endian-independent) 64-bit hash of an array of uint32_t. */
uint64_t hash64_stable_32(const void *key, size_t n, uint64_t base)
{
	const uint32_t *k = key;
	uint32_t a,b,c;

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)n*4) + (base >> 32) + base;

	while (n > 3) {
		a += k[0];
		b += k[1];
		c += k[2];
		mix(a,b,c);
		n -= 3;
		k += 3;
	}
	switch (n) {	/* all the case statements fall through */
	case 3:
		c += k[2];
	case 2:
		b += k[1];
	case 1:
		a += k[0];
		final(a,b,c);
	case 0:		/* nothing left to add */
		break;
	}
	return ((uint64_t)b << 32) | c;
}
/* Stable (endian-independent) 64-bit hash of an array of uint16_t:
 * pairs of 16-bit elements are packed low-half-first into 32-bit words. */
uint64_t hash64_stable_16(const void *key, size_t n, uint64_t base)
{
	const uint16_t *k = key;
	uint32_t a,b,c;

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)n*2) + (base >> 32) + base;

	while (n > 6) {
		a += (uint32_t)k[0] + ((uint32_t)k[1] << 16);
		b += (uint32_t)k[2] + ((uint32_t)k[3] << 16);
		c += (uint32_t)k[4] + ((uint32_t)k[5] << 16);
		mix(a,b,c);
		n -= 6;
		k += 6;
	}

	switch (n) {	/* all the case statements fall through */
	case 6:
		c += ((uint32_t)k[5] << 16);
	case 5:
		c += (uint32_t)k[4];
	case 4:
		b += ((uint32_t)k[3] << 16);
	case 3:
		b += (uint32_t)k[2];
	case 2:
		a += ((uint32_t)k[1] << 16);
	case 1:
		a += (uint32_t)k[0];
		final(a,b,c);
	case 0:		/* nothing left to add */
		break;
	}
	return ((uint64_t)b << 32) | c;
}
/* Stable 64-bit hash of a byte array: bytewise hashing is already
 * endian-independent, so this just recombines hashlittle()'s two
 * 32-bit results. */
uint64_t hash64_stable_8(const void *key, size_t n, uint64_t base)
{
	uint32_t b32 = base + (base >> 32);
	uint32_t lower = hashlittle(key, n, &b32);

	return ((uint64_t)b32 << 32) | lower;
}
671 uint32_t hash_any(const void *key, size_t length, uint32_t base)
674 return hashbig(key, length, &base);
676 return hashlittle(key, length, &base);
/* 32-bit stable hash of uint64_t elements (truncates the 64-bit one). */
uint32_t hash_stable_64(const void *key, size_t n, uint32_t base)
{
	return hash64_stable_64(key, n, base);
}
/* 32-bit stable hash of uint32_t elements (truncates the 64-bit one). */
uint32_t hash_stable_32(const void *key, size_t n, uint32_t base)
{
	return hash64_stable_32(key, n, base);
}
/* 32-bit stable hash of uint16_t elements (truncates the 64-bit one). */
uint32_t hash_stable_16(const void *key, size_t n, uint32_t base)
{
	return hash64_stable_16(key, n, base);
}
/* 32-bit stable hash of bytes: bytewise hashlittle() is already stable. */
uint32_t hash_stable_8(const void *key, size_t n, uint32_t base)
{
	return hashlittle(key, n, &base);
}
699 /* Jenkins' lookup8 is a 64 bit hash, but he says it's obsolete. Use
700 * the plain one and recombine into 64 bits. */
701 uint64_t hash64_any(const void *key, size_t length, uint64_t base)
703 uint32_t b32 = base + (base >> 32);
707 lower = hashbig(key, length, &b32);
709 lower = hashlittle(key, length, &b32);
711 return ((uint64_t)b32 << 32) | lower;
716 /* used for timings */
725 for (i=0; i<256; ++i) buf[i] = 'x';
728 h = hashlittle(&buf[0],1,h);
731 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
/* check that every input bit changes every output bit half the time */
#define HASHSTATE 1
#define HASHLEN 1
#define MAXPAIR 60
#define MAXLEN 70
void driver2()
{
  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
  uint32_t x[HASHSTATE],y[HASHSTATE];
  uint32_t hlen;

  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
  for (hlen=0; hlen < MAXLEN; ++hlen)
  {
    z=0;
    for (i=0; i<hlen; ++i)    /*----------------------- for each input byte, */
    {
      for (j=0; j<8; ++j)     /*------------------------ for each input bit, */
      {
        for (m=1; m<8; ++m)   /*------------ for several possible initvals, */
        {
          for (l=0; l<HASHSTATE; ++l)
            e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);

          /*---- check that every output bit is affected by that input bit */
          for (k=0; k<MAXPAIR; k+=2)
          {
            uint32_t finished=1;
            /* keys have one bit different */
            for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
            /* have a and b be two keys differing in only one bit */
            a[i] ^= (k<<j);
            a[i] ^= (k>>(8-j));
            c[0] = hashlittle(a, hlen, m);
            b[i] ^= ((k+1)<<j);
            b[i] ^= ((k+1)>>(8-j));
            d[0] = hashlittle(b, hlen, m);
            /* check every bit is 1, 0, set, and not set at least once */
            for (l=0; l<HASHSTATE; ++l)
            {
              e[l] &= (c[l]^d[l]);
              f[l] &= ~(c[l]^d[l]);
              g[l] &= c[l];
              h[l] &= ~c[l];
              x[l] &= d[l];
              y[l] &= ~d[l];
              if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
            }
            if (finished) break;
          }
          if (k>z) z=k;
          if (k==MAXPAIR)
          {
            printf("Some bit didn't change: ");
            printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
                   e[0],f[0],g[0],h[0],x[0],y[0]);
            printf("i %d j %d m %d len %d\n", i, j, m, hlen);
          }
          if (z==MAXPAIR) goto done;
        }
      }
    }
   done:
    if (z < MAXPAIR)
    {
      printf("Mix success  %2d bytes  %2d initvals  ",i,m);
      printf("required  %d  trials\n", z/2);
    }
  }
  printf("\n");
}
/* Check for reading beyond the end of the buffer and alignment problems */
#define MAXLEN 70
void driver3()
{
  uint8_t buf[MAXLEN+20], *b;
  uint32_t len;
  uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
  uint32_t h;
  uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
  uint32_t i;
  uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
  uint32_t j;
  uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
  uint32_t ref,x,y;
  uint8_t *p;

  printf("Endianness.  These lines should all be the same (for values filled in):\n");
  printf("%.8x                            %.8x                            %.8x\n",
         hash_word((const uint32_t *)q, (sizeof(q)-1)/4, 13),
         hash_word((const uint32_t *)q, (sizeof(q)-5)/4, 13),
         hash_word((const uint32_t *)q, (sizeof(q)-9)/4, 13));
  p = q;
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qq[1];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qqq[2];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  p = &qqqq[3];
  printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
         hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
         hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
         hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
         hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
         hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
         hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
  printf("\n");

  /* check that hashlittle2 and hashlittle produce the same results */
  i=47; j=0;
  hashlittle2(q, sizeof(q), &i, &j);
  if (hashlittle(q, sizeof(q), 47) != i)
    printf("hashlittle2 and hashlittle mismatch\n");

  /* check that hash_word2 and hash_word produce the same results */
  len = 0xdeadbeef;
  i=47, j=0;
  hash_word2(&len, 1, &i, &j);
  if (hash_word(&len, 1, 47) != i)
    printf("hash_word2 and hash_word mismatch %x %x\n",
           i, hash_word(&len, 1, 47));

  /* check hashlittle doesn't read before or after the ends of the string */
  for (h=0, b=buf+1; h<8; ++h, ++b)
  {
    for (i=0; i<MAXLEN; ++i)
    {
      len = i;
      for (j=0; j<i; ++j) *(b+j)=0;

      /* these should all be equal */
      ref = hashlittle(b, len, (uint32_t)1);
      *(b+i)=(uint8_t)~0;
      *(b-1)=(uint8_t)~0;
      x = hashlittle(b, len, (uint32_t)1);
      y = hashlittle(b, len, (uint32_t)1);
      if ((ref != x) || (ref != y))
      {
        printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
               h, i);
      }
    }
  }
}
/* check for problems with nulls */
#define HASHSTATE 1
void driver4()
{
  uint8_t buf[1];
  uint32_t h,i,state[HASHSTATE];

  buf[0] = ~0;
  for (i=0; i<HASHSTATE; ++i) state[i] = 1;
  printf("These should all be different\n");
  for (i=0, h=0; i<8; ++i)
  {
    h = hashlittle(buf, 0, h);
    printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
  }
}
919 driver1(); /* test that the key is hashed: used for timings */
920 driver2(); /* test that whole key is hashed thoroughly */
921 driver3(); /* test that nothing but the key is hashed */
922 driver4(); /* test hashing multiple buffers (all buffers are null) */
926 #endif /* SELF_TEST */