1 /* CC0 (Public domain) - see LICENSE file for details */
7 #include <ccan/build_assert/build_assert.h>
/* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain.
 *
 * http://burtleburtle.net/bob/c/lookup3.c
 */
/**
 * hash - fast hash of an array for internal use
 * @p: the array or pointer to first element
 * @num: the number of elements to hash
 * @base: the base number to roll into the hash (usually 0)
 *
 * The memory region pointed to by p is combined with the base to form
 * a 32-bit hash.  The region hashed is @num * sizeof(*@p) bytes long.
 *
 * This hash will have different results on different machines, so is
 * only useful for internal hashes (ie. not hashes sent across the
 * network or saved to disk).
 *
 * It may also change with future versions: it could even detect at runtime
 * what the fastest hash to use is.
 *
 * See also: hash64, hash_stable.
 *
 * Example:
 *	#include <ccan/hash/hash.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	// Simple demonstration: identical strings will have the same hash, but
 *	// two different strings will probably not.
 *	int main(int argc, char *argv[])
 *	{
 *		uint32_t hash1, hash2;
 *
 *		if (argc != 3)
 *			err(1, "Usage: %s <string1> <string2>", argv[0]);
 *
 *		hash1 = hash(argv[1], strlen(argv[1]), 0);
 *		hash2 = hash(argv[2], strlen(argv[2]), 0);
 *		printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
 *		return 0;
 *	}
 */
#define hash(p, num, base) hash_any((p), (num)*sizeof(*(p)), (base))
/**
 * hash_stable - hash of an array for external use
 * @p: the array or pointer to first element
 * @num: the number of elements to hash
 * @base: the base number to roll into the hash (usually 0)
 *
 * The array of simple integer types pointed to by p is combined with
 * the base to form a 32-bit hash.
 *
 * This hash will have the same results on different machines, so can
 * be used for external hashes (ie. hashes sent across the network or
 * saved to disk).  The results will not change in future versions of
 * this module.
 *
 * Note that it is only legal to hand an array of simple integer types
 * to this hash (ie. char, uint16_t, int64_t, etc).  In these cases,
 * the same values will have the same hash result, even though the
 * memory representations of integers depend on the machine
 * endianness.
 *
 * Implementation note: the BUILD_ASSERT_OR_ZERO() term (expected to
 * contribute 0 when the element size is 1, 2, 4 or 8) is deliberately
 * added to sizeof(*(p)) *before* the comparison with 8, so the value and
 * type of the whole expression come only from the selected
 * hash_stable_N() helper.
 *
 * Example:
 *	#include <ccan/hash/hash.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	int main(int argc, char *argv[])
 *	{
 *		if (argc != 2)
 *			err(1, "Usage: %s <string-to-hash>", argv[0]);
 *
 *		printf("Hash stable result is %u\n",
 *		       hash_stable(argv[1], strlen(argv[1]), 0));
 *		return 0;
 *	}
 */
#define hash_stable(p, num, base)					\
	((BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4	\
			       || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) \
	  + sizeof(*(p))) == 8 ? hash_stable_64((p), (num), (base))	\
	 : sizeof(*(p)) == 4 ? hash_stable_32((p), (num), (base))	\
	 : sizeof(*(p)) == 2 ? hash_stable_16((p), (num), (base))	\
	 : hash_stable_8((p), (num), (base)))
/**
 * hash_u32 - fast hash an array of 32-bit values for internal use
 * @key: the array of uint32_t
 * @num: the number of elements to hash
 * @base: the base number to roll into the hash (usually 0)
 *
 * Combines the @num uint32_t values starting at @key with @base to
 * produce a 32-bit hash.  This is 2-3 times faster than hash() on small
 * arrays, but the advantage vanishes over large hashes.
 *
 * The result differs between machines, so it is only suitable for
 * internal hashes (ie. not hashes sent across the network or saved
 * to disk).
 */
uint32_t hash_u32(const uint32_t *key,
		  size_t num,
		  uint32_t base);
/**
 * hash_string - very fast hash of an ascii string
 * @string: the nul-terminated string
 *
 * The string is hashed, using a hash function optimized for ASCII and
 * similar strings.  It's weaker than the other hash functions.
 *
 * This hash may have different results on different machines, so is
 * only useful for internal hashes (ie. not hashes sent across the
 * network or saved to disk).  The results will be different from the
 * other hash functions in this module, too.
 *
 * Returns 0 for the empty string.
 */
static inline uint32_t hash_string(const char *string)
{
	/* This is Karl Nelson <kenelson@ece.ucdavis.edu>'s X31 hash.
	 * It's a little faster than the (much better) lookup3 hash(): 56ns vs
	 * 84ns on my 2GHz Intel Core Duo 2 laptop for a 10 char string. */
	uint32_t ret;

	/* (ret << 5) - ret is ret * 31, hence "X31".  Each byte is added as
	 * a plain char, whose signedness is implementation-defined. */
	for (ret = 0; *string; string++)
		ret = (ret << 5) - ret + *string;

	return ret;
}
/**
 * hash64 - fast 64-bit hash of an array for internal use
 * @p: the array or pointer to first element
 * @num: the number of elements to hash
 * @base: the 64-bit base number to roll into the hash (usually 0)
 *
 * The memory region pointed to by p is combined with the base to form
 * a 64-bit hash.  The region hashed is @num * sizeof(*@p) bytes long.
 *
 * This hash will have different results on different machines, so is
 * only useful for internal hashes (ie. not hashes sent across the
 * network or saved to disk).
 *
 * It may also change with future versions: it could even detect at runtime
 * what the fastest hash to use is.
 *
 * Example:
 *	#include <ccan/hash/hash.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	// Simple demonstration: identical strings will have the same hash, but
 *	// two different strings will probably not.
 *	int main(int argc, char *argv[])
 *	{
 *		uint64_t hash1, hash2;
 *
 *		if (argc != 3)
 *			err(1, "Usage: %s <string1> <string2>", argv[0]);
 *
 *		hash1 = hash64(argv[1], strlen(argv[1]), 0);
 *		hash2 = hash64(argv[2], strlen(argv[2]), 0);
 *		printf("Hash is %s\n", hash1 == hash2 ? "same" : "different");
 *		return 0;
 *	}
 */
#define hash64(p, num, base) hash64_any((p), (num)*sizeof(*(p)), (base))
/**
 * hash64_stable - 64 bit hash of an array for external use
 * @p: the array or pointer to first element
 * @num: the number of elements to hash
 * @base: the base number to roll into the hash (usually 0)
 *
 * The array of simple integer types pointed to by p is combined with
 * the base to form a 64-bit hash.
 *
 * This hash will have the same results on different machines, so can
 * be used for external hashes (ie. hashes sent across the network or
 * saved to disk).  The results will not change in future versions of
 * this module.
 *
 * Note that it is only legal to hand an array of simple integer types
 * to this hash (ie. char, uint16_t, int64_t, etc).  In these cases,
 * the same values will have the same hash result, even though the
 * memory representations of integers depend on the machine
 * endianness.
 *
 * Implementation note: the BUILD_ASSERT_OR_ZERO() term (expected to
 * contribute 0 when the element size is 1, 2, 4 or 8) is deliberately
 * added to sizeof(*(p)) *before* the comparison with 8, so the value and
 * type of the whole expression come only from the selected
 * hash64_stable_N() helper.
 *
 * Example:
 *	#include <ccan/hash/hash.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	int main(int argc, char *argv[])
 *	{
 *		if (argc != 2)
 *			err(1, "Usage: %s <string-to-hash>", argv[0]);
 *
 *		printf("Hash stable result is %llu\n",
 *		       (unsigned long long)
 *		       hash64_stable(argv[1], strlen(argv[1]), 0));
 *		return 0;
 *	}
 */
#define hash64_stable(p, num, base)					\
	((BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4	\
			       || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) \
	  + sizeof(*(p))) == 8 ? hash64_stable_64((p), (num), (base))	\
	 : sizeof(*(p)) == 4 ? hash64_stable_32((p), (num), (base))	\
	 : sizeof(*(p)) == 2 ? hash64_stable_16((p), (num), (base))	\
	 : hash64_stable_8((p), (num), (base)))
/**
 * hashl - fast 32/64-bit hash of an array for internal use
 * @p: the array or pointer to first element
 * @num: the number of elements to hash
 * @base: the base number to roll into the hash (usually 0)
 *
 * Expands to hash64() when long is the same size as uint64_t, and to
 * hash() otherwise.  The BUILD_ASSERT_OR_ZERO() term is there to check
 * that long is the size of either uint32_t or uint64_t.
 */
#define hashl(p, num, base)						\
	(BUILD_ASSERT_OR_ZERO(sizeof(long) == sizeof(uint32_t)		\
			      || sizeof(long) == sizeof(uint64_t))	\
	 + (sizeof(long) == sizeof(uint64_t)				\
	    ? hash64((p), (num), (base))				\
	    : hash((p), (num), (base))))
/* Our underlying operations. */

/* 32-bit results: hash_any() takes a byte length, the hash_stable_N()
 * variants take a count @n and are picked by element size in hash_stable(). */
uint32_t hash_any(const void *key, size_t length, uint32_t base);
uint32_t hash_stable_8(const void *key, size_t n, uint32_t base);
uint32_t hash_stable_16(const void *key, size_t n, uint32_t base);
uint32_t hash_stable_32(const void *key, size_t n, uint32_t base);
uint32_t hash_stable_64(const void *key, size_t n, uint32_t base);

/* 64-bit results: same layout as above, with a 64-bit base. */
uint64_t hash64_any(const void *key, size_t length, uint64_t base);
uint64_t hash64_stable_8(const void *key, size_t n, uint64_t base);
uint64_t hash64_stable_16(const void *key, size_t n, uint64_t base);
uint64_t hash64_stable_32(const void *key, size_t n, uint64_t base);
uint64_t hash64_stable_64(const void *key, size_t n, uint64_t base);
/**
 * hash_pointer - hash a pointer for internal use
 * @p: the pointer value to hash
 * @base: the base number to roll into the hash (usually 0)
 *
 * The pointer p (not what p points to!) is combined with the base to form
 * a 32-bit hash.
 *
 * This hash will have different results on different machines, so is
 * only useful for internal hashes (ie. not hashes sent across the
 * network or saved to disk).
 *
 * Example:
 *	#include <ccan/hash/hash.h>
 *
 *	// Code to keep track of memory regions.
 *	struct region {
 *		struct region *chain;
 *		void *start;
 *	};
 *
 *	// We keep a simple hash table.
 *	static struct region *region_hash[128];
 *
 *	static void add_region(struct region *r)
 *	{
 *		unsigned int h = hash_pointer(r->start, 0) % 128;
 *
 *		r->chain = region_hash[h];
 *		region_hash[h] = r;
 *	}
 *
 *	static struct region *find_region(const void *start)
 *	{
 *		struct region *r;
 *		unsigned int h = hash_pointer(start, 0) % 128;
 *
 *		for (r = region_hash[h]; r; r = r->chain)
 *			if (r->start == start)
 *				return r;
 *		return NULL;
 *	}
 */
static inline uint32_t hash_pointer(const void *p, uint32_t base)
{
	/* When a pointer is a whole number of 32-bit words, hash those
	 * words with hash_u32(); otherwise hash the pointer's bytes. */
	if (sizeof(p) % sizeof(uint32_t) == 0) {
		/* This convoluted union is the right way of aliasing. */
		union {
			uint32_t a[sizeof(p) / sizeof(uint32_t)];
			const void *p;
		} u;

		u.p = p;
		return hash_u32(u.a, sizeof(p) / sizeof(uint32_t), base);
	} else {
		return hash(&p, 1, base);
	}
}