1 /* Licensed under BSD-MIT - see LICENSE file for details */
3 #include <ccan/hash/hash.h>
4 #include <ccan/endian/endian.h>
7 /* "We will show that hash_count values of 3 or 4 work well in practice"
11 Eppstein, David, et al. "What's the difference?: efficient set reconciliation without prior context." ACM SIGCOMM Computer Communication Review. Vol. 41. No. 4. ACM, 2011. http://conferences.sigcomm.org/sigcomm/2011/papers/sigcomm/p218.pdf
/*
 * invbloom_new_ - allocate and set up a new struct invbloom.
 * @ctx: tal context the new structure is allocated from.
 *
 * The visible body records n_elems and id_size in the new structure and
 * allocates two arrays with the structure itself as their tal context:
 * count (n_elems entries of s32) and idsum (id_size * n_elems bytes).
 *
 * NOTE(review): the embedded line numbering jumps (15 -> 20, 24 -> 27,
 * 29 -> 35), so the remaining parameters, any other field setup, and the
 * failure handling / return value are not visible in this listing --
 * confirm against the full file.
 */
15 struct invbloom *invbloom_new_(const tal_t *ctx,
20 struct invbloom *ib = tal(ctx, struct invbloom);
23 ib->n_elems = n_elems;
24 ib->id_size = id_size;
/* Both arrays are allocated with ib as their context argument;
 * presumably tal_arrz() zero-fills them -- confirm against the tal docs. */
27 ib->count = tal_arrz(ib, s32, n_elems);
28 ib->idsum = tal_arrz(ib, u8, id_size * n_elems);
/* True when either array allocation returned NULL; the statement this
 * guards is not shown in this listing. */
29 if (!ib->count || !ib->idsum)
/*
 * invbloom_singleton_cb_ - register a callback and its opaque argument.
 * @ib: table to attach the callback to.
 * @cb: function taking (table, bucket index, opaque pointer).
 *
 * The visible body stores the opaque pointer in ib->singleton_data, which
 * check_for_singleton() later hands to ib->singleton as its third argument.
 *
 * NOTE(review): the final parameter declaration and the store of @cb are
 * not visible here (numbering jumps 37 -> 41); presumably @cb is saved in
 * ib->singleton -- confirm against the full file.
 */
35 void invbloom_singleton_cb_(struct invbloom *ib,
36 void (*cb)(struct invbloom *,
37 size_t bucket, void *),
41 ib->singleton_data = data;
/*
 * hash_bucket - pick the bucket for the i-th hash of an id.
 * @ib: table supplying id_size, salt and n_elems.
 * @id: the id to hash, read as chars.
 * @i: which hash to compute.
 *
 * Returns hash(id, ib->id_size, ib->salt + i*7) reduced modulo
 * ib->n_elems, so the result is always below n_elems.  Each value of @i
 * gives hash() a different third argument (presumably its seed -- confirm
 * against the hash module docs).  n_elems must be non-zero for the modulo
 * to be defined.
 */
44 static size_t hash_bucket(const struct invbloom *ib, const void *id, size_t i)
46 return hash((const char *)id, ib->id_size, ib->salt+i*7) % ib->n_elems;
/*
 * idsum_ptr - locate a bucket's slot inside ib->idsum.
 * @ib: table whose idsum array is indexed.
 * @bucket: bucket index.
 *
 * Returns a u8 pointer to byte offset bucket * ib->id_size within
 * ib->idsum, i.e. the array is treated as consecutive id_size-byte slots.
 * The result is non-const even though @ib is a const pointer.
 */
49 static u8 *idsum_ptr(const struct invbloom *ib, size_t bucket)
51 return (u8 *)ib->idsum + bucket * ib->id_size;
/*
 * check_for_singleton - invoke the singleton callback for a bucket.
 * @ib: table owning the bucket and the callback.
 * @bucket: bucket index whose count is examined.
 *
 * The visible code tests whether count[bucket] is neither 1 nor -1, and
 * calls ib->singleton(ib, bucket, ib->singleton_data).
 *
 * NOTE(review): the statement controlled by the count test is not shown
 * (numbering jumps 59 -> 62); presumably it returns early so the callback
 * only fires for counts of exactly 1 or -1.  Any check that a callback has
 * been registered is likewise not visible (54 -> 59) -- confirm.
 */
54 static void check_for_singleton(struct invbloom *ib, size_t bucket)
59 if (ib->count[bucket] != 1 && ib->count[bucket] != -1)
62 ib->singleton(ib, bucket, ib->singleton_data);
/*
 * add_to_bucket - account for one id in a single bucket.
 * @ib: table being updated.
 * @n: bucket index.
 * @id: id bytes; the loop below walks ib->id_size of them.
 *
 * Visible steps: fetch the bucket's idsum slot, iterate over id_size byte
 * positions, then run check_for_singleton() on the bucket.
 *
 * NOTE(review): the count update and the loop's statement are not visible
 * in this listing (numbering jumps 68 -> 72 -> 75); presumably the count
 * is incremented and each id byte is folded into idsum -- confirm.
 */
65 static void add_to_bucket(struct invbloom *ib, size_t n, const u8 *id)
68 u8 *idsum = idsum_ptr(ib, n);
72 for (i = 0; i < ib->id_size; i++)
/* Runs once after the update, so the callback sees the bucket's new state. */
75 check_for_singleton(ib, n);
/*
 * remove_from_bucket - undo one id's contribution to a single bucket.
 * @ib: table being updated.
 * @n: bucket index.
 * @id: id bytes; the loop below walks ib->id_size of them.
 *
 * Visible steps mirror add_to_bucket(): fetch the bucket's idsum slot,
 * iterate over id_size byte positions, then run check_for_singleton().
 *
 * NOTE(review): the count update and the loop's statement are not visible
 * in this listing (numbering jumps 81 -> 84 -> 87); presumably the count
 * is decremented and each id byte is folded out of idsum -- confirm.
 */
78 static void remove_from_bucket(struct invbloom *ib, size_t n, const u8 *id)
81 u8 *idsum = idsum_ptr(ib, n);
84 for (i = 0; i < ib->id_size; i++)
/* Runs once after the update, so the callback sees the bucket's new state. */
87 check_for_singleton(ib, n);
/*
 * invbloom_insert - add an id to the table.
 * @ib: table to update.
 * @id: pointer to the id; hash_bucket() hashes ib->id_size bytes of it.
 *
 * Calls add_to_bucket() once for each hash index 0..NUM_HASHES-1, using
 * the bucket chosen by hash_bucket() for that index.  Bucket indexes are
 * not de-duplicated: if two hash indexes map to the same bucket, that
 * bucket is updated twice.
 */
90 void invbloom_insert(struct invbloom *ib, const void *id)
94 for (i = 0; i < NUM_HASHES; i++)
95 add_to_bucket(ib, hash_bucket(ib, id, i), id);
/*
 * invbloom_delete - remove an id from the table.
 * @ib: table to update.
 * @id: pointer to the id; hash_bucket() hashes ib->id_size bytes of it.
 *
 * Calls remove_from_bucket() once for each hash index 0..NUM_HASHES-1,
 * visiting the same buckets invbloom_insert() uses for the same id.
 * Nothing in the visible code checks that the id was inserted beforehand.
 */
98 void invbloom_delete(struct invbloom *ib, const void *id)
102 for (i = 0; i < NUM_HASHES; i++)
103 remove_from_bucket(ib, hash_bucket(ib, id, i), id);
/*
 * all_zero - test a byte range (helper used on idsum slots).
 * @mem: start of the range.
 * @size: number of bytes the loop walks.
 *
 * NOTE(review): only the signature and the loop header are visible
 * (numbering jumps 106 -> 110 -> 116); going by the name and its callers,
 * this presumably returns true only when every one of the @size bytes is
 * zero -- confirm against the full file.
 */
106 static bool all_zero(const u8 *mem, size_t size)
110 for (i = 0; i < size; i++)
/*
 * invbloom_get - query whether an id appears to be in the table.
 * @ib: table to query (not modified).
 * @id: pointer to the id; ib->id_size bytes are hashed and compared.
 *
 * Walks the id's NUM_HASHES buckets in hash-index order.  The first bucket
 * whose count is exactly 1 decides the answer: the result is true only if
 * that bucket's idsum equals @id byte-for-byte.
 *
 * NOTE(review): the statement taken for a completely empty bucket and the
 * result when no bucket is decisive are not visible in this listing
 * (numbering jumps 124 -> 127 and 128 -> 133); presumably both yield
 * false -- confirm against the full file.
 */
116 bool invbloom_get(const struct invbloom *ib, const void *id)
120 for (i = 0; i < NUM_HASHES; i++) {
121 size_t h = hash_bucket(ib, id, i);
122 u8 *idsum = idsum_ptr(ib, h);
/* Bucket with zero count and an all-zero idsum slot. */
124 if (ib->count[h] == 0 && all_zero(idsum, ib->id_size))
/* A count of 1 means the slot can be compared directly against id. */
127 if (ib->count[h] == 1)
128 return (memcmp(idsum, id, ib->id_size) == 0);
/*
 * extract - copy out the idsum of a bucket with a given count.
 * @ctx: tal context for the copy.
 * @ib: table to scan.
 * @count: bucket count to look for (callers pass 1 or -1).
 *
 * Scans buckets from index 0 upward, comparing each count against @count,
 * and duplicates a bucket's id_size-byte idsum slot with tal_dup().
 *
 * NOTE(review): the statement under the count test and the return paths
 * are not visible here (numbering jumps 141 -> 144 -> 150); presumably
 * non-matching buckets are skipped, the first match is returned, and NULL
 * is returned when nothing matches -- confirm against the full file.
 */
133 static void *extract(const tal_t *ctx, struct invbloom *ib, int count)
137 /* FIXME: this makes full extraction O(n^2). */
138 for (i = 0; i < ib->n_elems; i++) {
141 if (ib->count[i] != count)
/* Copy the slot rather than returning a pointer into ib->idsum. */
144 id = tal_dup(ctx, u8, idsum_ptr(ib, i), ib->id_size, 0);
/*
 * invbloom_extract - pull one id out of a count-1 bucket.
 * @ctx: tal context for the extracted copy.
 * @ib: table to extract from; modified.
 *
 * Obtains an id via extract(ctx, ib, 1) and removes it from the table with
 * invbloom_delete().
 *
 * NOTE(review): any NULL check around the delete and the return statement
 * are not visible in this listing (numbering jumps 154 -> 156 -> 160);
 * presumably the extracted id is returned -- confirm.
 */
150 void *invbloom_extract(const tal_t *ctx, struct invbloom *ib)
154 id = extract(ctx, ib, 1);
156 invbloom_delete(ib, id);
/*
 * invbloom_extract_negative - pull one id out of a count -1 bucket.
 * @ctx: tal context for the extracted copy.
 * @ib: table to extract from; modified.
 *
 * Obtains an id via extract(ctx, ib, -1) and cancels it by adding it back
 * with invbloom_insert().
 *
 * NOTE(review): any NULL check around the insert and the return statement
 * are not visible in this listing (numbering jumps 164 -> 166 -> 170);
 * presumably the extracted id is returned -- confirm.
 */
160 void *invbloom_extract_negative(const tal_t *ctx, struct invbloom *ib)
164 id = extract(ctx, ib, -1);
166 invbloom_insert(ib, id);
/*
 * invbloom_subtract - subtract the contents of ib2 from ib1, in place.
 * @ib1: table to modify.
 * @ib2: table to subtract; only read.
 *
 * Both tables must agree on n_elems, id_size and salt (asserted).  Every
 * idsum byte of ib1 is XORed with the matching byte of ib2; then each
 * bucket's count has ib2's count subtracted and the bucket is passed to
 * check_for_singleton(), so ib1's callback can fire for buckets whose
 * count ends up at 1 or -1.
 */
170 void invbloom_subtract(struct invbloom *ib1, const struct invbloom *ib2)
/* Tables with different geometry or salt would not map ids to the same buckets. */
174 assert(ib1->n_elems == ib2->n_elems);
175 assert(ib1->id_size == ib2->id_size);
176 assert(ib1->salt == ib2->salt);
/* Treat idsum as one flat array of n_elems * id_size bytes. */
178 for (i = 0; i < ib1->n_elems * ib1->id_size; i++)
179 ib1->idsum[i] ^= ib2->idsum[i];
/* Counts are updated after all idsum bytes, so the callback sees final data. */
181 for (i = 0; i < ib1->n_elems; i++) {
182 ib1->count[i] -= ib2->count[i];
183 check_for_singleton(ib1, i);
/*
 * invbloom_empty - test whether the table holds nothing.
 * @ib: table to inspect (not modified).
 *
 * The visible code walks every bucket and tests whether its idsum slot
 * contains any non-zero byte.
 *
 * NOTE(review): this definition continues beyond the visible listing and
 * has gaps (numbering jumps 191 -> 194); presumably a non-zero count or a
 * non-zero idsum makes it return false, and true is returned otherwise --
 * confirm against the full file.
 */
187 bool invbloom_empty(const struct invbloom *ib)
191 for (i = 0; i < ib->n_elems; i++) {
194 if (!all_zero(idsum_ptr(ib, i), ib->id_size))