From 39f01834db9b6a21d076e67d1e3143ab99aaf43e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 26 Aug 2010 12:52:59 +0930 Subject: [PATCH] tdb2: initial commit (doesn't work, still writing tests) --- ccan/tdb2/_info | 81 + ccan/tdb2/check.c | 411 +++++ ccan/tdb2/doc/design-1.3.txt | 1050 ++++++++++++ ccan/tdb2/doc/design.lyx | 2282 +++++++++++++++++++++++++ ccan/tdb2/doc/design.lyx,v | 3106 ++++++++++++++++++++++++++++++++++ ccan/tdb2/doc/design.pdf | Bin 0 -> 185894 bytes ccan/tdb2/doc/design.txt | 1058 ++++++++++++ ccan/tdb2/free.c | 710 ++++++++ ccan/tdb2/io.c | 662 ++++++++ ccan/tdb2/lock.c | 848 ++++++++++ ccan/tdb2/private.h | 456 +++++ ccan/tdb2/tdb.c | 875 ++++++++++ ccan/tdb2/tdb2.h | 143 ++ ccan/tdb2/test/run-encode.c | 40 + ccan/tdb2/test/run-fls.c | 36 + 15 files changed, 11758 insertions(+) create mode 100644 ccan/tdb2/_info create mode 100644 ccan/tdb2/check.c create mode 100644 ccan/tdb2/doc/design-1.3.txt create mode 100644 ccan/tdb2/doc/design.lyx create mode 100644 ccan/tdb2/doc/design.lyx,v create mode 100644 ccan/tdb2/doc/design.pdf create mode 100644 ccan/tdb2/doc/design.txt create mode 100644 ccan/tdb2/free.c create mode 100644 ccan/tdb2/io.c create mode 100644 ccan/tdb2/lock.c create mode 100644 ccan/tdb2/private.h create mode 100644 ccan/tdb2/tdb.c create mode 100644 ccan/tdb2/tdb2.h create mode 100644 ccan/tdb2/test/run-encode.c create mode 100644 ccan/tdb2/test/run-fls.c diff --git a/ccan/tdb2/_info b/ccan/tdb2/_info new file mode 100644 index 00000000..cd7412c1 --- /dev/null +++ b/ccan/tdb2/_info @@ -0,0 +1,81 @@ +#include +#include + +/** + * tdb2 - [[WORK IN PROGRESS!]] The trivial (64bit transactional) database + * + * The tdb2 module provides an efficient keyword data mapping (usually + * within a file). It supports transactions, so the contents of the + * database is reliable even across crashes. 
+ * + * Example: + * #include + * #include + * #include + * #include + * + * static void usage(void) + * { + * errx(1, "Usage: %s fetch \n" + * "OR %s store "); + * } + * + * int main(int argc, char *argv[]) + * { + * struct tdb_context *tdb; + * TDB_DATA key, value; + * + * if (argc < 4) + * usage(); + * + * tdb = tdb_open(argv[2], 1024, TDB_DEFAULT, O_CREAT|O_RDWR, + * 0600); + * if (!tdb) + * err(1, "Opening %s", argv[2]); + * + * key.dptr = (void *)argv[3]; + * key.dsize = strlen(argv[3]); + * + * if (streq(argv[1], "fetch")) { + * if (argc != 4) + * usage(); + * value = tdb_fetch(tdb, key); + * if (!value.dptr) + * errx(1, "fetch %s: %s", + * argv[3], tdb_errorstr(tdb)); + * printf("%.*s\n", value.dsize, (char *)value.dptr); + * free(value.dptr); + * } else if (streq(argv[1], "store")) { + * if (argc != 5) + * usage(); + * value.dptr = (void *)argv[4]; + * value.dsize = strlen(argv[4]); + * if (tdb_store(tdb, key, value, 0) != 0) + * errx(1, "store %s: %s", + * argv[3], tdb_errorstr(tdb)); + * } else + * usage(); + * + * return 0; + * } + * + * Maintainer: Rusty Russell + * + * Author: Rusty Russell + * + * Licence: LGPLv3 (or later) + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/hash\n"); + printf("ccan/likely\n"); + printf("ccan/asearch\n"); + return 0; + } + + return 1; +} diff --git a/ccan/tdb2/check.c b/ccan/tdb2/check.c new file mode 100644 index 00000000..f005a48d --- /dev/null +++ b/ccan/tdb2/check.c @@ -0,0 +1,411 @@ + /* + Trivial Database 2: free list/block handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include +#include + +/* We keep an ordered array of offsets. */ +static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off) +{ + tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t)); + if (!new) + return false; + new[(*num)++] = off; + *arr = new; + return true; +} + +static bool check_header(struct tdb_context *tdb) +{ + uint64_t hash_test; + + hash_test = TDB_HASH_MAGIC; + hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test)); + if (tdb->header.hash_test != hash_test) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "check: hash test %llu should be %llu\n", + tdb->header.hash_test, hash_test); + return false; + } + if (strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "check: bad magic '%.*s'\n", + sizeof(tdb->header.magic_food), + tdb->header.magic_food); + return false; + } + if (tdb->header.v.hash_bits < INITIAL_HASH_BITS) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "check: bad hash bits %llu\n", + (long long)tdb->header.v.hash_bits); + return false; + } + if (tdb->header.v.zone_bits < INITIAL_ZONE_BITS) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "check: bad zone_bits %llu\n", + (long long)tdb->header.v.zone_bits); + return false; + } + if (tdb->header.v.free_buckets < INITIAL_FREE_BUCKETS) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "check: bad free_buckets %llu\n", + (long long)tdb->header.v.free_buckets); + return false; + } + if ((1ULL << tdb->header.v.zone_bits) * tdb->header.v.num_zones + < tdb->map_size) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "check: 
%llu zones size %llu don't cover %llu\n", + (long long)(1ULL << tdb->header.v.zone_bits), + (long long)tdb->header.v.num_zones, + (long long)tdb->map_size); + return false; + } + + /* We check hash_off and free_off later. */ + + /* Don't check reserved: they *can* be used later. */ + return true; +} + +/* Comparison callback for asearch(): orders two offsets by value. + * Returns 1 if *a > *b, -1 if *a < *b, 0 if they are equal. */ +static int off_cmp(const tdb_off_t *a, const tdb_off_t *b) +{ + /* Compare the offsets themselves, not the addresses they are stored at. + * Use comparisons rather than subtraction: the difference can overflow an int. */ + return *a > *b ? 1 + : *a < *b ? -1 + : 0; +} + +static bool check_hash_list(struct tdb_context *tdb, + tdb_off_t used[], + size_t num_used) +{ + struct tdb_used_record rec; + tdb_len_t hashlen, i, num_nonzero; + tdb_off_t h; + size_t num_found; + + hashlen = sizeof(tdb_off_t) << tdb->header.v.hash_bits; + + if (tdb_read_convert(tdb, tdb->header.v.hash_off - sizeof(rec), + &rec, sizeof(rec)) == -1) + return false; + + if (rec_data_length(&rec) != hashlen) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad hash table length %llu vs %llu\n", + (long long)rec_data_length(&rec), + (long long)hashlen); + return false; + } + if (rec_key_length(&rec) != 0) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad hash table key length %llu\n", + (long long)rec_key_length(&rec)); + return false; + } + if (rec_hash(&rec) != 0) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad hash table hash value %llu\n", + (long long)rec_hash(&rec)); + return false; + } + + num_found = 0; + num_nonzero = 0; + for (i = 0, h = tdb->header.v.hash_off; + i < (1ULL << tdb->header.v.hash_bits); + i++, h += sizeof(tdb_off_t)) { + tdb_off_t off, *p, pos; + struct tdb_used_record rec; + uint64_t hash; + + off = tdb_read_off(tdb, h); + if (off == TDB_OFF_ERR) + return false; + if (!off) { + num_nonzero = 0; + continue; + } + /* FIXME: Check hash bits */ + p = asearch(&off, used, num_used, off_cmp); + if (!p) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Invalid offset %llu in hash\n", + (long long)off); + return false; + } + /* Mark it invalid. 
*/ + *p ^= 1; + num_found++; + + if (tdb_read_convert(tdb, off, &rec, sizeof(rec)) == -1) + return false; + + /* Check it is hashed correctly. */ + hash = hash_record(tdb, off); + + /* Top bits must match header. */ + if (hash >> (64 - 11) != rec_hash(&rec)) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad hash magic at offset %llu" + " (0x%llx vs 0x%llx)\n", + (long long)off, + (long long)hash, (long long)rec_hash(&rec)); + return false; + } + + /* It must be in the right place in hash array. */ + pos = hash & ((1ULL << tdb->header.v.hash_bits)-1); + if (pos < i - num_nonzero || pos > i) { + /* Could be wrap from end of array? FIXME: check? */ + if (i != num_nonzero) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad hash position %llu at" + " offset %llu hash 0x%llx\n", + (long long)i, + (long long)off, + (long long)hash); + return false; + } + } + num_nonzero++; + } + + if (num_found != num_used) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Not all entries are in hash\n"); + return false; + } + return true; +} + +static bool check_free(struct tdb_context *tdb, + tdb_off_t off, + const struct tdb_free_record *frec, + tdb_off_t prev, + tdb_off_t zone, unsigned int bucket) +{ + if (frec->magic != TDB_FREE_MAGIC) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: offset %llu bad magic 0x%llx\n", + (long long)off, (long long)frec->magic); + return false; + } + if (tdb->methods->oob(tdb, off + + frec->data_len-sizeof(struct tdb_used_record), + true)) + return false; + if (zone_of(tdb, off) != zone) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: offset %llu in wrong zone %llu vs %llu\n", + (long long)off, + (long long)zone, (long long)zone_of(tdb, off)); + return false; + } + if (size_to_bucket(tdb, frec->data_len) != bucket) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: offset %llu in wrong bucket %u vs %u\n", + (long long)off, + bucket, size_to_bucket(tdb, 
frec->data_len)); + return false; + } + if (prev != frec->prev) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: offset %llu bad prev %llu vs %llu\n", + (long long)off, + (long long)prev, (long long)frec->prev); + return false; + } + return true; +} + +static bool check_free_list(struct tdb_context *tdb, + tdb_off_t free[], + size_t num_free) +{ + struct tdb_used_record rec; + tdb_len_t freelen, i, j; + tdb_off_t h; + size_t num_found; + + freelen = sizeof(tdb_off_t) * tdb->header.v.num_zones + * (tdb->header.v.free_buckets + 1); + + if (tdb_read_convert(tdb, tdb->header.v.free_off - sizeof(rec), + &rec, sizeof(rec)) == -1) + return false; + + if (rec_data_length(&rec) != freelen) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad free table length %llu vs %llu\n", + (long long)rec_data_length(&rec), + (long long)freelen); + return false; + } + if (rec_key_length(&rec) != 0) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad free table key length %llu\n", + (long long)rec_key_length(&rec)); + return false; + } + if (rec_hash(&rec) != 0) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad free table hash value %llu\n", + (long long)rec_hash(&rec)); + return false; + } + + num_found = 0; + h = tdb->header.v.free_off; + for (i = 0; i < tdb->header.v.num_zones; i++) { + for (j = 0; j <= tdb->header.v.free_buckets; + j++, h += sizeof(tdb_off_t)) { + tdb_off_t off, prev = 0, *p; + struct tdb_free_record f; + + for (off = tdb_read_off(tdb, h); off; off = f.next) { + if (off == TDB_OFF_ERR) + return false; + if (tdb_read_convert(tdb, off, &f, sizeof(f))) + return false; + if (!check_free(tdb, off, &f, prev, i, j)) + return false; + + /* FIXME: Check hash bits */ + p = asearch(&off, free, num_free, off_cmp); + if (!p) { + tdb->log(tdb, TDB_DEBUG_ERROR, + tdb->log_priv, + "tdb_check: Invalid offset" + " %llu in free table\n", + (long long)off); + return false; + } + /* Mark it invalid. 
*/ + *p ^= 1; + num_found++; + prev = off; + } + } + } + if (num_found != num_free) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Not all entries are in free table\n"); + return false; + } + return true; +} + +/* FIXME: call check() function. */ +int tdb_check(struct tdb_context *tdb, + int (*check)(TDB_DATA key, TDB_DATA data, void *private_data), + void *private_data) +{ + tdb_off_t *free = NULL, *used = NULL, off; + tdb_len_t len; + size_t num_free = 0, num_used = 0; + bool hash_found = false, free_found = false; + + if (tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false) != 0) + return -1; + + update_header(tdb); + + if (!check_header(tdb)) + goto fail; + + /* First we do a linear scan, checking all records. */ + for (off = sizeof(struct tdb_header); + off < tdb->map_size; + off += len) { + union { + struct tdb_used_record u; + struct tdb_free_record f; + } pad, *p; + p = tdb_get(tdb, off, &pad, sizeof(pad)); + if (!p) + goto fail; + if (p->f.magic == TDB_FREE_MAGIC) { + /* This record is free! */ + if (!append(&free, &num_free, off)) + goto fail; + len = sizeof(p->u) + p->f.data_len; + if (tdb->methods->oob(tdb, off + len, false)) + goto fail; + } else { + uint64_t klen, dlen, extra; + + /* This record is used! 
*/ + if (rec_magic(&p->u) != TDB_MAGIC) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: Bad magic 0x%llx" + " at offset %llu\n", + (long long)rec_magic(&p->u), + (long long)off); + goto fail; + } + + if (!append(&used, &num_used, off)) + goto fail; + + klen = rec_key_length(&p->u); + dlen = rec_data_length(&p->u); + extra = rec_extra_padding(&p->u); + + len = sizeof(p->u) + klen + dlen + extra; + if (tdb->methods->oob(tdb, off + len, false)) + goto fail; + + if (off + sizeof(p->u) == tdb->header.v.hash_off) { + hash_found = true; + } else if (off + sizeof(p->u) + == tdb->header.v.free_off) { + free_found = true; + } + } + } + + if (!hash_found) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: hash table not found at %llu\n", + (long long)tdb->header.v.hash_off); + goto fail; + } + + if (!free_found) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_check: free table not found at %llu\n", + (long long)tdb->header.v.free_off); + goto fail; + } + + /* FIXME: Check key uniqueness? */ + if (!check_hash_list(tdb, used, num_used)) + goto fail; + + if (!check_free_list(tdb, free, num_free)) + goto fail; + + tdb_allrecord_unlock(tdb, F_RDLCK); + return true; + +fail: + tdb_allrecord_unlock(tdb, F_RDLCK); + return false; +} diff --git a/ccan/tdb2/doc/design-1.3.txt b/ccan/tdb2/doc/design-1.3.txt new file mode 100644 index 00000000..651ada08 --- /dev/null +++ b/ccan/tdb2/doc/design-1.3.txt @@ -0,0 +1,1050 @@ +TDB2: A Redesigning The Trivial DataBase + +Rusty Russell, IBM Corporation + +27-April-2010 + +Abstract + +The Trivial DataBase on-disk format is 32 bits; with usage cases +heading towards the 4G limit, that must change. This required +breakage provides an opportunity to revisit TDB's other design +decisions and reassess them. 
+ +1 Introduction + +The Trivial DataBase was originally written by Andrew Tridgell as +a simple key/data pair storage system with the same API as dbm, +but allowing multiple readers and writers while being small +enough (< 1000 lines of C) to include in SAMBA. The simple design +created in 1999 has proven surprisingly robust and performant, +used in Samba versions 3 and 4 as well as numerous other +projects. Its useful life was greatly increased by the +(backwards-compatible!) addition of transaction support in 2005. + +The wider variety and greater demands of TDB-using code has led +to some organic growth of the API, as well as some compromises on +the implementation. None of these, by themselves, are seen as +show-stoppers, but the cumulative effect is a loss of elegance +over the initial, simple TDB implementation. Here is a table of +the approximate number of lines of implementation code and number +of API functions at the end of each year: + + ++-----------+----------------+--------------------------------+ +| Year End | API Functions | Lines of C Code Implementation | ++-----------+----------------+--------------------------------+ ++-----------+----------------+--------------------------------+ +| 1999 | 13 | 1195 | ++-----------+----------------+--------------------------------+ +| 2000 | 24 | 1725 | ++-----------+----------------+--------------------------------+ +| 2001 | 32 | 2228 | ++-----------+----------------+--------------------------------+ +| 2002 | 35 | 2481 | ++-----------+----------------+--------------------------------+ +| 2003 | 35 | 2552 | ++-----------+----------------+--------------------------------+ +| 2004 | 40 | 2584 | ++-----------+----------------+--------------------------------+ +| 2005 | 38 | 2647 | ++-----------+----------------+--------------------------------+ +| 2006 | 52 | 3754 | ++-----------+----------------+--------------------------------+ +| 2007 | 66 | 4398 | 
++-----------+----------------+--------------------------------+ +| 2008 | 71 | 4768 | ++-----------+----------------+--------------------------------+ +| 2009 | 73 | 5715 | ++-----------+----------------+--------------------------------+ + + +This review is an attempt to catalog and address all the known +issues with TDB and create solutions which address the problems +without significantly increasing complexity; all involved are far +too aware of the dangers of second system syndrome in rewriting a +successful project like this. + +2 API Issues + +2.1 tdb_open_ex Is Not Expandable + +The tdb_open() call was expanded to tdb_open_ex(), which added an +optional hashing function and an optional logging function +argument. Additional arguments to open would require the +introduction of a tdb_open_ex2 call etc. + +2.1.1 Proposed Solution + +tdb_open() will take a linked-list of attributes: + +enum tdb_attribute { + + TDB_ATTRIBUTE_LOG = 0, + + TDB_ATTRIBUTE_HASH = 1 + +}; + +struct tdb_attribute_base { + + enum tdb_attribute attr; + + union tdb_attribute *next; + +}; + +struct tdb_attribute_log { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG +*/ + + tdb_log_func log_fn; + + void *log_private; + +}; + +struct tdb_attribute_hash { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH +*/ + + tdb_hash_func hash_fn; + + void *hash_private; + +}; + +union tdb_attribute { + + struct tdb_attribute_base base; + + struct tdb_attribute_log log; + + struct tdb_attribute_hash hash; + +}; + +This allows future attributes to be added, even if this expands +the size of the union. + +2.2 tdb_traverse Makes Impossible Guarantees + +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, +and it was thought that it was important to guarantee that all +records which exist at the start and end of the traversal would +be included, and no record would be included twice. 
+ +This adds complexity (see[Reliable-Traversal-Adds]) and does not +work anyway for records which are altered (in particular, those +which are expanded may be effectively deleted and re-added behind +the traversal). + +2.2.1 Proposed Solution + +Abandon the guarantee. You will see every record if no changes +occur during your traversal, otherwise you will see some subset. +You can prevent changes by using a transaction or the locking +API. + +2.3 Nesting of Transactions Is Fraught + +TDB has alternated between allowing nested transactions and not +allowing them. Various paths in the Samba codebase assume that +transactions will nest, and in a sense they can: the operation is +only committed to disk when the outer transaction is committed. +There are two problems, however: + +1. Canceling the inner transaction will cause the outer + transaction commit to fail, and will not undo any operations + since the inner transaction began. This problem is soluble with + some additional internal code. + +2. An inner transaction commit can be cancelled by the outer + transaction. This is desirable in the way which Samba's + database initialization code uses transactions, but could be a + surprise to any users expecting a successful transaction commit + to expose changes to others. + +The current solution is to specify the behavior at tdb_open(), +with the default currently that nested transactions are allowed. +This flag can also be changed at runtime. + +2.3.1 Proposed Solution + +Given the usage patterns, it seems that the “least-surprise” +behavior of disallowing nested transactions should become the +default. Additionally, it seems the outer transaction is the only +code which knows whether inner transactions should be allowed, so +a flag to indicate this could be added to tdb_transaction_start. +However, this behavior can be simulated with a wrapper which uses +tdb_add_flags() and tdb_remove_flags(), so the API should not be +expanded for this relatively-obscure case. 
+ +2.4 Incorrect Hash Function is Not Detected + +tdb_open_ex() allows the calling code to specify a different hash +function to use, but does not check that all other processes +accessing this tdb are using the same hash function. The result +is that records are missing from tdb_fetch(). + +2.4.1 Proposed Solution + +The header should contain an example hash result (eg. the hash of +0xdeadbeef), and tdb_open_ex() should check that the given hash +function produces the same answer, or fail the tdb_open call. + +2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation + +In response to scalability issues with the free list ([TDB-Freelist-Is] +) two API workarounds have been incorporated in TDB: +tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The +latter actually calls the former with an argument of “5”. + +This code allows deleted records to accumulate without putting +them in the free list. On delete we iterate through each chain +and free them in a batch if there are more than max_dead entries. +These are never otherwise recycled except as a side-effect of a +tdb_repack. + +2.5.1 Proposed Solution + +With the scalability problems of the freelist solved, this API +can be removed. The TDB_VOLATILE flag may still be useful as a +hint that store and delete of records will be at least as common +as fetch in order to allow some internal tuning, but initially +will become a no-op. + +2.6 TDB Files Cannot Be Opened Multiple Times + In The Same Process + +No process can open the same TDB twice; we check and disallow it. +This is an unfortunate side-effect of fcntl locks, which operate +on a per-file rather than per-file-descriptor basis, and do not +nest. Thus, closing any file descriptor on a file clears all the +locks obtained by this process, even if they were placed using a +different file descriptor! + +Note that even if this were solved, deadlock could occur if +operations were nested: this is a more manageable programming +error in most cases. 
+ +2.6.1 Proposed Solution + +We could lobby POSIX to fix the perverse rules, or at least lobby +Linux to violate them so that the most common implementation does +not have this restriction. This would be a generally good idea +for other fcntl lock users. + +Samba uses a wrapper which hands out the same tdb_context to +multiple callers if this happens, and does simple reference +counting. We should do this inside the tdb library, which already +emulates lock nesting internally; it would need to recognize when +deadlock occurs within a single process. This would create a new +failure mode for tdb operations (while we currently handle +locking failures, they are impossible in normal use and a process +encountering them can do little but give up). + +I do not see benefit in an additional tdb_open flag to indicate +whether re-opening is allowed, though there may be some +benefit to adding a call to detect when a tdb_context is shared, +to allow others to create such an API. + +2.7 TDB API Is Not POSIX Thread-safe + +The TDB API uses an error code which can be queried after an +operation to determine what went wrong. This programming model +does not work with threads, unless specific additional guarantees +are given by the implementation. In addition, even +otherwise-independent threads cannot open the same TDB (as in [TDB-Files-Cannot] +). + +2.7.1 Proposed Solution + +Rearchitecting the API to include a tdb_errcode pointer would be a +great deal of churn; we are better to guarantee that the +tdb_errcode is per-thread so the current programming model can be +maintained. + +This requires dynamic per-thread allocations, which is awkward +with POSIX threads (pthread_key_create space is limited and we +cannot simply allocate a key for every TDB). + +Internal locking is required to make sure that fcntl locks do not +overlap between threads, and also that the global list of tdbs is +maintained. 
+ +The aim is that building tdb with -DTDB_PTHREAD will result in a +pthread-safe version of the library, and otherwise no overhead +will exist. + +2.8 *_nonblock Functions And *_mark Functions Expose + Implementation + +CTDB[footnote: +Clustered TDB, see http://ctdb.samba.org +] wishes to operate on TDB in a non-blocking manner. This is +currently done as follows: + +1. Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). If this fails: + +2. Fork a child process, and wait for it to call the normal + variant (eg. tdb_lockall). + +3. If the child succeeds, call the _mark variant to indicate we + already have the locks (eg. tdb_lockall_mark). + +4. Upon completion, tell the child to release the locks (eg. + tdb_unlockall). + +5. Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). + +There are several issues with this approach. Firstly, adding two +new variants of each function clutters the API for an obscure +use, and so not all functions have three variants. Secondly, it +assumes that all paths of the functions ask for the same locks, +otherwise the parent process will have to get a lock which the +child doesn't have under some circumstances. I don't believe this +is currently the case, but it constrains the implementation. + +2.8.1 Proposed Solution + +Implement a hook for locking methods, so that the caller can +control the calls to create and remove fcntl locks. In this +scenario, ctdbd would operate as follows: + +1. Call the normal API function, eg tdb_lockall(). + +2. When the lock callback comes in, check if the child has the + lock. Initially, this is always false. If so, return 0. + Otherwise, try to obtain it in non-blocking mode. If that + fails, return EWOULDBLOCK. + +3. Release locks in the unlock callback as normal. + +4. If tdb_lockall() fails, see if we recorded a lock failure; if + so, call the child to repeat the operation. + +5. 
The child records what locks it obtains, and returns that + information to the parent. + +6. When the child has succeeded, goto 1. + +This is flexible enough to handle any potential locking scenario, +even when lock requirements change. It can be optimized so that +the parent does not release locks, just tells the child which +locks it doesn't need to obtain. + +It also keeps the complexity out of the API, and in ctdbd where +it is needed. + +2.9 tdb_chainlock Functions Expose Implementation + +tdb_chainlock locks some number of records, including the record +indicated by the given key. This gave atomicity guarantees; +no-one can start a transaction, alter, read or delete that key +while the lock is held. + +It also makes the same guarantee for any other key in the chain, +which is an internal implementation detail and potentially a +cause for deadlock. + +2.9.1 Proposed Solution + +None. It would be nice to have an explicit single entry lock +which affected no other keys. Unfortunately, this won't work for +an entry which doesn't exist. Thus while chainlock may be +implemented more efficiently for the existing case, it will still +have overlap issues with the non-existing case. So it is best to +keep the current (lack of) guarantee about which records will be +affected to avoid constraining our implementation. + +2.10 Signal Handling is Not Race-Free + +The tdb_setalarm_sigptr() call allows the caller's signal handler +to indicate that the tdb locking code should return with a +failure, rather than trying again when a signal is received (and +errno == EAGAIN). This is usually used to implement timeouts. + +Unfortunately, this does not work in the case where the signal is +received before the tdb code enters the fcntl() call to place the +lock: the code will sleep within the fcntl() code, unaware that +the signal wants it to exit. In the case of long timeouts, this +does not happen in practice. 
+ +2.10.1 Proposed Solution + +The locking hooks proposed in[Proposed-Solution-locking-hook] +would allow the user to decide on whether to fail the lock +acquisition on a signal. This allows the caller to choose their +own compromise: they could narrow the race by checking +immediately before the fcntl call.[footnote: +It may be possible to make this race-free in some implementations +by having the signal handler alter the struct flock to make it +invalid. This will cause the fcntl() lock call to fail with +EINVAL if the signal occurs before the kernel is entered, +otherwise EAGAIN. +] + +2.11 The API Uses Gratuitous Typedefs, Capitals + +typedefs are useful for providing source compatibility when types +can differ across implementations, or arguably in the case of +function pointer definitions which are hard for humans to parse. +Otherwise it is simply obfuscation and pollutes the namespace. + +Capitalization is usually reserved for compile-time constants and +macros. + + TDB_CONTEXT There is no reason to use this over 'struct + tdb_context'; the definition isn't visible to the API user + anyway. + + TDB_DATA There is no reason to use this over struct TDB_DATA; + the struct needs to be understood by the API user. + + struct TDB_DATA This would normally be called 'struct + tdb_data'. + + enum TDB_ERROR Similarly, this would normally be enum + tdb_error. + +2.11.1 Proposed Solution + +None. Introducing lower case variants would please pedants like +myself, but if it were done the existing ones should be kept. +There is little point forcing a purely cosmetic change upon tdb +users. + +2.12 tdb_log_func Doesn't Take The + Private Pointer + +For API compatibility reasons, the logging function needs to call +tdb_get_logging_private() to retrieve the pointer registered by +the tdb_open_ex for logging. + +2.12.1 Proposed Solution + +It should simply take an extra argument, since we are prepared to +break the API/ABI. 
+ +2.13 Various Callback Functions Are Not Typesafe + +The callback functions in tdb_set_logging_function (after [tdb_log_func-Doesnt-Take] + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read +and tdb_check all take void * and must internally convert it to +the argument type they were expecting. + +If this type changes, the compiler will not produce warnings on +the callers, since it only sees void *. + +2.13.1 Proposed Solution + +With careful use of macros, we can create callback functions +which give a warning when used on gcc and the types of the +callback and its private argument differ. Unsupported compilers +will not give a warning, which is no worse than now. In addition, +the callbacks become clearer, as they need not use void * for +their parameter. + +See CCAN's typesafe_cb module at +http://ccan.ozlabs.org/info/typesafe_cb.html + +2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, + tdb_reopen_all Problematic + +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB +file should be cleared if the caller discovers it is the only +process with the TDB open. However, if any caller does not +specify TDB_CLEAR_IF_FIRST it will not be detected, so will have +the TDB erased underneath them (usually resulting in a crash). + +There is a similar issue on fork(); if the parent exits (or +otherwise closes the tdb) before the child calls tdb_reopen_all() +to establish the lock used to indicate the TDB is opened by +someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe +it alone has opened the TDB and will erase it. + +2.14.1 Proposed Solution + +Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but +see [TDB_CLEAR_IF_FIRST-Imposes-Performance]. + +3 Performance And Scalability Issues + +3.1 TDB_CLEAR_IF_FIRST + Imposes Performance Penalty + +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is +placed at offset 4 (aka. the ACTIVE_LOCK). 
While these locks +never conflict in normal tdb usage, they do add substantial +overhead for most fcntl lock implementations when the kernel +scans to detect if a lock conflict exists. This is often a single +linked list, making the time to acquire and release a fcntl lock +O(N) where N is the number of processes with the TDB open, not +the number actually doing work. + +In a Samba server it is common to have huge numbers of clients +sitting idle, and thus they have weaned themselves off the +TDB_CLEAR_IF_FIRST flag.[footnote: +There is a flag to tdb_reopen_all() which is used for this +optimization: if the parent process will outlive the child, the +child does not need the ACTIVE_LOCK. This is a workaround for +this very performance issue. +] + +3.1.1 Proposed Solution + +Remove the flag. It was a neat idea, but even trivial servers +tend to know when they are initializing for the first time and +can simply unlink the old tdb at that point. + +3.2 TDB Files Have a 4G Limit + +This seems to be becoming an issue (so much for “trivial”!), +particularly for ldb. + +3.2.1 Proposed Solution + +A new, incompatible TDB format which uses 64 bit offsets +internally rather than 32 bit as now. For simplicity of endian +conversion (which TDB does on the fly if required), all values +will be 64 bit on disk. In practice, some upper bits may be used +for other purposes, but at least 56 bits will be available for +file offsets. + +tdb_open() will automatically detect the old version, and even +create them if TDB_VERSION6 is specified to tdb_open. + +32 bit processes will still be able to access TDBs larger than 4G +(assuming that their off_t allows them to seek to 64 bits), they +will gracefully fall back as they fail to mmap. This can happen +already with large TDBs. + +Old versions of tdb will fail to open the new TDB files (since 28 +August 2009, commit 398d0c29290: prior to that any unrecognized +file format would be erased and initialized as a fresh tdb!) 
+ +3.3 TDB Records Have a 4G Limit + +This has not been a reported problem, and the API uses size_t +which can be 64 bit on 64 bit platforms. However, other limits +may have made such an issue moot. + +3.3.1 Proposed Solution + +Record sizes will be 64 bit, with an error returned on 32 bit +platforms which try to access such records (the current +implementation would return TDB_ERR_OOM in a similar case). It +seems unlikely that 32 bit keys will be a limitation, so the +implementation may not support this (see [sub:Records-Incur-A]). + +3.4 Hash Size Is Determined At TDB Creation Time + +TDB contains a number of hash chains in the header; the number is +specified at creation time, and defaults to 131. This is such a +bottleneck on large databases (as each hash chain gets quite +long), that LDB uses 10,000 for this hash. In general it is +impossible to know what the 'right' answer is at database +creation time. + +3.4.1 Proposed Solution + +After comprehensive performance testing on various scalable hash +variants[footnote: +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 +This was annoying because I was previously convinced that an +expanding tree of hashes would be very close to optimal. +], it became clear that it is hard to beat a straight linear hash +table which doubles in size when it reaches saturation. There are +three details which become important: + +1. On encountering a full bucket, we use the next bucket. + +2. Extra hash bits are stored with the offset, to reduce + comparisons. + +3. A marker entry is used on deleting an entry. + +The doubling of the table must be done under a transaction; we +will not reduce it on deletion, so it will be an unusual case. It +will either be placed at the head (other entries will be moved +out of the way so we can expand), or we could have a pointer in the +header to the current hashtable location, but that pointer would +have to be read frequently to check for hashtable moves.
+ +The locking for this is slightly more complex than the chained +case; we currently have one lock per bucket, and that means we +would need to expand the lock if we overflow to the next bucket. +The frequency of such collisions will affect our locking +heuristics: we can always lock more buckets than we need. + +One possible optimization is to only re-check the hash size on an +insert or a lookup miss. + +3.5 TDB Freelist Is Highly Contended + +TDB uses a single linked list for the free list. Allocation +occurs as follows, using heuristics which have evolved over time: + +1. Get the free list lock for this whole operation. + +2. Multiply length by 1.25, so we always over-allocate by 25%. + +3. Set the slack multiplier to 1. + +4. Examine the current freelist entry: if it is > length but < + the current best case, remember it as the best case. + +5. Multiply the slack multiplier by 1.05. + +6. If our best fit so far is less than length * slack multiplier, + return it. The slack will be turned into a new free record if + it's large enough. + +7. Otherwise, go on to the next freelist entry. + +Deleting a record occurs as follows: + +1. Lock the hash chain for this whole operation. + +2. Walk the chain to find the record, keeping the prev pointer + offset. + +3. If max_dead is non-zero: + + (a) Walk the hash chain again and count the dead records. + + (b) If it's more than max_dead, bulk free all the dead ones + (similar to steps 4 and below, but the lock is only obtained + once). + + (c) Simply mark this record as dead and return. + +4. Get the free list lock for the remainder of this operation. + +5. Examine the following block to see if it is + free; if so, enlarge the current block and remove that block + from the free list. This was disabled, as removal from the free + list was O(entries-in-free-list). + +6. Examine the preceding block to see if it is free: for this + reason, each block has a 32-bit tailer which indicates its + length.
If it is free, expand it to cover our new block and + return. + +7. Otherwise, prepend ourselves to the free list. + +Disabling right-merging (step [right-merging]) causes +fragmentation; the other heuristics proved insufficient to +address this, so the final answer to this was that when we expand +the TDB file inside a transaction commit, we repack the entire +tdb. + +The single list lock limits our allocation rate; due to the other +issues this is not currently seen as a bottleneck. + +3.5.1 Proposed Solution + +The first step is to remove all the current heuristics, as they +obviously interact, then examine them once the lock contention is +addressed. + +The free list must be split to reduce contention. Assuming +perfect free merging, we can at most have 1 free list entry for +each entry. This implies that the number of free lists is related +to the size of the hash table, but as it is rare to walk a large +number of free list entries we can use far fewer, say 1/32 of the +number of hash buckets. + +There are various benefits in using per-size free lists (see [sub:TDB-Becomes-Fragmented] +) but it's not clear this would reduce contention in the common +case where all processes are allocating/freeing the same size. +Thus we almost certainly need to divide in other ways: the most +obvious is to divide the file into zones, and using a free list +(or set of free lists) for each. This approximates address +ordering. + +Note that this means we need to split the free lists when we +expand the file; this is probably acceptable when we double the +hash table size, since that is such an expensive operation +already. In the case of increasing the file size, there is an +optimization we can use: if we use M in the formula above as the +file size rounded up to the next power of 2, we only need +reshuffle free lists when the file size crosses a power of 2 +boundary, and reshuffling the free lists is trivial: we simply +merge every consecutive pair of free lists. 
+ +The basic algorithm is as follows. Freeing is simple: + +1. Identify the correct zone. + +2. Lock the corresponding list. + +3. Re-check the zone (we didn't have a lock, sizes could have + changed): relock if necessary. + +4. Place the freed entry in the list for that zone. + +Allocation is a little more complicated, as we perform delayed +coalescing at this point: + +1. Pick a zone: either the zone we last freed into, or based on a + “random” number. + +2. Lock the corresponding list. + +3. Re-check the zone: relock if necessary. + +4. If the top entry is large enough, remove it from the list and + return it. + +5. Otherwise, coalesce entries in the list. + + (a) + + (b) + + (c) + + (d) + +6. If there was no entry large enough, unlock the list and try + the next zone. + +7. + +8. + +9. If no zone satisfies, expand the file. + +This optimizes rapid insert/delete of free list entries by not +coalescing them all the time. First-fit address +ordering seems to be fairly good for keeping fragmentation low +(see [sub:TDB-Becomes-Fragmented]). Note that address ordering +does not need a tailer to coalesce, though if we needed one we +could have one cheaply: see [sub:Records-Incur-A]. + + + +I anticipate that the number of entries in each free zone would +be small, but it might be worth using one free entry to hold +pointers to the others for cache efficiency. + +3.6 TDB Becomes Fragmented + +Much of this is a result of allocation strategy[footnote: +The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 +ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps +] and deliberate hobbling of coalescing; internal fragmentation +(aka overallocation) is deliberately set at 25%, and external +fragmentation is only cured by the decision to repack the entire +db when a transaction commit needs to enlarge the file. + +3.6.1 Proposed Solution + +The 25% overhead on allocation works in practice for ldb because +indexes tend to expand by one record at a time.
This internal +fragmentation can be resolved by having an “expanded” bit in the +header to note entries that have previously expanded, and +allocating more space for them. + +There is a spectrum of possible solutions for external +fragmentation: one is to use a fragmentation-avoiding allocation +strategy such as a best-fit address-order allocator. The other end +of the spectrum would be to use a bump allocator (very fast and +simple) and simply repack the file when we reach the end. + +There are three problems with efficient fragmentation-avoiding +allocators: they are non-trivial, they tend to use a single free +list for each size, and there's no evidence that tdb allocation +patterns will match those recorded for general allocators (though +it seems likely). + +Thus we don't spend too much effort on external fragmentation; we +will be no worse than the current code if we need to repack on +occasion. More effort is spent on reducing freelist contention, +and reducing overhead. + +3.7 Records Incur A 28-Byte Overhead + +Each TDB record has a header as follows: + +struct tdb_record { + + tdb_off_t next; /* offset of the next record in the list +*/ + + tdb_len_t rec_len; /* total byte length of record */ + + tdb_len_t key_len; /* byte length of key */ + + tdb_len_t data_len; /* byte length of data */ + + uint32_t full_hash; /* the full 32 bit hash of the key */ + + uint32_t magic; /* try to catch errors */ + + /* the following union is implied: + + union { + + char record[rec_len]; + + struct { + + char key[key_len]; + + char data[data_len]; + + } + + uint32_t totalsize; (tailer) + + } + + */ + +}; + +Naively, this would double to a 56-byte overhead on a 64 bit +implementation. + +3.7.1 Proposed Solution + +We can use various techniques to reduce this for an allocated +block: + +1. The 'next' pointer is not required, as we are using a flat + hash table. + +2.
'rec_len' can instead be expressed as an addition to key_len + and data_len (it accounts for wasted or overallocated length in + the record). Since the record length is always a multiple of 8, + we can conveniently fit it in 32 bits (representing up to 35 + bits). + +3. 'key_len' and 'data_len' can be reduced. I'm unwilling to + restrict 'data_len' to 32 bits, but instead we can combine the + two into one 64-bit field and use a 5 bit value which + indicates at what bit to divide the two. Keys are unlikely to + scale as fast as data, so I'm assuming a maximum key size of 32 + bits. + +4. 'full_hash' is used to avoid a memcmp on the “miss” case, but + this is diminishing returns after a handful of bits (at 10 + bits, it reduces 99.9% of false memcmp). As an aside, as the + lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. + +5. 'magic' does not need to be enlarged: it currently reflects + one of 5 values (used, free, dead, recovery, and + unused_recovery). It is useful for quick sanity checking + however, and should not be eliminated. + +6. 'tailer' is only used to coalesce free blocks (so a block to + the right can find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of + the following block (and the tailer only exists in free + blocks).[footnote: +This technique is from Thomas Standish. Data Structure Techniques. +Addison-Wesley, Reading, Massachusetts, 1980. +] The current proposed coalescing algorithm doesn't need this, + however. + +This produces a 16 byte used header like this: + +struct tdb_used_record { + + uint32_t magic : 16, + + prev_is_free: 1, + + key_data_divide: 5, + + top_hash: 10; + + uint32_t extra_octets; + + uint64_t key_and_data_len; + +}; + +And a free record like this: + +struct tdb_free_record { + + uint32_t free_magic; + + uint64_t total_length; + + ...
+ + uint64_t tailer; + +}; + + + +3.8 Transaction Commit Requires 4 fdatasync + +The current transaction algorithm is: + +1. write_recovery_data(); + +2. sync(); + +3. write_recovery_header(); + +4. sync(); + +5. overwrite_with_new_data(); + +6. sync(); + +7. remove_recovery_header(); + +8. sync(); + +On current ext3, each sync flushes all data to disk, so the next +3 syncs are relatively inexpensive. But this could become a +performance bottleneck on other filesystems such as ext4. + +3.8.1 Proposed Solution + + + + + + + + + +Neil Brown points out that this is overzealous, and only one sync +is needed: + +1. Bundle the recovery data, a transaction counter and a strong + checksum of the new data. + +2. Strong checksum that whole bundle. + +3. Store the bundle in the database. + +4. Overwrite the oldest of the two recovery pointers in the + header (identified using the transaction counter) with the + offset of this bundle. + +5. sync. + +6. Write the new data to the file. + +Checking for recovery means identifying the latest bundle with a +valid checksum and using the new data checksum to ensure that it +has been applied. This is more expensive than the current check, +but need only be done at open. For running databases, a separate +header field can be used to indicate a transaction in progress; +we need only check for recovery if this is set. + +3.9 TDB Does Not Have Snapshot Support + +3.9.1 Proposed Solution + +None. At some point you say “use a real database”. + +But as a thought experiment, if we implemented transactions to +only overwrite free entries (this is tricky: there must not be a +header in each entry which indicates whether it is free; instead we must use +presence in metadata elsewhere), and a pointer to the hash +table, we could create an entirely new commit without destroying +existing data. Then it would be easy to implement snapshots in a +similar way.
+ +This would not allow arbitrary changes to the database, such as +tdb_repack does, and would require more space (since we have to +preserve the current and future entries at once). If we used hash +trees rather than one big hash table, we might only have to +rewrite some sections of the hash, too. + +We could then implement snapshots using a similar method, using +multiple different hash tables/free tables. + +3.10 Transactions Cannot Operate in Parallel + +This would be useless for ldb, as it hits the index records with +just about every update. It would add significant complexity in +resolving clashes, and cause all the transaction callers to write +their code to loop in the case where the transactions spuriously +failed. + +3.10.1 Proposed Solution + +We could solve a small part of the problem by providing read-only +transactions. These would allow one write transaction to begin, +but it could not commit until all r/o transactions are done. This +would require a new RO_TRANSACTION_LOCK, which would be upgraded +on commit. + +3.11 Default Hash Function Is Suboptimal + +The Knuth-inspired multiplicative hash used by tdb is fairly slow +(especially if we expand it to 64 bits), and works best when the +hash bucket size is a prime number (which also means a slow +modulus). In addition, it is highly predictable which could +potentially lead to a Denial of Service attack in some TDB uses. + +3.11.1 Proposed Solution + +The Jenkins lookup3 hash[footnote: +http://burtleburtle.net/bob/c/lookup3.c +] is a fast and superbly-mixing hash. It's used by the Linux +kernel and almost everything else. This has the particular +properties that it takes an initial seed, and produces two 32 bit +hash numbers, which we can combine into a 64-bit hash. + +The seed should be created at tdb-creation time from some random +source, and placed in the header. This is far from foolproof, but +adds a little bit of protection against hash bombing.
+ +3.12 Reliable Traversal Adds Complexity + +We lock a record during traversal iteration, and try to grab that +lock in the delete code. If that grab on delete fails, we simply +mark it deleted and continue onwards; traversal checks for this +condition and does the delete when it moves off the record. + +If traversal terminates, the dead record may be left +indefinitely. + +3.12.1 Proposed Solution + +Remove reliability guarantees; see [traverse-Proposed-Solution]. + +3.13 Fcntl Locking Adds Overhead + +Placing a fcntl lock means a system call, as does removing one. +This is actually one reason why transactions can be faster +(everything is locked once at transaction start). In the +uncontended case, this overhead can theoretically be eliminated. + +3.13.1 Proposed Solution + +None. + +We tried this before with spinlock support, in the early days of +TDB, and it didn't make much difference except in manufactured +benchmarks. + +We could use spinlocks (with futex kernel support under Linux), +but it means that we lose automatic cleanup when a process dies +with a lock. There is a method of auto-cleanup under Linux, but +it's not supported by other operating systems. We could +reintroduce a clear-if-first-style lock and sweep for dead +futexes on open, but that wouldn't help the normal case of one +concurrent opener dying. Increasingly elaborate repair schemes +could be considered, but they require an ABI change (everyone +must use them) anyway, so there's no need to do this at the same +time as everything else. + diff --git a/ccan/tdb2/doc/design.lyx b/ccan/tdb2/doc/design.lyx new file mode 100644 index 00000000..51378c33 --- /dev/null +++ b/ccan/tdb2/doc/design.lyx @@ -0,0 +1,2282 @@ +#LyX 1.6.5 created this file. 
For more info see http://www.lyx.org/ +\lyxformat 345 +\begin_document +\begin_header +\textclass article +\use_default_options true +\language english +\inputencoding auto +\font_roman default +\font_sans default +\font_typewriter default +\font_default_family default +\font_sc false +\font_osf false +\font_sf_scale 100 +\font_tt_scale 100 + +\graphics default +\paperfontsize default +\use_hyperref false +\papersize default +\use_geometry false +\use_amsmath 1 +\use_esint 1 +\cite_engine basic +\use_bibtopic false +\paperorientation portrait +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\defskip medskip +\quotes_language english +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes true +\output_changes true +\author "" +\author "" +\end_header + +\begin_body + +\begin_layout Title +TDB2: Redesigning The Trivial DataBase +\end_layout + +\begin_layout Author +Rusty Russell, IBM Corporation +\end_layout + +\begin_layout Date +26-July-2010 +\end_layout + +\begin_layout Abstract +The Trivial DataBase on-disk format is 32 bits; with use cases heading + towards the 4G limit, that must change. + This required breakage provides an opportunity to revisit TDB's other design + decisions and reassess them. +\end_layout + +\begin_layout Section +Introduction +\end_layout + +\begin_layout Standard +The Trivial DataBase was originally written by Andrew Tridgell as a simple + key/data pair storage system with the same API as dbm, but allowing multiple + readers and writers while being small enough (< 1000 lines of C) to include + in SAMBA. + The simple design created in 1999 has proven surprisingly robust and performant +, used in Samba versions 3 and 4 as well as numerous other projects. + Its useful life was greatly increased by the (backwards-compatible!) addition + of transaction support in 2005.
+\end_layout + +\begin_layout Standard +The wider variety and greater demands of TDB-using code have led to some + organic growth of the API, as well as some compromises on the implementation. + None of these, by themselves, are seen as show-stoppers, but the cumulative + effect is a loss of elegance over the initial, simple TDB implementation. + Here is a table of the approximate number of lines of implementation code + and number of API functions at the end of each year: +\end_layout + +\begin_layout Standard +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +Year End +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +API Functions +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +Lines of C Code Implementation +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +1999 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +13 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1195 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2000 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +24 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1725 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2001 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +32 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2228 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2002 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2481 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2003 +\end_layout + +\end_inset + + +\begin_inset
Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2552 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2004 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +40 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2584 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2005 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +38 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2647 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2006 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +52 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3754 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2007 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +66 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4398 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2008 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +71 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4768 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2009 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +73 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +5715 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +This review is an attempt to catalog and address all the known issues with + TDB and create solutions which address the problems without significantly + increasing complexity; all involved are far too aware of the dangers 
of + second system syndrome in rewriting a successful project like this. +\end_layout + +\begin_layout Section +API Issues +\end_layout + +\begin_layout Subsection +tdb_open_ex Is Not Expandable +\end_layout + +\begin_layout Standard +The tdb_open() call was expanded to tdb_open_ex(), which added an optional + hashing function and an optional logging function argument. + Additional arguments to open would require the introduction of a tdb_open_ex2 + call etc. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +tdb_open() will take a linked-list of attributes: +\end_layout + +\begin_layout LyX-Code +enum tdb_attribute { +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_LOG = 0, +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_HASH = 1 +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_base { +\end_layout + +\begin_layout LyX-Code + enum tdb_attribute attr; +\end_layout + +\begin_layout LyX-Code + union tdb_attribute *next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_log { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */ +\end_layout + +\begin_layout LyX-Code + tdb_log_func log_fn; +\end_layout + +\begin_layout LyX-Code + void *log_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_hash { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */ +\end_layout + +\begin_layout LyX-Code + tdb_hash_func hash_fn; +\end_layout + +\begin_layout LyX-Code + void *hash_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +union tdb_attribute { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_log log; +\end_layout + 
+\begin_layout LyX-Code + struct tdb_attribute_hash hash; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +This allows future attributes to be added, even if this expands the size + of the union. +\end_layout + +\begin_layout Subsection +tdb_traverse Makes Impossible Guarantees +\end_layout + +\begin_layout Standard +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it + was thought that it was important to guarantee that all records which exist + at the start and end of the traversal would be included, and no record + would be included twice. +\end_layout + +\begin_layout Standard +This adds complexity (see +\begin_inset CommandInset ref +LatexCommand ref +reference "Reliable-Traversal-Adds" + +\end_inset + +) and does not work anyway for records which are altered (in particular, + those which are expanded may be effectively deleted and re-added behind + the traversal). +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "traverse-Proposed-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Abandon the guarantee. + You will see every record if no changes occur during your traversal, otherwise + you will see some subset. + You can prevent changes by using a transaction or the locking API. +\end_layout + +\begin_layout Subsection +Nesting of Transactions Is Fraught +\end_layout + +\begin_layout Standard +TDB has alternated between allowing nested transactions and not allowing + them. + Various paths in the Samba codebase assume that transactions will nest, + and in a sense they can: the operation is only committed to disk when the + outer transaction is committed. + There are two problems, however: +\end_layout + +\begin_layout Enumerate +Canceling the inner transaction will cause the outer transaction commit + to fail, and will not undo any operations since the inner transaction began. 
+ This problem is soluble with some additional internal code. +\end_layout + +\begin_layout Enumerate +An inner transaction commit can be cancelled by the outer transaction. + This is desirable in the way which Samba's database initialization code + uses transactions, but could be a surprise to any users expecting a successful + transaction commit to expose changes to others. +\end_layout + +\begin_layout Standard +The current solution is to specify the behavior at tdb_open(), with the + default currently that nested transactions are allowed. + This flag can also be changed at runtime. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Given the usage patterns, it seems that the +\begin_inset Quotes eld +\end_inset + +least-surprise +\begin_inset Quotes erd +\end_inset + + behavior of disallowing nested transactions should become the default. + Additionally, it seems the outer transaction is the only code which knows + whether inner transactions should be allowed, so a flag to indicate this + could be added to tdb_transaction_start. + However, this behavior can be simulated with a wrapper which uses tdb_add_flags +() and tdb_remove_flags(), so the API should not be expanded for this relatively +-obscure case. +\end_layout + +\begin_layout Subsection +Incorrect Hash Function is Not Detected +\end_layout + +\begin_layout Standard +tdb_open_ex() allows the calling code to specify a different hash function + to use, but does not check that all other processes accessing this tdb + are using the same hash function. + The result is that records are missing from tdb_fetch(). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain an example hash result (eg. + the hash of 0xdeadbeef), and tdb_open_ex() should check that the given + hash function produces the same answer, or fail the tdb_open call. 
+\end_layout + +\begin_layout Subsection +tdb_set_max_dead/TDB_VOLATILE Expose Implementation +\end_layout + +\begin_layout Standard +In response to scalability issues with the free list ( +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Freelist-Is" + +\end_inset + +) two API workarounds have been incorporated in TDB: tdb_set_max_dead() + and the TDB_VOLATILE flag to tdb_open. + The latter actually calls the former with an argument of +\begin_inset Quotes eld +\end_inset + +5 +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +This code allows deleted records to accumulate without putting them in the + free list. + On delete we iterate through each chain and free them in a batch if there + are more than max_dead entries. + These are never otherwise recycled except as a side-effect of a tdb_repack. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With the scalability problems of the freelist solved, this API can be removed. + The TDB_VOLATILE flag may still be useful as a hint that store and delete + of records will be at least as common as fetch in order to allow some internal + tuning, but initially will become a no-op. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Files-Cannot" + +\end_inset + +TDB Files Cannot Be Opened Multiple Times In The Same Process +\end_layout + +\begin_layout Standard +No process can open the same TDB twice; we check and disallow it. + This is an unfortunate side-effect of fcntl locks, which operate on a per-file + rather than per-file-descriptor basis, and do not nest. + Thus, closing any file descriptor on a file clears all the locks obtained + by this process, even if they were placed using a different file descriptor! 
+\end_layout + +\begin_layout Standard +Note that even if this were solved, deadlock could occur if operations were + nested: this is a more manageable programming error in most cases. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We could lobby POSIX to fix the perverse rules, or at least lobby Linux + to violate them so that the most common implementation does not have this + restriction. + This would be a generally good idea for other fcntl lock users. +\end_layout + +\begin_layout Standard +Samba uses a wrapper which hands out the same tdb_context to multiple callers + if this happens, and does simple reference counting. + We should do this inside the tdb library, which already emulates lock nesting + internally; it would need to recognize when deadlock occurs within a single + process. + This would create a new failure mode for tdb operations (while we currently + handle locking failures, they are impossible in normal use and a process + encountering them can do little but give up). +\end_layout + +\begin_layout Standard +I do not see benefit in an additional tdb_open flag to indicate whether + re-opening is allowed, though there may be some benefit to adding a + call to detect when a tdb_context is shared, to allow others to create such + an API. +\end_layout + +\begin_layout Subsection +TDB API Is Not POSIX Thread-safe +\end_layout + +\begin_layout Standard +The TDB API uses an error code which can be queried after an operation to + determine what went wrong. + This programming model does not work with threads, unless specific additional + guarantees are given by the implementation. + In addition, even otherwise-independent threads cannot open the same TDB + (as in +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Files-Cannot" + +\end_inset + +). 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Rearchitecting the API to include a tdb_errcode pointer would be a great + deal of churn; we are better to guarantee that the tdb_errcode is per-thread + so the current programming model can be maintained. +\end_layout + +\begin_layout Standard +This requires dynamic per-thread allocations, which is awkward with POSIX + threads (pthread_key_create space is limited and we cannot simply allocate + a key for every TDB). +\end_layout + +\begin_layout Standard +Internal locking is required to make sure that fcntl locks do not overlap + between threads, and also that the global list of tdbs is maintained. +\end_layout + +\begin_layout Standard +The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe + version of the library, and otherwise no overhead will exist. +\end_layout + +\begin_layout Subsection +*_nonblock Functions And *_mark Functions Expose Implementation +\end_layout + +\begin_layout Standard +CTDB +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +Clustered TDB, see http://ctdb.samba.org +\end_layout + +\end_inset + + wishes to operate on TDB in a non-blocking manner. + This is currently done as follows: +\end_layout + +\begin_layout Enumerate +Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). + If this fails: +\end_layout + +\begin_layout Enumerate +Fork a child process, and wait for it to call the normal variant (eg. + tdb_lockall). +\end_layout + +\begin_layout Enumerate +If the child succeeds, call the _mark variant to indicate we already have + the locks (eg. + tdb_lockall_mark). +\end_layout + +\begin_layout Enumerate +Upon completion, tell the child to release the locks (eg. + tdb_unlockall). +\end_layout + +\begin_layout Enumerate +Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). 
+\end_layout + +\begin_layout Standard +There are several issues with this approach. + Firstly, adding two new variants of each function clutters the API for + an obscure use, and so not all functions have three variants. + Secondly, it assumes that all paths of the functions ask for the same locks, + otherwise the parent process will have to get a lock which the child doesn't + have under some circumstances. + I don't believe this is currently the case, but it constrains the implementatio +n. + +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "Proposed-Solution-locking-hook" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Implement a hook for locking methods, so that the caller can control the + calls to create and remove fcntl locks. + In this scenario, ctdbd would operate as follows: +\end_layout + +\begin_layout Enumerate +Call the normal API function, eg tdb_lockall(). +\end_layout + +\begin_layout Enumerate +When the lock callback comes in, check if the child has the lock. + Initially, this is always false. + If so, return 0. + Otherwise, try to obtain it in non-blocking mode. + If that fails, return EWOULDBLOCK. +\end_layout + +\begin_layout Enumerate +Release locks in the unlock callback as normal. +\end_layout + +\begin_layout Enumerate +If tdb_lockall() fails, see if we recorded a lock failure; if so, call the + child to repeat the operation. +\end_layout + +\begin_layout Enumerate +The child records what locks it obtains, and returns that information to + the parent. +\end_layout + +\begin_layout Enumerate +When the child has succeeded, goto 1. +\end_layout + +\begin_layout Standard +This is flexible enough to handle any potential locking scenario, even when + lock requirements change. + It can be optimized so that the parent does not release locks, just tells + the child which locks it doesn't need to obtain. 
+\end_layout + +\begin_layout Standard +It also keeps the complexity out of the API, and in ctdbd where it is needed. +\end_layout + +\begin_layout Subsection +tdb_chainlock Functions Expose Implementation +\end_layout + +\begin_layout Standard +tdb_chainlock locks some number of records, including the record indicated + by the given key. + This gave atomicity guarantees; no-one can start a transaction, alter, + read or delete that key while the lock is held. +\end_layout + +\begin_layout Standard +It also makes the same guarantee for any other key in the chain, which is + an internal implementation detail and potentially a cause for deadlock. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + It would be nice to have an explicit single entry lock which affected no + other keys. + Unfortunately, this won't work for an entry which doesn't exist. + Thus while chainlock may be implemented more efficiently for the existing + case, it will still have overlap issues with the non-existing case. + So it is best to keep the current (lack of) guarantee about which records + will be affected to avoid constraining our implementation. +\end_layout + +\begin_layout Subsection +Signal Handling is Not Race-Free +\end_layout + +\begin_layout Standard +The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate + that the tdb locking code should return with a failure, rather than trying + again when a signal is received (and errno == EAGAIN). + This is usually used to implement timeouts. +\end_layout + +\begin_layout Standard +Unfortunately, this does not work in the case where the signal is received + before the tdb code enters the fcntl() call to place the lock: the code + will sleep within the fcntl() code, unaware that the signal wants it to + exit. + In the case of long timeouts, this does not happen in practice. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The locking hooks proposed in +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + would allow the user to decide on whether to fail the lock acquisition + on a signal. + This allows the caller to choose their own compromise: they could narrow + the race by checking immediately before the fcntl call. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +It may be possible to make this race-free in some implementations by having + the signal handler alter the struct flock to make it invalid. + This will cause the fcntl() lock call to fail with EINVAL if the signal + occurs before the kernel is entered, otherwise EAGAIN. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsection +The API Uses Gratuitous Typedefs, Capitals +\end_layout + +\begin_layout Standard +typedefs are useful for providing source compatibility when types can differ + across implementations, or arguably in the case of function pointer definitions + which are hard for humans to parse. + Otherwise it is simply obfuscation and pollutes the namespace. +\end_layout + +\begin_layout Standard +Capitalization is usually reserved for compile-time constants and macros. +\end_layout + +\begin_layout Description +TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the + definition isn't visible to the API user anyway. +\end_layout + +\begin_layout Description +TDB_DATA There is no reason to use this over struct TDB_DATA; the struct + needs to be understood by the API user. +\end_layout + +\begin_layout Description +struct +\begin_inset space ~ +\end_inset + +TDB_DATA This would normally be called 'struct tdb_data'. +\end_layout + +\begin_layout Description +enum +\begin_inset space ~ +\end_inset + +TDB_ERROR Similarly, this would normally be enum tdb_error. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + Introducing lower case variants would please pedants like myself, but if + it were done the existing ones should be kept. + There is little point forcing a purely cosmetic change upon tdb users. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "tdb_log_func-Doesnt-Take" + +\end_inset + +tdb_log_func Doesn't Take The Private Pointer +\end_layout + +\begin_layout Standard +For API compatibility reasons, the logging function needs to call tdb_get_loggin +g_private() to retrieve the pointer registered by the tdb_open_ex for logging. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +It should simply take an extra argument, since we are prepared to break + the API/ABI. +\end_layout + +\begin_layout Subsection +Various Callback Functions Are Not Typesafe +\end_layout + +\begin_layout Standard +The callback functions in tdb_set_logging_function (after +\begin_inset CommandInset ref +LatexCommand ref +reference "tdb_log_func-Doesnt-Take" + +\end_inset + + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check + all take void * and must internally convert it to the argument type they + were expecting. +\end_layout + +\begin_layout Standard +If this type changes, the compiler will not produce warnings on the callers, + since it only sees void *. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With careful use of macros, we can create callback functions which give + a warning when used on gcc and the types of the callback and its private + argument differ. + Unsupported compilers will not give a warning, which is no worse than now. + In addition, the callbacks become clearer, as they need not use void * + for their parameter. 
+\end_layout + +\begin_layout Standard +See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html +\end_layout + +\begin_layout Subsection +TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic +\end_layout + +\begin_layout Standard +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should + be cleared if the caller discovers it is the only process with the TDB + open. + However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not + be detected, so will have the TDB erased underneath them (usually resulting + in a crash). +\end_layout + +\begin_layout Standard +There is a similar issue on fork(); if the parent exits (or otherwise closes + the tdb) before the child calls tdb_reopen_all() to establish the lock + used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener + at that moment will believe it alone has opened the TDB and will erase + it. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove TDB_CLEAR_IF_FIRST. + Other workarounds are possible, but see +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +. +\end_layout + +\begin_layout Section +Performance And Scalability Issues +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +TDB_CLEAR_IF_FIRST Imposes Performance Penalty +\end_layout + +\begin_layout Standard +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset + 4 (aka. + the ACTIVE_LOCK). + While these locks never conflict in normal tdb usage, they do add substantial + overhead for most fcntl lock implementations when the kernel scans to detect + if a lock conflict exists. 
+ This is often a single linked list, making the time to acquire and release + a fcntl lock O(N) where N is the number of processes with the TDB open, + not the number actually doing work. +\end_layout + +\begin_layout Standard +In a Samba server it is common to have huge numbers of clients sitting idle, + and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +There is a flag to tdb_reopen_all() which is used for this optimization: + if the parent process will outlive the child, the child does not need the + ACTIVE_LOCK. + This is a workaround for this very performance issue. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove the flag. + It was a neat idea, but even trivial servers tend to know when they are + initializing for the first time and can simply unlink the old tdb at that + point. +\end_layout + +\begin_layout Subsection +TDB Files Have a 4G Limit +\end_layout + +\begin_layout Standard +This seems to be becoming an issue (so much for +\begin_inset Quotes eld +\end_inset + +trivial +\begin_inset Quotes erd +\end_inset + +!), particularly for ldb. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +A new, incompatible TDB format which uses 64 bit offsets internally rather + than 32 bit as now. + For simplicity of endian conversion (which TDB does on the fly if required), + all values will be 64 bit on disk. + In practice, some upper bits may be used for other purposes, but at least + 56 bits will be available for file offsets. +\end_layout + +\begin_layout Standard +tdb_open() will automatically detect the old version, and even create them + if TDB_VERSION6 is specified to tdb_open. 
+\end_layout + +\begin_layout Standard +32 bit processes will still be able to access TDBs larger than 4G (assuming + that their off_t allows them to seek to 64 bits), they will gracefully + fall back as they fail to mmap. + This can happen already with large TDBs. +\end_layout + +\begin_layout Standard +Old versions of tdb will fail to open the new TDB files (since 28 August + 2009, commit 398d0c29290: prior to that any unrecognized file format would + be erased and initialized as a fresh tdb!) +\end_layout + +\begin_layout Subsection +TDB Records Have a 4G Limit +\end_layout + +\begin_layout Standard +This has not been a reported problem, and the API uses size_t which can + be 64 bit on 64 bit platforms. + However, other limits may have made such an issue moot. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Record sizes will be 64 bit, with an error returned on 32 bit platforms + which try to access such records (the current implementation would return + TDB_ERR_OOM in a similar case). + It seems unlikely that 32 bit keys will be a limitation, so the implementation + may not support this (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +). +\end_layout + +\begin_layout Subsection +Hash Size Is Determined At TDB Creation Time +\end_layout + +\begin_layout Standard +TDB contains a number of hash chains in the header; the number is specified + at creation time, and defaults to 131. + This is such a bottleneck on large databases (as each hash chain gets quite + long), that LDB uses 10,000 for this hash. + In general it is impossible to know what the 'right' answer is at database + creation time. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +After comprehensive performance testing on various scalable hash variants +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying + because I was previously convinced that an expanding tree of hashes would + be very close to optimal. +\end_layout + +\end_inset + +, it became clear that it is hard to beat a straight linear hash table which + doubles in size when it reaches saturation. + There are three details which become important: +\end_layout + +\begin_layout Enumerate +On encountering a full bucket, we use the next bucket. +\end_layout + +\begin_layout Enumerate +Extra hash bits are stored with the offset, to reduce comparisons. +\end_layout + +\begin_layout Enumerate +A marker entry is used on deleting an entry. +\end_layout + +\begin_layout Standard +The doubling of the table must be done under a transaction; we will not + reduce it on deletion, so it will be an unusual case. + It will either be placed at the head (other entries will be moved out of the + way so we can expand). + We could have a pointer in the header to the current hashtable location, + but that pointer would have to be read frequently to check for hashtable + moves. +\end_layout + +\begin_layout Standard +The locking for this is slightly more complex than the chained case; we + currently have one lock per bucket, and that means we would need to expand + the lock if we overflow to the next bucket. + The frequency of such collisions will affect our locking heuristics: we + can always lock more buckets than we need. +\end_layout + +\begin_layout Standard +One possible optimization is to only re-check the hash size on an insert + or a lookup miss. 
+\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Freelist-Is" + +\end_inset + +TDB Freelist Is Highly Contended +\end_layout + +\begin_layout Standard +TDB uses a single linked list for the free list. + Allocation occurs as follows, using heuristics which have evolved over + time: +\end_layout + +\begin_layout Enumerate +Get the free list lock for this whole operation. +\end_layout + +\begin_layout Enumerate +Multiply length by 1.25, so we always over-allocate by 25%. +\end_layout + +\begin_layout Enumerate +Set the slack multiplier to 1. +\end_layout + +\begin_layout Enumerate +Examine the current freelist entry: if it is > length but < the current + best case, remember it as the best case. +\end_layout + +\begin_layout Enumerate +Multiply the slack multiplier by 1.05. +\end_layout + +\begin_layout Enumerate +If our best fit so far is less than length * slack multiplier, return it. + The slack will be turned into a new free record if it's large enough. +\end_layout + +\begin_layout Enumerate +Otherwise, go onto the next freelist entry. +\end_layout + +\begin_layout Standard +Deleting a record occurs as follows: +\end_layout + +\begin_layout Enumerate +Lock the hash chain for this whole operation. +\end_layout + +\begin_layout Enumerate +Walk the chain to find the record, keeping the prev pointer offset. +\end_layout + +\begin_layout Enumerate +If max_dead is non-zero: +\end_layout + +\begin_deeper +\begin_layout Enumerate +Walk the hash chain again and count the dead records. +\end_layout + +\begin_layout Enumerate +If it's more than max_dead, bulk free all the dead ones (similar to steps + 4 and below, but the lock is only obtained once). +\end_layout + +\begin_layout Enumerate +Simply mark this record as dead and return. + +\end_layout + +\end_deeper +\begin_layout Enumerate +Get the free list lock for the remainder of this operation. 
+\end_layout + +\begin_layout Enumerate +\begin_inset CommandInset label +LatexCommand label +name "right-merging" + +\end_inset + +Examine the following block to see if it is free; if so, enlarge the current + block and remove that block from the free list. + This was disabled, as removal from the free list was O(entries-in-free-list). +\end_layout + +\begin_layout Enumerate +Examine the preceding block to see if it is free: for this reason, each + block has a 32-bit tailer which indicates its length. + If it is free, expand it to cover our new block and return. +\end_layout + +\begin_layout Enumerate +Otherwise, prepend ourselves to the free list. +\end_layout + +\begin_layout Standard +Disabling right-merging (step +\begin_inset CommandInset ref +LatexCommand ref +reference "right-merging" + +\end_inset + +) causes fragmentation; the other heuristics proved insufficient to address + this, so the final answer to this was that when we expand the TDB file + inside a transaction commit, we repack the entire tdb. +\end_layout + +\begin_layout Standard +The single list lock limits our allocation rate; due to the other issues + this is not currently seen as a bottleneck. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The first step is to remove all the current heuristics, as they obviously + interact, then examine them once the lock contention is addressed. +\end_layout + +\begin_layout Standard +The free list must be split to reduce contention. + Assuming perfect free merging, we can at most have 1 free list entry for + each entry. + This implies that the number of free lists is related to the size of the + hash table, but as it is rare to walk a large number of free list entries + we can use far fewer, say 1/32 of the number of hash buckets. 
+\end_layout + +\begin_layout Standard +There are various benefits in using per-size free lists (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +) but it's not clear this would reduce contention in the common case where + all processes are allocating/freeing the same size. + Thus we almost certainly need to divide in other ways: the most obvious + is to divide the file into zones, and using a free list (or set of free + lists) for each. + This approximates address ordering. +\end_layout + +\begin_layout Standard +Note that this means we need to split the free lists when we expand the + file; this is probably acceptable when we double the hash table size, since + that is such an expensive operation already. + In the case of increasing the file size, there is an optimization we can + use: if we use M in the formula above as the file size rounded up to the + next power of 2, we only need reshuffle free lists when the file size crosses + a power of 2 boundary, +\emph on +and +\emph default +reshuffling the free lists is trivial: we simply merge every consecutive + pair of free lists. +\end_layout + +\begin_layout Standard +The basic algorithm is as follows. + Freeing is simple: +\end_layout + +\begin_layout Enumerate +Identify the correct zone. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +Re-check the zone (we didn't have a lock, sizes could have changed): relock + if necessary. +\end_layout + +\begin_layout Enumerate +Place the freed entry in the list for that zone. +\end_layout + +\begin_layout Standard +Allocation is a little more complicated, as we perform delayed coalescing + at this point: +\end_layout + +\begin_layout Enumerate +Pick a zone either the zone we last freed into, or based on a +\begin_inset Quotes eld +\end_inset + +random +\begin_inset Quotes erd +\end_inset + + number. 
+\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +Re-check the zone: relock if necessary. +\end_layout + +\begin_layout Enumerate +If the top entry is large enough, remove it from the list and return it. +\end_layout + +\begin_layout Enumerate +Otherwise, coalesce entries in the list. If there was no entry large enough, + unlock the list and try the next zone. +\end_layout + +\begin_layout Enumerate +If no zone satisfies, expand the file. +\end_layout + +\begin_layout Standard +This optimizes rapid insert/delete of free list entries by not coalescing + them all the time. + First-fit address ordering seems to be fairly good for keeping + fragmentation low (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +). + Note that address ordering does not need a tailer to coalesce, though if + we needed one we could have one cheaply: see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +. + +\end_layout + +\begin_layout Standard +I anticipate that the number of entries in each free zone would be small, + but it might be worth using one free entry to hold pointers to the others + for cache efficiency. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Becomes-Fragmented" + +\end_inset + +TDB Becomes Fragmented +\end_layout + +\begin_layout Standard +Much of this is a result of allocation strategy +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +The Memory Fragmentation Problem: Solved? 
Johnstone & Wilson 1995 ftp://ftp.cs.ute +xas.edu/pub/garbage/malloc/ismm98.ps +\end_layout + +\end_inset + + and deliberate hobbling of coalescing; internal fragmentation (aka overallocati +on) is deliberately set at 25%, and external fragmentation is only cured + by the decision to repack the entire db when a transaction commit needs + to enlarge the file. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The 25% overhead on allocation works in practice for ldb because indexes + tend to expand by one record at a time. + This internal fragmentation can be resolved by having an +\begin_inset Quotes eld +\end_inset + +expanded +\begin_inset Quotes erd +\end_inset + + bit in the header to note entries that have previously expanded, and allocating + more space for them. +\end_layout + +\begin_layout Standard +There is a spectrum of possible solutions for external fragmentation: + one is to use a fragmentation-avoiding allocation strategy such as best-fit + address-order allocator. + The other end of the spectrum would be to use a bump allocator (very fast + and simple) and simply repack the file when we reach the end. +\end_layout + +\begin_layout Standard +There are three problems with efficient fragmentation-avoiding allocators: + they are non-trivial, they tend to use a single free list for each size, + and there's no evidence that tdb allocation patterns will match those recorded + for general allocators (though it seems likely). +\end_layout + +\begin_layout Standard +Thus we don't spend too much effort on external fragmentation; we will be + no worse than the current code if we need to repack on occasion. + More effort is spent on reducing freelist contention, and reducing overhead. 
+\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Records-Incur-A" + +\end_inset + +Records Incur A 28-Byte Overhead +\end_layout + +\begin_layout Standard +Each TDB record has a header as follows: +\end_layout + +\begin_layout LyX-Code +struct tdb_record { +\end_layout + +\begin_layout LyX-Code + tdb_off_t next; /* offset of the next record in the list */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t rec_len; /* total byte length of record */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t key_len; /* byte length of key */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t data_len; /* byte length of data */ +\end_layout + +\begin_layout LyX-Code + uint32_t full_hash; /* the full 32 bit hash of the key */ +\end_layout + +\begin_layout LyX-Code + uint32_t magic; /* try to catch errors */ +\end_layout + +\begin_layout LyX-Code + /* the following union is implied: +\end_layout + +\begin_layout LyX-Code + union { +\end_layout + +\begin_layout LyX-Code + char record[rec_len]; +\end_layout + +\begin_layout LyX-Code + struct { +\end_layout + +\begin_layout LyX-Code + char key[key_len]; +\end_layout + +\begin_layout LyX-Code + char data[data_len]; +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + uint32_t totalsize; (tailer) +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + */ +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +Naively, this would double to a 56-byte overhead on a 64 bit implementation. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We can use various techniques to reduce this for an allocated block: +\end_layout + +\begin_layout Enumerate +The 'next' pointer is not required, as we are using a flat hash table. 
+\end_layout + +\begin_layout Enumerate +'rec_len' can instead be expressed as an addition to key_len and data_len + (it accounts for wasted or overallocated length in the record). + Since the record length is always a multiple of 8, we can conveniently + fit it in 32 bits (representing up to 35 bits). +\end_layout + +\begin_layout Enumerate +'key_len' and 'data_len' can be reduced. + I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine + the two into one 64-bit field and using a 5 bit value which indicates at + what bit to divide the two. + Keys are unlikely to scale as fast as data, so I'm assuming a maximum key + size of 32 bits. +\end_layout + +\begin_layout Enumerate +'full_hash' is used to avoid a memcmp on the +\begin_inset Quotes eld +\end_inset + +miss +\begin_inset Quotes erd +\end_inset + + case, but this is diminishing returns after a handful of bits (at 10 bits, + it reduces 99.9% of false memcmp). + As an aside, as the lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. +\end_layout + +\begin_layout Enumerate +'magic' does not need to be enlarged: it currently reflects one of 5 values + (used, free, dead, recovery, and unused_recovery). + It is useful for quick sanity checking however, and should not be eliminated. +\end_layout + +\begin_layout Enumerate +'tailer' is only used to coalesce free blocks (so a block to the right can + find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of the following + block (and the tailer only exists in free blocks). +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +This technique from Thomas Standish. + Data Structure Techniques. + Addison-Wesley, Reading, Massachusetts, 1980. +\end_layout + +\end_inset + + The current proposed coalescing algorithm doesn't need this, however. 
+\end_layout + +\begin_layout Standard +This produces a 16 byte used header like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_used_record { +\end_layout + +\begin_layout LyX-Code + uint32_t magic : 16, +\end_layout + +\begin_layout LyX-Code + prev_is_free: 1, +\end_layout + +\begin_layout LyX-Code + key_data_divide: 5, +\end_layout + +\begin_layout LyX-Code + top_hash: 10; +\end_layout + +\begin_layout LyX-Code + uint32_t extra_octets; +\end_layout + +\begin_layout LyX-Code + uint64_t key_and_data_len; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +And a free record like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_free_record { +\end_layout + +\begin_layout LyX-Code + uint32_t free_magic; +\end_layout + +\begin_layout LyX-Code + uint64_t total_length; +\end_layout + +\begin_layout LyX-Code + ... +\end_layout + +\begin_layout LyX-Code + uint64_t tailer; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout Subsection +Transaction Commit Requires 4 fdatasync +\end_layout + +\begin_layout Standard +The current transaction algorithm is: +\end_layout + +\begin_layout Enumerate +write_recovery_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +write_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +overwrite_with_new_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +remove_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Standard +On current ext3, each sync flushes all data to disk, so the next 3 syncs + are relatively expensive. + But this could become a performance bottleneck on other filesystems such + as ext4. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Neil Brown points out that this is overzealous, and only one sync is needed: +\end_layout + +\begin_layout Enumerate +Bundle the recovery data, a transaction counter and a strong checksum of + the new data. +\end_layout + +\begin_layout Enumerate +Strong checksum that whole bundle. +\end_layout + +\begin_layout Enumerate +Store the bundle in the database. +\end_layout + +\begin_layout Enumerate +Overwrite the oldest of the two recovery pointers in the header (identified + using the transaction counter) with the offset of this bundle. +\end_layout + +\begin_layout Enumerate +sync. +\end_layout + +\begin_layout Enumerate +Write the new data to the file. +\end_layout + +\begin_layout Standard +Checking for recovery means identifying the latest bundle with a valid checksum + and using the new data checksum to ensure that it has been applied. + This is more expensive than the current check, but need only be done at + open. + For running databases, a separate header field can be used to indicate + a transaction in progress; we need only check for recovery if this is set. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Does-Not" + +\end_inset + +TDB Does Not Have Snapshot Support +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + At some point you say +\begin_inset Quotes eld +\end_inset + +use a real database +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +But as a thought experiment, if we implemented transactions to only overwrite + free entries (this is tricky: there must not be a header in each entry + which indicates whether it is free, but use of presence in metadata elsewhere), + and a pointer to the hash table, we could create an entirely new commit + without destroying existing data. 
+ Then it would be easy to implement snapshots in a similar way. +\end_layout + +\begin_layout Standard +This would not allow arbitrary changes to the database, such as tdb_repack + does, and would require more space (since we have to preserve the current + and future entries at once). + If we used hash trees rather than one big hash table, we might only have + to rewrite some sections of the hash, too. +\end_layout + +\begin_layout Standard +We could then implement snapshots using a similar method, using multiple + different hash tables/free tables. +\end_layout + +\begin_layout Subsection +Transactions Cannot Operate in Parallel +\end_layout + +\begin_layout Standard +This would be useless for ldb, as it hits the index records with just about + every update. + It would add significant complexity in resolving clashes, and cause all + transaction callers to write their code to loop in the case where the + transactions spuriously failed. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We could solve a small part of the problem by providing read-only transactions. + These would allow one write transaction to begin, but it could not commit + until all r/o transactions are done. + This would require a new RO_TRANSACTION_LOCK, which would be upgraded on + commit. +\end_layout + +\begin_layout Subsection +Default Hash Function Is Suboptimal +\end_layout + +\begin_layout Standard +The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially + if we expand it to 64 bits), and works best when the hash bucket size is + a prime number (which also means a slow modulus). + In addition, it is highly predictable which could potentially lead to a + Denial of Service attack in some TDB uses.
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The Jenkins lookup3 hash +\begin_inset Foot +status open + +\begin_layout Plain Layout +http://burtleburtle.net/bob/c/lookup3.c +\end_layout + +\end_inset + + is a fast and superbly-mixing hash. + It's used by the Linux kernel and almost everything else. + This has the particular properties that it takes an initial seed, and produces + two 32 bit hash numbers, which we can combine into a 64-bit hash. +\end_layout + +\begin_layout Standard +The seed should be created at tdb-creation time from some random source, + and placed in the header. + This is far from foolproof, but adds a little bit of protection against + hash bombing. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "Reliable-Traversal-Adds" + +\end_inset + +Reliable Traversal Adds Complexity +\end_layout + +\begin_layout Standard +We lock a record during traversal iteration, and try to grab that lock in + the delete code. + If that grab on delete fails, we simply mark it deleted and continue onwards; + traversal checks for this condition and does the delete when it moves off + the record. +\end_layout + +\begin_layout Standard +If traversal terminates, the dead record may be left indefinitely. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove reliability guarantees; see +\begin_inset CommandInset ref +LatexCommand ref +reference "traverse-Proposed-Solution" + +\end_inset + +. +\end_layout + +\begin_layout Subsection +Fcntl Locking Adds Overhead +\end_layout + +\begin_layout Standard +Placing a fcntl lock means a system call, as does removing one. + This is actually one reason why transactions can be faster (everything + is locked once at transaction start). + In the uncontended case, this overhead can theoretically be eliminated. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +We tried this before with spinlock support, in the early days of TDB, and + it didn't make much difference except in manufactured benchmarks. +\end_layout + +\begin_layout Standard +We could use spinlocks (with futex kernel support under Linux), but it means + that we lose automatic cleanup when a process dies with a lock. + There is a method of auto-cleanup under Linux, but it's not supported by + other operating systems. + We could reintroduce a clear-if-first-style lock and sweep for dead futexes + on open, but that wouldn't help the normal case of one concurrent opener + dying. + Increasingly elaborate repair schemes could be considered, but they require + an ABI change (everyone must use them) anyway, so there's no need to do + this at the same time as everything else. +\end_layout + +\begin_layout Subsection +Some Transactions Don't Require Durability +\end_layout + +\begin_layout Standard +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast) + usage, and occasionally empties the results into a transactional TDB. + This kind of usage prioritizes performance over durability: as long as + we are consistent, data can be lost. +\end_layout + +\begin_layout Standard +This would be more neatly implemented inside tdb: a +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + transaction commit (ie. + syncless) which meant that data may be reverted on a crash. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +Unfortunately any transaction scheme which overwrites old data requires + a sync before that overwrite to avoid the possibility of corruption. 
+\end_layout + +\begin_layout Standard +It seems possible to use a scheme similar to that described in +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Does-Not" + +\end_inset + +,where transactions are committed without overwriting existing data, and + an array of top-level pointers were available in the header. + If the transaction is +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + then we would not need a sync at all: existing processes would pick up + the new hash table and free list and work with that. +\end_layout + +\begin_layout Standard +At some later point, a sync would allow recovery of the old data into the + free lists (perhaps when the array of top-level pointers filled). + On crash, tdb_open() would examine the array of top levels, and apply the + transactions until it encountered an invalid checksum. +\end_layout + +\end_body +\end_document diff --git a/ccan/tdb2/doc/design.lyx,v b/ccan/tdb2/doc/design.lyx,v new file mode 100644 index 00000000..e44b3fc4 --- /dev/null +++ b/ccan/tdb2/doc/design.lyx,v @@ -0,0 +1,3106 @@ +head 1.6; +access; +symbols; +locks; strict; +comment @# @; + + +1.6 +date 2010.08.02.00.21.43; author rusty; state Exp; +branches; +next 1.5; + +1.5 +date 2010.08.02.00.21.16; author rusty; state Exp; +branches; +next 1.4; + +1.4 +date 2010.05.10.13.09.11; author rusty; state Exp; +branches; +next 1.3; + +1.3 +date 2010.05.10.11.58.37; author rusty; state Exp; +branches; +next 1.2; + +1.2 +date 2010.05.10.05.35.13; author rusty; state Exp; +branches; +next 1.1; + +1.1 +date 2010.05.04.02.29.16; author rusty; state Exp; +branches; +next ; + + +desc +@First draft +@ + + +1.6 +log +@Commit changes +@ +text +@#LyX 1.6.5 created this file. 
For more info see http://www.lyx.org/ +\lyxformat 345 +\begin_document +\begin_header +\textclass article +\use_default_options true +\language english +\inputencoding auto +\font_roman default +\font_sans default +\font_typewriter default +\font_default_family default +\font_sc false +\font_osf false +\font_sf_scale 100 +\font_tt_scale 100 + +\graphics default +\paperfontsize default +\use_hyperref false +\papersize default +\use_geometry false +\use_amsmath 1 +\use_esint 1 +\cite_engine basic +\use_bibtopic false +\paperorientation portrait +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\defskip medskip +\quotes_language english +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes true +\output_changes true +\author "" +\author "" +\end_header + +\begin_body + +\begin_layout Title +TDB2: A Redesigning The Trivial DataBase +\end_layout + +\begin_layout Author +Rusty Russell, IBM Corporation +\end_layout + +\begin_layout Date +26-July-2010 +\end_layout + +\begin_layout Abstract +The Trivial DataBase on-disk format is 32 bits; with usage cases heading + towards the 4G limit, that must change. + This required breakage provides an opportunity to revisit TDB's other design + decisions and reassess them. +\end_layout + +\begin_layout Section +Introduction +\end_layout + +\begin_layout Standard +The Trivial DataBase was originally written by Andrew Tridgell as a simple + key/data pair storage system with the same API as dbm, but allowing multiple + readers and writers while being small enough (< 1000 lines of C) to include + in SAMBA. + The simple design created in 1999 has proven surprisingly robust and performant +, used in Samba versions 3 and 4 as well as numerous other projects. + Its useful life was greatly increased by the (backwards-compatible!) addition + of transaction support in 2005. 
+\end_layout + +\begin_layout Standard +The wider variety and greater demands of TDB-using code have led to some + organic growth of the API, as well as some compromises on the implementation. + None of these, by themselves, are seen as show-stoppers, but the cumulative + effect is a loss of elegance over the initial, simple TDB implementation. + Here is a table of the approximate number of lines of implementation code + and number of API functions at the end of each year: +\end_layout + +\begin_layout Standard +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout +Year End +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +API Functions +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +Lines of C Code Implementation +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +1999 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +13 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1195 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2000 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +24 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1725 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2001 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +32 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2228 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2002 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2481 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2003 +\end_layout + +\end_inset + + +\begin_inset
Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2552 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2004 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +40 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2584 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2005 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +38 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2647 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2006 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +52 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3754 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2007 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +66 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4398 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2008 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +71 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +4768 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +2009 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +73 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +5715 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Standard +This review is an attempt to catalog and address all the known issues with + TDB and create solutions which address the problems without significantly + increasing complexity; all involved are far too aware of the dangers 
of + second system syndrome in rewriting a successful project like this. +\end_layout + +\begin_layout Section +API Issues +\end_layout + +\begin_layout Subsection +tdb_open_ex Is Not Expandable +\end_layout + +\begin_layout Standard +The tdb_open() call was expanded to tdb_open_ex(), which added an optional + hashing function and an optional logging function argument. + Additional arguments to open would require the introduction of a tdb_open_ex2 + call etc. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +tdb_open() will take a linked-list of attributes: +\end_layout + +\begin_layout LyX-Code +enum tdb_attribute { +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_LOG = 0, +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_HASH = 1 +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_base { +\end_layout + +\begin_layout LyX-Code + enum tdb_attribute attr; +\end_layout + +\begin_layout LyX-Code + union tdb_attribute *next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_log { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */ +\end_layout + +\begin_layout LyX-Code + tdb_log_func log_fn; +\end_layout + +\begin_layout LyX-Code + void *log_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_hash { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */ +\end_layout + +\begin_layout LyX-Code + tdb_hash_func hash_fn; +\end_layout + +\begin_layout LyX-Code + void *hash_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +union tdb_attribute { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_log log; +\end_layout + 
+\begin_layout LyX-Code + struct tdb_attribute_hash hash; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +This allows future attributes to be added, even if this expands the size + of the union. +\end_layout + +\begin_layout Subsection +tdb_traverse Makes Impossible Guarantees +\end_layout + +\begin_layout Standard +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it + was thought that it was important to guarantee that all records which exist + at the start and end of the traversal would be included, and no record + would be included twice. +\end_layout + +\begin_layout Standard +This adds complexity (see +\begin_inset CommandInset ref +LatexCommand ref +reference "Reliable-Traversal-Adds" + +\end_inset + +) and does not work anyway for records which are altered (in particular, + those which are expanded may be effectively deleted and re-added behind + the traversal). +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "traverse-Proposed-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Abandon the guarantee. + You will see every record if no changes occur during your traversal, otherwise + you will see some subset. + You can prevent changes by using a transaction or the locking API. +\end_layout + +\begin_layout Subsection +Nesting of Transactions Is Fraught +\end_layout + +\begin_layout Standard +TDB has alternated between allowing nested transactions and not allowing + them. + Various paths in the Samba codebase assume that transactions will nest, + and in a sense they can: the operation is only committed to disk when the + outer transaction is committed. + There are two problems, however: +\end_layout + +\begin_layout Enumerate +Canceling the inner transaction will cause the outer transaction commit + to fail, and will not undo any operations since the inner transaction began. 
+ This problem is soluble with some additional internal code. +\end_layout + +\begin_layout Enumerate +An inner transaction commit can be cancelled by the outer transaction. + This is desirable in the way which Samba's database initialization code + uses transactions, but could be a surprise to any users expecting a successful + transaction commit to expose changes to others. +\end_layout + +\begin_layout Standard +The current solution is to specify the behavior at tdb_open(), with the + default currently that nested transactions are allowed. + This flag can also be changed at runtime. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Given the usage patterns, it seems that the +\begin_inset Quotes eld +\end_inset + +least-surprise +\begin_inset Quotes erd +\end_inset + + behavior of disallowing nested transactions should become the default. + Additionally, it seems the outer transaction is the only code which knows + whether inner transactions should be allowed, so a flag to indicate this + could be added to tdb_transaction_start. + However, this behavior can be simulated with a wrapper which uses tdb_add_flags +() and tdb_remove_flags(), so the API should not be expanded for this relatively +-obscure case. +\end_layout + +\begin_layout Subsection +Incorrect Hash Function is Not Detected +\end_layout + +\begin_layout Standard +tdb_open_ex() allows the calling code to specify a different hash function + to use, but does not check that all other processes accessing this tdb + are using the same hash function. + The result is that records are missing from tdb_fetch(). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain an example hash result (eg. + the hash of 0xdeadbeef), and tdb_open_ex() should check that the given + hash function produces the same answer, or fail the tdb_open call. 
+\end_layout + +\begin_layout Subsection +tdb_set_max_dead/TDB_VOLATILE Expose Implementation +\end_layout + +\begin_layout Standard +In response to scalability issues with the free list ( +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Freelist-Is" + +\end_inset + +) two API workarounds have been incorporated in TDB: tdb_set_max_dead() + and the TDB_VOLATILE flag to tdb_open. + The latter actually calls the former with an argument of +\begin_inset Quotes eld +\end_inset + +5 +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +This code allows deleted records to accumulate without putting them in the + free list. + On delete we iterate through each chain and free them in a batch if there + are more than max_dead entries. + These are never otherwise recycled except as a side-effect of a tdb_repack. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With the scalability problems of the freelist solved, this API can be removed. + The TDB_VOLATILE flag may still be useful as a hint that store and delete + of records will be at least as common as fetch in order to allow some internal + tuning, but initially will become a no-op. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Files-Cannot" + +\end_inset + +TDB Files Cannot Be Opened Multiple Times In The Same Process +\end_layout + +\begin_layout Standard +No process can open the same TDB twice; we check and disallow it. + This is an unfortunate side-effect of fcntl locks, which operate on a per-file + rather than per-file-descriptor basis, and do not nest. + Thus, closing any file descriptor on a file clears all the locks obtained + by this process, even if they were placed using a different file descriptor! 
+\end_layout + +\begin_layout Standard +Note that even if this were solved, deadlock could occur if operations were + nested: this is a more manageable programming error in most cases. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We could lobby POSIX to fix the perverse rules, or at least lobby Linux + to violate them so that the most common implementation does not have this + restriction. + This would be a generally good idea for other fcntl lock users. +\end_layout + +\begin_layout Standard +Samba uses a wrapper which hands out the same tdb_context to multiple callers + if this happens, and does simple reference counting. + We should do this inside the tdb library, which already emulates lock nesting + internally; it would need to recognize when deadlock occurs within a single + process. + This would create a new failure mode for tdb operations (while we currently + handle locking failures, they are impossible in normal use and a process + encountering them can do little but give up). +\end_layout + +\begin_layout Standard +I do not see benefit in an additional tdb_open flag to indicate whether + re-opening is allowed, although there may be some benefit to adding a + call to detect when a tdb_context is shared, to allow others to create such + an API. +\end_layout + +\begin_layout Subsection +TDB API Is Not POSIX Thread-safe +\end_layout + +\begin_layout Standard +The TDB API uses an error code which can be queried after an operation to + determine what went wrong. + This programming model does not work with threads, unless specific additional + guarantees are given by the implementation. + In addition, even otherwise-independent threads cannot open the same TDB + (as in +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Files-Cannot" + +\end_inset + +).
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Rearchitecting the API to include a tdb_errcode pointer would be a great + deal of churn; we are better to guarantee that the tdb_errcode is per-thread + so the current programming model can be maintained. +\end_layout + +\begin_layout Standard +This requires dynamic per-thread allocations, which is awkward with POSIX + threads (pthread_key_create space is limited and we cannot simply allocate + a key for every TDB). +\end_layout + +\begin_layout Standard +Internal locking is required to make sure that fcntl locks do not overlap + between threads, and also that the global list of tdbs is maintained. +\end_layout + +\begin_layout Standard +The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe + version of the library, and otherwise no overhead will exist. +\end_layout + +\begin_layout Subsection +*_nonblock Functions And *_mark Functions Expose Implementation +\end_layout + +\begin_layout Standard +CTDB +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +Clustered TDB, see http://ctdb.samba.org +\end_layout + +\end_inset + + wishes to operate on TDB in a non-blocking manner. + This is currently done as follows: +\end_layout + +\begin_layout Enumerate +Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). + If this fails: +\end_layout + +\begin_layout Enumerate +Fork a child process, and wait for it to call the normal variant (eg. + tdb_lockall). +\end_layout + +\begin_layout Enumerate +If the child succeeds, call the _mark variant to indicate we already have + the locks (eg. + tdb_lockall_mark). +\end_layout + +\begin_layout Enumerate +Upon completion, tell the child to release the locks (eg. + tdb_unlockall). +\end_layout + +\begin_layout Enumerate +Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark).
+\end_layout + +\begin_layout Standard +There are several issues with this approach. + Firstly, adding two new variants of each function clutters the API for + an obscure use, and so not all functions have three variants. + Secondly, it assumes that all paths of the functions ask for the same locks, + otherwise the parent process will have to get a lock which the child doesn't + have under some circumstances. + I don't believe this is currently the case, but it constrains the implementatio +n. + +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "Proposed-Solution-locking-hook" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Implement a hook for locking methods, so that the caller can control the + calls to create and remove fcntl locks. + In this scenario, ctdbd would operate as follows: +\end_layout + +\begin_layout Enumerate +Call the normal API function, eg tdb_lockall(). +\end_layout + +\begin_layout Enumerate +When the lock callback comes in, check if the child has the lock. + Initially, this is always false. + If so, return 0. + Otherwise, try to obtain it in non-blocking mode. + If that fails, return EWOULDBLOCK. +\end_layout + +\begin_layout Enumerate +Release locks in the unlock callback as normal. +\end_layout + +\begin_layout Enumerate +If tdb_lockall() fails, see if we recorded a lock failure; if so, call the + child to repeat the operation. +\end_layout + +\begin_layout Enumerate +The child records what locks it obtains, and returns that information to + the parent. +\end_layout + +\begin_layout Enumerate +When the child has succeeded, goto 1. +\end_layout + +\begin_layout Standard +This is flexible enough to handle any potential locking scenario, even when + lock requirements change. + It can be optimized so that the parent does not release locks, just tells + the child which locks it doesn't need to obtain. 
+\end_layout + +\begin_layout Standard +It also keeps the complexity out of the API, and in ctdbd where it is needed. +\end_layout + +\begin_layout Subsection +tdb_chainlock Functions Expose Implementation +\end_layout + +\begin_layout Standard +tdb_chainlock locks some number of records, including the record indicated + by the given key. + This gives atomicity guarantees; no-one can start a transaction, alter, + read or delete that key while the lock is held. +\end_layout + +\begin_layout Standard +It also makes the same guarantee for any other key in the chain, which is + an internal implementation detail and potentially a cause for deadlock. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + It would be nice to have an explicit single entry lock which affected no + other keys. + Unfortunately, this won't work for an entry which doesn't exist. + Thus while chainlock may be implemented more efficiently for the existing + case, it will still have overlap issues with the non-existing case. + So it is best to keep the current (lack of) guarantee about which records + will be affected to avoid constraining our implementation. +\end_layout + +\begin_layout Subsection +Signal Handling is Not Race-Free +\end_layout + +\begin_layout Standard +The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate + that the tdb locking code should return with a failure, rather than trying + again when a signal is received (and errno == EINTR). + This is usually used to implement timeouts. +\end_layout + +\begin_layout Standard +Unfortunately, this does not work in the case where the signal is received + before the tdb code enters the fcntl() call to place the lock: the code + will sleep within the fcntl() code, unaware that the signal wants it to + exit. + In the case of long timeouts, this does not happen in practice.
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The locking hooks proposed in +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + would allow the user to decide on whether to fail the lock acquisition + on a signal. + This allows the caller to choose their own compromise: they could narrow + the race by checking immediately before the fcntl call. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +It may be possible to make this race-free in some implementations by having + the signal handler alter the struct flock to make it invalid. + This will cause the fcntl() lock call to fail with EINVAL if the signal + occurs before the kernel is entered, otherwise EINTR. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsection +The API Uses Gratuitous Typedefs, Capitals +\end_layout + +\begin_layout Standard +typedefs are useful for providing source compatibility when types can differ + across implementations, or arguably in the case of function pointer definitions + which are hard for humans to parse. + Otherwise it is simply obfuscation and pollutes the namespace. +\end_layout + +\begin_layout Standard +Capitalization is usually reserved for compile-time constants and macros. +\end_layout + +\begin_layout Description +TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the + definition isn't visible to the API user anyway. +\end_layout + +\begin_layout Description +TDB_DATA There is no reason to use this over struct TDB_DATA; the struct + needs to be understood by the API user. +\end_layout + +\begin_layout Description +struct +\begin_inset space ~ +\end_inset + +TDB_DATA This would normally be called 'struct tdb_data'. +\end_layout + +\begin_layout Description +enum +\begin_inset space ~ +\end_inset + +TDB_ERROR Similarly, this would normally be enum tdb_error.
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + Introducing lower case variants would please pedants like myself, but if + it were done the existing ones should be kept. + There is little point forcing a purely cosmetic change upon tdb users. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "tdb_log_func-Doesnt-Take" + +\end_inset + +tdb_log_func Doesn't Take The Private Pointer +\end_layout + +\begin_layout Standard +For API compatibility reasons, the logging function needs to call tdb_get_loggin +g_private() to retrieve the pointer registered by the tdb_open_ex for logging. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +It should simply take an extra argument, since we are prepared to break + the API/ABI. +\end_layout + +\begin_layout Subsection +Various Callback Functions Are Not Typesafe +\end_layout + +\begin_layout Standard +The callback functions in tdb_set_logging_function (after +\begin_inset CommandInset ref +LatexCommand ref +reference "tdb_log_func-Doesnt-Take" + +\end_inset + + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check + all take void * and must internally convert it to the argument type they + were expecting. +\end_layout + +\begin_layout Standard +If this type changes, the compiler will not produce warnings on the callers, + since it only sees void *. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With careful use of macros, we can create callback functions which give + a warning when used on gcc and the types of the callback and its private + argument differ. + Unsupported compilers will not give a warning, which is no worse than now. + In addition, the callbacks become clearer, as they need not use void * + for their parameter. 
+\end_layout + +\begin_layout Standard +See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html +\end_layout + +\begin_layout Subsection +TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic +\end_layout + +\begin_layout Standard +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should + be cleared if the caller discovers it is the only process with the TDB + open. + However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not + be detected, so will have the TDB erased underneath them (usually resulting + in a crash). +\end_layout + +\begin_layout Standard +There is a similar issue on fork(); if the parent exits (or otherwise closes + the tdb) before the child calls tdb_reopen_all() to establish the lock + used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener + at that moment will believe it alone has opened the TDB and will erase + it. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove TDB_CLEAR_IF_FIRST. + Other workarounds are possible, but see +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +. +\end_layout + +\begin_layout Section +Performance And Scalability Issues +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +TDB_CLEAR_IF_FIRST Imposes Performance Penalty +\end_layout + +\begin_layout Standard +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset + 4 (aka. + the ACTIVE_LOCK). + While these locks never conflict in normal tdb usage, they do add substantial + overhead for most fcntl lock implementations when the kernel scans to detect + if a lock conflict exists. 
+ This is often a single linked list, making the time to acquire and release + a fcntl lock O(N) where N is the number of processes with the TDB open, + not the number actually doing work. +\end_layout + +\begin_layout Standard +In a Samba server it is common to have huge numbers of clients sitting idle, + and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +There is a flag to tdb_reopen_all() which is used for this optimization: + if the parent process will outlive the child, the child does not need the + ACTIVE_LOCK. + This is a workaround for this very performance issue. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove the flag. + It was a neat idea, but even trivial servers tend to know when they are + initializing for the first time and can simply unlink the old tdb at that + point. +\end_layout + +\begin_layout Subsection +TDB Files Have a 4G Limit +\end_layout + +\begin_layout Standard +This seems to be becoming an issue (so much for +\begin_inset Quotes eld +\end_inset + +trivial +\begin_inset Quotes erd +\end_inset + +!), particularly for ldb. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +A new, incompatible TDB format which uses 64 bit offsets internally rather + than 32 bit as now. + For simplicity of endian conversion (which TDB does on the fly if required), + all values will be 64 bit on disk. + In practice, some upper bits may be used for other purposes, but at least + 56 bits will be available for file offsets. +\end_layout + +\begin_layout Standard +tdb_open() will automatically detect the old version, and even create them + if TDB_VERSION6 is specified to tdb_open. 
+\end_layout + +\begin_layout Standard +32 bit processes will still be able to access TDBs larger than 4G (assuming + that their off_t allows them to seek to 64 bits), they will gracefully + fall back as they fail to mmap. + This can happen already with large TDBs. +\end_layout + +\begin_layout Standard +Old versions of tdb will fail to open the new TDB files (since 28 August + 2009, commit 398d0c29290: prior to that any unrecognized file format would + be erased and initialized as a fresh tdb!) +\end_layout + +\begin_layout Subsection +TDB Records Have a 4G Limit +\end_layout + +\begin_layout Standard +This has not been a reported problem, and the API uses size_t which can + be 64 bit on 64 bit platforms. + However, other limits may have made such an issue moot. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Record sizes will be 64 bit, with an error returned on 32 bit platforms + which try to access such records (the current implementation would return + TDB_ERR_OOM in a similar case). + It seems unlikely that 32 bit keys will be a limitation, so the implementation + may not support this (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +). +\end_layout + +\begin_layout Subsection +Hash Size Is Determined At TDB Creation Time +\end_layout + +\begin_layout Standard +TDB contains a number of hash chains in the header; the number is specified + at creation time, and defaults to 131. + This is such a bottleneck on large databases (as each hash chain gets quite + long), that LDB uses 10,000 for this hash. + In general it is impossible to know what the 'right' answer is at database + creation time. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +After comprehensive performance testing on various scalable hash variants +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying + because I was previously convinced that an expanding tree of hashes would + be very close to optimal. +\end_layout + +\end_inset + +, it became clear that it is hard to beat a straight linear hash table which + doubles in size when it reaches saturation. + There are three details which become important: +\end_layout + +\begin_layout Enumerate +On encountering a full bucket, we use the next bucket. +\end_layout + +\begin_layout Enumerate +Extra hash bits are stored with the offset, to reduce comparisons. +\end_layout + +\begin_layout Enumerate +A marker entry is used on deleting an entry. +\end_layout + +\begin_layout Standard +The doubling of the table must be done under a transaction; we will not + reduce it on deletion, so it will be an unusual case. + It will either be placed at the head (other entries will be moved out the + way so we can expand). + We could have a pointer in the header to the current hashtable location, + but that pointer would have to be read frequently to check for hashtable + moves. +\end_layout + +\begin_layout Standard +The locking for this is slightly more complex than the chained case; we + currently have one lock per bucket, and that means we would need to expand + the lock if we overflow to the next bucket. + The frequency of such collisions will affect our locking heuristics: we + can always lock more buckets than we need. +\end_layout + +\begin_layout Standard +One possible optimization is to only re-check the hash size on an insert + or a lookup miss. 
+\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Freelist-Is" + +\end_inset + +TDB Freelist Is Highly Contended +\end_layout + +\begin_layout Standard +TDB uses a single linked list for the free list. + Allocation occurs as follows, using heuristics which have evolved over + time: +\end_layout + +\begin_layout Enumerate +Get the free list lock for this whole operation. +\end_layout + +\begin_layout Enumerate +Multiply length by 1.25, so we always over-allocate by 25%. +\end_layout + +\begin_layout Enumerate +Set the slack multiplier to 1. +\end_layout + +\begin_layout Enumerate +Examine the current freelist entry: if it is > length but < the current + best case, remember it as the best case. +\end_layout + +\begin_layout Enumerate +Multiply the slack multiplier by 1.05. +\end_layout + +\begin_layout Enumerate +If our best fit so far is less than length * slack multiplier, return it. + The slack will be turned into a new free record if it's large enough. +\end_layout + +\begin_layout Enumerate +Otherwise, go onto the next freelist entry. +\end_layout + +\begin_layout Standard +Deleting a record occurs as follows: +\end_layout + +\begin_layout Enumerate +Lock the hash chain for this whole operation. +\end_layout + +\begin_layout Enumerate +Walk the chain to find the record, keeping the prev pointer offset. +\end_layout + +\begin_layout Enumerate +If max_dead is non-zero: +\end_layout + +\begin_deeper +\begin_layout Enumerate +Walk the hash chain again and count the dead records. +\end_layout + +\begin_layout Enumerate +If it's more than max_dead, bulk free all the dead ones (similar to steps + 4 and below, but the lock is only obtained once). +\end_layout + +\begin_layout Enumerate +Simply mark this record as dead and return. + +\end_layout + +\end_deeper +\begin_layout Enumerate +Get the free list lock for the remainder of this operation. 
+\end_layout + +\begin_layout Enumerate +\begin_inset CommandInset label +LatexCommand label +name "right-merging" + +\end_inset + +Examine the following block to see if it is free; if so, enlarge the current + block and remove that block from the free list. + This was disabled, as removal from the free list was O(entries-in-free-list). +\end_layout + +\begin_layout Enumerate +Examine the preceding block to see if it is free: for this reason, each + block has a 32-bit tailer which indicates its length. + If it is free, expand it to cover our new block and return. +\end_layout + +\begin_layout Enumerate +Otherwise, prepend ourselves to the free list. +\end_layout + +\begin_layout Standard +Disabling right-merging (step +\begin_inset CommandInset ref +LatexCommand ref +reference "right-merging" + +\end_inset + +) causes fragmentation; the other heuristics proved insufficient to address + this, so the final answer to this was that when we expand the TDB file + inside a transaction commit, we repack the entire tdb. +\end_layout + +\begin_layout Standard +The single list lock limits our allocation rate; due to the other issues + this is not currently seen as a bottleneck. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The first step is to remove all the current heuristics, as they obviously + interact, then examine them once the lock contention is addressed. +\end_layout + +\begin_layout Standard +The free list must be split to reduce contention. + Assuming perfect free merging, we can at most have 1 free list entry for + each entry. + This implies that the number of free lists is related to the size of the + hash table, but as it is rare to walk a large number of free list entries + we can use far fewer, say 1/32 of the number of hash buckets. 
+\end_layout + +\begin_layout Standard +There are various benefits in using per-size free lists (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +) but it's not clear this would reduce contention in the common case where + all processes are allocating/freeing the same size. + Thus we almost certainly need to divide in other ways: the most obvious + is to divide the file into zones, and using a free list (or set of free + lists) for each. + This approximates address ordering. +\end_layout + +\begin_layout Standard +Note that this means we need to split the free lists when we expand the + file; this is probably acceptable when we double the hash table size, since + that is such an expensive operation already. + In the case of increasing the file size, there is an optimization we can + use: if we use M in the formula above as the file size rounded up to the + next power of 2, we only need reshuffle free lists when the file size crosses + a power of 2 boundary, +\emph on +and +\emph default +reshuffling the free lists is trivial: we simply merge every consecutive + pair of free lists. +\end_layout + +\begin_layout Standard +The basic algorithm is as follows. + Freeing is simple: +\end_layout + +\begin_layout Enumerate +Identify the correct zone. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +Re-check the zone (we didn't have a lock, sizes could have changed): relock + if necessary. +\end_layout + +\begin_layout Enumerate +Place the freed entry in the list for that zone. +\end_layout + +\begin_layout Standard +Allocation is a little more complicated, as we perform delayed coalescing + at this point: +\end_layout + +\begin_layout Enumerate +Pick a zone either the zone we last freed into, or based on a +\begin_inset Quotes eld +\end_inset + +random +\begin_inset Quotes erd +\end_inset + + number. 
+\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +Re-check the zone: relock if necessary. +\end_layout + +\begin_layout Enumerate +If the top entry is large enough, remove it from the list and return it. +\end_layout + +\begin_layout Enumerate +Otherwise, coalesce entries in the list. If there was no entry large enough, + unlock the list and try the next zone. +\end_layout + +\begin_layout Enumerate +If no zone satisfies, expand the file. +\end_layout + +\begin_layout Standard +This optimizes rapid insert/delete of free list entries by not coalescing + them all the time. + First-fit address ordering seems to be fairly good for keeping + fragmentation low (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +). + Note that address ordering does not need a tailer to coalesce, though if + we needed one we could have one cheaply: see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +. + +\end_layout + +\begin_layout Standard +I anticipate that the number of entries in each free zone would be small, + but it might be worth using one free entry to hold pointers to the others + for cache efficiency. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Becomes-Fragmented" + +\end_inset + +TDB Becomes Fragmented +\end_layout + +\begin_layout Standard +Much of this is a result of allocation strategy +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +The Memory Fragmentation Problem: Solved? 
Johnstone & Wilson 1995 ftp://ftp.cs.ute +xas.edu/pub/garbage/malloc/ismm98.ps +\end_layout + +\end_inset + + and deliberate hobbling of coalescing; internal fragmentation (aka overallocati +on) is deliberately set at 25%, and external fragmentation is only cured + by the decision to repack the entire db when a transaction commit needs + to enlarge the file. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The 25% overhead on allocation works in practice for ldb because indexes + tend to expand by one record at a time. + This internal fragmentation can be resolved by having an +\begin_inset Quotes eld +\end_inset + +expanded +\begin_inset Quotes erd +\end_inset + + bit in the header to note entries that have previously expanded, and allocating + more space for them. +\end_layout + +\begin_layout Standard +There is a spectrum of possible solutions for external fragmentation: + one is to use a fragmentation-avoiding allocation strategy such as a best-fit + address-order allocator. + The other end of the spectrum would be to use a bump allocator (very fast + and simple) and simply repack the file when we reach the end. +\end_layout + +\begin_layout Standard +There are three problems with efficient fragmentation-avoiding allocators: + they are non-trivial, they tend to use a single free list for each size, + and there's no evidence that tdb allocation patterns will match those recorded + for general allocators (though it seems likely). +\end_layout + +\begin_layout Standard +Thus we don't spend too much effort on external fragmentation; we will be + no worse than the current code if we need to repack on occasion. + More effort is spent on reducing freelist contention, and reducing overhead. 
+\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Records-Incur-A" + +\end_inset + +Records Incur A 28-Byte Overhead +\end_layout + +\begin_layout Standard +Each TDB record has a header as follows: +\end_layout + +\begin_layout LyX-Code +struct tdb_record { +\end_layout + +\begin_layout LyX-Code + tdb_off_t next; /* offset of the next record in the list */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t rec_len; /* total byte length of record */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t key_len; /* byte length of key */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t data_len; /* byte length of data */ +\end_layout + +\begin_layout LyX-Code + uint32_t full_hash; /* the full 32 bit hash of the key */ +\end_layout + +\begin_layout LyX-Code + uint32_t magic; /* try to catch errors */ +\end_layout + +\begin_layout LyX-Code + /* the following union is implied: +\end_layout + +\begin_layout LyX-Code + union { +\end_layout + +\begin_layout LyX-Code + char record[rec_len]; +\end_layout + +\begin_layout LyX-Code + struct { +\end_layout + +\begin_layout LyX-Code + char key[key_len]; +\end_layout + +\begin_layout LyX-Code + char data[data_len]; +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + uint32_t totalsize; (tailer) +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + */ +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +Naively, this would double to a 56-byte overhead on a 64 bit implementation. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We can use various techniques to reduce this for an allocated block: +\end_layout + +\begin_layout Enumerate +The 'next' pointer is not required, as we are using a flat hash table. 
+\end_layout + +\begin_layout Enumerate +'rec_len' can instead be expressed as an addition to key_len and data_len + (it accounts for wasted or overallocated length in the record). + Since the record length is always a multiple of 8, we can conveniently + fit it in 32 bits (representing up to 35 bits). +\end_layout + +\begin_layout Enumerate +'key_len' and 'data_len' can be reduced. + I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine + the two into one 64-bit field and using a 5 bit value which indicates at + what bit to divide the two. + Keys are unlikely to scale as fast as data, so I'm assuming a maximum key + size of 32 bits. +\end_layout + +\begin_layout Enumerate +'full_hash' is used to avoid a memcmp on the +\begin_inset Quotes eld +\end_inset + +miss +\begin_inset Quotes erd +\end_inset + + case, but this is diminishing returns after a handful of bits (at 10 bits, + it reduces 99.9% of false memcmp). + As an aside, as the lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. +\end_layout + +\begin_layout Enumerate +'magic' does not need to be enlarged: it currently reflects one of 5 values + (used, free, dead, recovery, and unused_recovery). + It is useful for quick sanity checking however, and should not be eliminated. +\end_layout + +\begin_layout Enumerate +'tailer' is only used to coalesce free blocks (so a block to the right can + find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of the following + block (and the tailer only exists in free blocks). +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +This technique from Thomas Standish. + Data Structure Techniques. + Addison-Wesley, Reading, Massachusetts, 1980. +\end_layout + +\end_inset + + The current proposed coalescing algorithm doesn't need this, however. 
+\end_layout + +\begin_layout Standard +This produces a 16 byte used header like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_used_record { +\end_layout + +\begin_layout LyX-Code + uint32_t magic : 16, +\end_layout + +\begin_layout LyX-Code + prev_is_free: 1, +\end_layout + +\begin_layout LyX-Code + key_data_divide: 5, +\end_layout + +\begin_layout LyX-Code + top_hash: 10; +\end_layout + +\begin_layout LyX-Code + uint32_t extra_octets; +\end_layout + +\begin_layout LyX-Code + uint64_t key_and_data_len; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +And a free record like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_free_record { +\end_layout + +\begin_layout LyX-Code + uint32_t free_magic; +\end_layout + +\begin_layout LyX-Code + uint64_t total_length; +\end_layout + +\begin_layout LyX-Code + ... +\end_layout + +\begin_layout LyX-Code + uint64_t tailer; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout Subsection +Transaction Commit Requires 4 fdatasync +\end_layout + +\begin_layout Standard +The current transaction algorithm is: +\end_layout + +\begin_layout Enumerate +write_recovery_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +write_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +overwrite_with_new_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +remove_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Standard +On current ext3, each sync flushes all data to disk, so the next 3 syncs + are relatively expensive. + But this could become a performance bottleneck on other filesystems such + as ext4. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Neil Brown points out that this is overzealous, and only one sync is needed: +\end_layout + +\begin_layout Enumerate +Bundle the recovery data, a transaction counter and a strong checksum of + the new data. +\end_layout + +\begin_layout Enumerate +Strong checksum that whole bundle. +\end_layout + +\begin_layout Enumerate +Store the bundle in the database. +\end_layout + +\begin_layout Enumerate +Overwrite the oldest of the two recovery pointers in the header (identified + using the transaction counter) with the offset of this bundle. +\end_layout + +\begin_layout Enumerate +sync. +\end_layout + +\begin_layout Enumerate +Write the new data to the file. +\end_layout + +\begin_layout Standard +Checking for recovery means identifying the latest bundle with a valid checksum + and using the new data checksum to ensure that it has been applied. + This is more expensive than the current check, but need only be done at + open. + For running databases, a separate header field can be used to indicate + a transaction in progress; we need only check for recovery if this is set. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Does-Not" + +\end_inset + +TDB Does Not Have Snapshot Support +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + At some point you say +\begin_inset Quotes eld +\end_inset + +use a real database +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +But as a thought experiment, if we implemented transactions to only overwrite + free entries (this is tricky: there must not be a header in each entry + which indicates whether it is free, but use of presence in metadata elsewhere), + and a pointer to the hash table, we could create an entirely new commit + without destroying existing data. 
+ Then it would be easy to implement snapshots in a similar way. +\end_layout + +\begin_layout Standard +This would not allow arbitrary changes to the database, such as tdb_repack + does, and would require more space (since we have to preserve the current + and future entries at once). + If we used hash trees rather than one big hash table, we might only have + to rewrite some sections of the hash, too. +\end_layout + +\begin_layout Standard +We could then implement snapshots using a similar method, using multiple + different hash tables/free tables. +\end_layout + +\begin_layout Subsection +Transactions Cannot Operate in Parallel +\end_layout + +\begin_layout Standard +This would be useless for ldb, as it hits the index records with just about + every update. + It would add significant complexity in resolving clashes, and cause all + transaction callers to write their code to loop in the case where the + transactions spuriously failed. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We could solve a small part of the problem by providing read-only transactions. + These would allow one write transaction to begin, but it could not commit + until all r/o transactions are done. + This would require a new RO_TRANSACTION_LOCK, which would be upgraded on + commit. +\end_layout + +\begin_layout Subsection +Default Hash Function Is Suboptimal +\end_layout + +\begin_layout Standard +The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially + if we expand it to 64 bits), and works best when the hash bucket size is + a prime number (which also means a slow modulus). + In addition, it is highly predictable which could potentially lead to a + Denial of Service attack in some TDB uses. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The Jenkins lookup3 hash +\begin_inset Foot +status open + +\begin_layout Plain Layout +http://burtleburtle.net/bob/c/lookup3.c +\end_layout + +\end_inset + + is a fast and superbly-mixing hash. + It's used by the Linux kernel and almost everything else. + This has the particular properties that it takes an initial seed, and produces + two 32 bit hash numbers, which we can combine into a 64-bit hash. +\end_layout + +\begin_layout Standard +The seed should be created at tdb-creation time from some random source, + and placed in the header. + This is far from foolproof, but adds a little bit of protection against + hash bombing. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "Reliable-Traversal-Adds" + +\end_inset + +Reliable Traversal Adds Complexity +\end_layout + +\begin_layout Standard +We lock a record during traversal iteration, and try to grab that lock in + the delete code. + If that grab on delete fails, we simply mark it deleted and continue onwards; + traversal checks for this condition and does the delete when it moves off + the record. +\end_layout + +\begin_layout Standard +If traversal terminates, the dead record may be left indefinitely. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove reliability guarantees; see +\begin_inset CommandInset ref +LatexCommand ref +reference "traverse-Proposed-Solution" + +\end_inset + +. +\end_layout + +\begin_layout Subsection +Fcntl Locking Adds Overhead +\end_layout + +\begin_layout Standard +Placing a fcntl lock means a system call, as does removing one. + This is actually one reason why transactions can be faster (everything + is locked once at transaction start). + In the uncontended case, this overhead can theoretically be eliminated. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +We tried this before with spinlock support, in the early days of TDB, and + it didn't make much difference except in manufactured benchmarks. +\end_layout + +\begin_layout Standard +We could use spinlocks (with futex kernel support under Linux), but it means + that we lose automatic cleanup when a process dies with a lock. + There is a method of auto-cleanup under Linux, but it's not supported by + other operating systems. + We could reintroduce a clear-if-first-style lock and sweep for dead futexes + on open, but that wouldn't help the normal case of one concurrent opener + dying. + Increasingly elaborate repair schemes could be considered, but they require + an ABI change (everyone must use them) anyway, so there's no need to do + this at the same time as everything else. +\end_layout + +\begin_layout Subsection +Some Transactions Don't Require Durability +\end_layout + +\begin_layout Standard +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast) + usage, and occasionally empties the results into a transactional TDB. + This kind of usage prioritizes performance over durability: as long as + we are consistent, data can be lost. +\end_layout + +\begin_layout Standard +This would be more neatly implemented inside tdb: a +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + transaction commit (ie. + syncless) which meant that data may be reverted on a crash. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +Unfortunately any transaction scheme which overwrites old data requires + a sync before that overwrite to avoid the possibility of corruption. 
+\end_layout + +\begin_layout Standard +It seems possible to use a scheme similar to that described in +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Does-Not" + +\end_inset + +,where transactions are committed without overwriting existing data, and + an array of top-level pointers were available in the header. + If the transaction is +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + then we would not need a sync at all: existing processes would pick up + the new hash table and free list and work with that. +\end_layout + +\begin_layout Standard +At some later point, a sync would allow recovery of the old data into the + free lists (perhaps when the array of top-level pointers filled). + On crash, tdb_open() would examine the array of top levels, and apply the + transactions until it encountered an invalid checksum. +\end_layout + +\end_body +\end_document +@ + + +1.5 +log +@Soft transaction commit +@ +text +@d38 1 +a38 1 +\author "Rusty Russell,,," +a52 4 + +\change_deleted 0 1280141199 +10-May-2010 +\change_inserted 0 1280141202 +a53 2 +\change_unchanged + +a2028 2 + +\change_inserted 0 1280140902 +a2034 2 + +\change_unchanged +a2212 2 +\change_inserted 0 1280140661 + +a2215 2 + +\change_inserted 0 1280140703 +a2219 2 + +\change_inserted 0 1280708312 +a2226 2 + +\change_inserted 0 1280708400 +a2239 2 + +\change_inserted 0 1280140836 +a2243 2 + +\change_inserted 0 1280708255 +a2247 2 + +\change_inserted 0 1280708374 +a2252 2 + +\change_inserted 0 1280141181 +a2274 2 + +\change_inserted 0 1280141345 +@ + + +1.4 +log +@Merge changes +@ +text +@d38 1 +a38 1 +\author "" +d53 2 +d56 4 +d2035 10 +d2223 84 +@ + + +1.3 +log +@Transaction and freelist rethink. +@ +text +@d38 1 +a38 1 +\author "Rusty Russell,,," +d53 1 +a53 1 +27-April-2010 +d662 1 +a662 5 + behavior of disallowing +\change_inserted 0 1272940179 +nested +\change_unchanged +transactions should become the default. 
+a1210 2 +\change_inserted 0 1272944650 + +a1214 2 + +\change_inserted 0 1272944763 +a1218 2 +\change_unchanged + +a1223 2 +\change_unchanged + +a1301 2 + +\change_inserted 0 1273478114 +a1310 2 +\change_unchanged + +d1515 1 +a1515 11 +The free list +\change_deleted 0 1273469807 +should +\change_inserted 0 1273469810 +must +\change_unchanged + be split +\change_deleted 0 1273469815 +into multiple lists +\change_unchanged +to reduce contention. +a1520 2 +\change_inserted 0 1273470006 + +a1523 2 + +\change_inserted 0 1273492055 +a1539 2 + +\change_inserted 0 1273483888 +a1551 2 +\change_unchanged + +a1554 8 + +\change_deleted 0 1272942055 +There are various ways to organize these lisys, but because we want to be + able to quickly identify which free list an entry is in, and reduce the + number of locks required for merging, we will use zoning (eg. + each free list covers some fixed fraction of the file). + +\change_inserted 0 1273484187 +d1556 1 +a1556 7 + +\change_deleted 0 1273484194 +The algorithm for f +\change_inserted 0 1273484194 +F +\change_unchanged +reeing is simple: +d1560 1 +a1560 7 +Identify the correct +\change_deleted 0 1273482856 +free list +\change_inserted 0 1273482857 +zone +\change_unchanged +. +d1564 1 +a1564 7 +Lock the +\change_inserted 0 1273482895 +corresponding +\change_unchanged +list +\change_inserted 0 1273482863 +. +a1567 2 + +\change_inserted 0 1273482909 +d1573 1 +a1573 13 + +\change_deleted 0 1273482885 +, and p +\change_inserted 0 1273482888 +P +\change_unchanged +lace the freed entry +\change_deleted 0 1273492415 +at the head +\change_inserted 0 1273492415 +in the list for that zone +\change_unchanged +. 
+d1577 2 +a1578 7 +Allocation is a little more complicated, as we +\change_deleted 0 1273483240 +merge entries as we walk the list: +\change_inserted 0 1273484250 +perform delayed coalescing at this point: +\change_unchanged + +d1582 1 +a1582 19 +Pick a +\change_deleted 0 1273482955 +free list; +\change_inserted 0 1273482957 +zone +\change_unchanged + either the +\change_deleted 0 1273482962 +list +\change_inserted 0 1273482962 +zone +\change_unchanged + we last freed +\change_deleted 0 1273482966 +o +\change_inserted 0 1273482966 +i +\change_unchanged +nto, or based on a +d1594 1 +a1594 9 +Lock th +\change_inserted 0 1273482980 +e corresponding +\change_deleted 0 1273482973 +at +\change_unchanged + list. +\change_inserted 0 1273482982 + +a1597 2 + +\change_inserted 0 1273483084 +a1598 53 +\change_unchanged + +\end_layout + +\begin_layout Enumerate +If the top entry is +\change_deleted 0 1273492155 +well-sized, +\change_inserted 0 1273492159 +-large enough, +\change_unchanged +remove it from the list and return it. +\end_layout + +\begin_layout Enumerate +Otherwise, +\change_inserted 0 1273492206 +coalesce entries in the list. +\change_deleted 0 1273492200 +examine the entry to the right of it in the file. + If it is free: +\end_layout + +\begin_deeper +\begin_layout Enumerate + +\change_deleted 0 1273492200 +If that entry is in a different list, lock that list too. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1273492200 +If we had to place a new lock, re-check that the entry is free. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1273492200 +Remove that entry from its free list and expand this entry to cover it. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1273485554 +Goto step 3. +\end_layout + +\end_deeper +\begin_layout Enumerate + +\change_inserted 0 1273485311 +If there was no entry large enough, unlock the list and try the next zone. 
+d1602 1 +a1602 5 + +\change_deleted 0 1273483646 +Repeat step 3 with each entry in the list. +\change_unchanged + +d1606 2 +a1607 5 + +\change_deleted 0 1273483668 +Unlock the list and repeat step 2 with the next list. +\change_unchanged + +d1611 1 +a1611 7 +If no +\change_deleted 0 1273483671 +list +\change_inserted 0 1273483671 +zone +\change_unchanged + satisfies, expand the file. +d1615 2 +a1616 9 +This optimizes rapid insert/delete of free list entries +\change_inserted 0 1273485794 + by not coalescing them all the time. +\change_deleted 0 1273483685 +, and allows us to get rid of the tailer altogether +\change_unchanged +. + +\change_inserted 0 1273492299 +a1638 39 + +\change_deleted 0 1273476840 +The question of +\begin_inset Quotes eld +\end_inset + +well-sized +\begin_inset Quotes erd +\end_inset + + free entries is more difficult: the 25% overhead works in practice for + ldb because indexes tend to expand by one record at a time. + This can be resolved by having an +\begin_inset Quotes eld +\end_inset + +expanded +\begin_inset Quotes erd +\end_inset + + bit in the header to note entries that have previously expanded, and allocating + more space for them. + Whether the +\begin_inset Quotes eld +\end_inset + +increasing slack +\begin_inset Quotes erd +\end_inset + + algorithm should be implemented or first-fit used is still unknown: we + will determine this once these other ideas are implemented. 
+\change_inserted 0 1273483750 + +\end_layout + +\begin_layout Standard + +\change_inserted 0 1273492450 +a1644 2 + +\change_inserted 0 1273470441 +a1654 2 + +\change_inserted 0 1273476556 +a1659 2 + +\change_inserted 0 1273470423 +a1661 2 +\change_unchanged + +a1672 2 + +\change_inserted 0 1273476847 +a1676 2 + +\change_inserted 0 1273476886 +a1691 2 + +\change_inserted 0 1273477233 +a1699 2 + +\change_inserted 0 1273477534 +a1706 2 + +\change_inserted 0 1273482700 +a1712 2 + +\change_inserted 0 1273478079 +a1722 2 + +\change_inserted 0 1273477839 +a1726 2 + +\change_inserted 0 1273477925 +a1730 2 + +\change_inserted 0 1273477925 +a1734 2 + +\change_inserted 0 1273477925 +a1738 2 + +\change_inserted 0 1273477925 +a1742 2 + +\change_inserted 0 1273477925 +a1746 2 + +\change_inserted 0 1273477925 +a1750 2 + +\change_inserted 0 1273477925 +a1754 2 + +\change_inserted 0 1273477925 +a1758 2 + +\change_inserted 0 1273477925 +a1762 2 + +\change_inserted 0 1273477925 +a1766 2 + +\change_inserted 0 1273477925 +a1770 2 + +\change_inserted 0 1273477925 +a1774 2 + +\change_inserted 0 1273477925 +a1778 2 + +\change_inserted 0 1273477925 +a1782 2 + +\change_inserted 0 1273477925 +a1786 2 + +\change_inserted 0 1273477925 +a1790 2 + +\change_inserted 0 1273477925 +a1794 2 + +\change_inserted 0 1273477925 +a1798 2 + +\change_inserted 0 1273492522 +a1802 2 + +\change_inserted 0 1273492530 +a1806 2 + +\change_inserted 0 1273492546 +a1810 2 + +\change_inserted 0 1273478239 +a1814 2 + +\change_inserted 0 1273479960 +a1821 2 + +\change_inserted 0 1273480265 +a1830 2 + +\change_inserted 0 1273480354 +a1845 2 + +\change_inserted 0 1273478968 +a1851 2 + +\change_inserted 0 1273492604 +a1859 2 + +\change_inserted 0 1273479572 +a1862 2 +\change_unchanged + +a1870 2 + +\change_inserted 0 1273480282 +a1874 2 + +\change_inserted 0 1273478931 +a1878 2 + +\change_inserted 0 1273481549 +a1882 2 + +\change_inserted 0 1273481557 +a1886 2 + +\change_inserted 0 1273480307 +a1890 2 + +\change_inserted 
0 1273480335 +a1894 2 + +\change_inserted 0 1273479897 +a1898 2 + +\change_inserted 0 1273479653 +a1902 2 + +\change_inserted 0 1273480371 +a1906 2 + +\change_inserted 0 1273480464 +a1910 2 + +\change_inserted 0 1273480399 +a1914 2 + +\change_inserted 0 1273480425 +a1918 2 + +\change_inserted 0 1273480453 +a1922 2 + +\change_inserted 0 1273480455 +a1926 2 + +\change_inserted 0 1273480450 +a1930 2 + +\change_inserted 0 1273480452 +a1935 2 +\change_inserted 0 1273478830 + +a1942 5 + +\change_deleted 0 1273481604 +In theory, we could get away with 2: one after we write the new data, and + one to somehow atomically change over to it. +\change_inserted 0 1273481632 +a1946 2 + +\change_inserted 0 1273481724 +a1950 2 + +\change_inserted 0 1273481713 +a1954 2 + +\change_inserted 0 1273481717 +a1958 2 + +\change_inserted 0 1273481730 +a1962 2 + +\change_inserted 0 1273481736 +a1966 2 + +\change_inserted 0 1273481744 +a1970 2 + +\change_inserted 0 1273481748 +a1974 2 + +\change_inserted 0 1273482185 +a1978 2 + +\change_inserted 0 1273482259 +a1989 50 + +\change_deleted 0 1273481848 +None. + Trying to rewrite the transaction code is a separate experiment, which + I encourage someone else to do. + At some point you say +\begin_inset Quotes eld +\end_inset + +use a real database +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard + +\change_deleted 0 1273481848 +But as a thought experiment: +\change_unchanged + +\end_layout + +\begin_layout Standard + +\change_deleted 0 1273481788 +Say there was a pointer in the header which said where the hash table and + free list tables were, and that no blocks were labeled with whether they + were free or not (it had to be derived from what list they were in). + We could create new hash table and free list in some free space, and populate + it as we want the post-committed state to look. + Then we sync, then we switch the offset in the header, then we sync again. 
+\end_layout + +\begin_layout Standard + +\change_deleted 0 1273481788 +This would not allow arbitrary changes to the database, such as tdb_repack + does, and would require more space (since we have to preserve the current + and future entries at once). + If we used hash trees rather than one big hash table, we might only have + to rewrite some sections of the hash, too. +\change_inserted 0 1273481854 + +\end_layout + +\begin_layout Standard + +\change_inserted 0 1273482102 +a1993 2 + +\change_inserted 0 1273482061 +a1998 2 + +\change_inserted 0 1273482063 +a2002 2 + +\change_inserted 0 1273482072 +a2006 2 + +\change_inserted 0 1273482139 +a2011 2 + +\change_inserted 0 1273482364 +a2015 2 + +\change_inserted 0 1273482163 +a2019 2 + +\change_inserted 0 1273482493 +a2037 2 + +\change_inserted 0 1273482536 +a2046 2 +\change_unchanged + +a2049 2 + +\change_inserted 0 1273482641 +a2058 2 + +\change_inserted 0 1273481827 +d2067 2 +a2068 11 +We could +\change_inserted 0 1273481829 +then +\change_unchanged +implement snapshots using a similar method +\change_deleted 0 1273481838 + to the above, only +\change_inserted 0 1273481840 +, +\change_unchanged + using multiple different hash tables/free tables. +@ + + +1.2 +log +@After first feedback (Ronnie & Volker) +@ +text +@d1314 13 +d1531 11 +a1541 1 +The free list should be split into multiple lists to reduce contention. +d1547 39 +d1596 7 +d1604 1 +a1604 1 +The algorithm for freeing is simple: +d1608 7 +a1614 1 +Identify the correct free list. +d1618 30 +a1647 1 +Lock the list, and place the freed entry at the head. +d1651 7 +a1657 2 +Allocation is a little more complicated, as we merge entries as we walk + the list: +d1661 19 +a1679 1 +Pick a free list; either the list we last freed onto, or based on a +d1691 17 +a1707 1 +Lock that list. +d1711 7 +a1717 1 +If the top entry is well-sized, remove it from the list and return it. +d1721 5 +a1725 1 +Otherwise, examine the entry to the right of it in the file. 
+d1731 2 +d1737 2 +d1743 2 +d1749 2 +d1756 8 +d1765 2 +d1770 2 +d1773 2 +d1778 7 +a1784 1 +If no list satisfies, expand the file. +d1788 28 +a1815 2 +This optimizes rapid insert/delete of free list entries, and allows us to + get rid of the tailer altogether. +d1819 2 +d1851 1 +a1851 1 +\change_inserted 0 1272941474 +d1857 303 +a2159 18 +\change_inserted 0 1272942759 +There are various ways to organize these lists, but because we want to be + able to quickly identify which free list an entry is in, and reduce the + number of locks required for merging, we will use zoning (eg. + each of the N free lists in a tdb file of size M covers a fixed fraction + M/N). + Note that this means we need to reshuffle the free lists when we expand + the file; this is probably acceptable when we double the hash table size, + since that is such an expensive operation already. + In the case of increasing the file size, there is an optimization we can + use: if we use M in the formula above as the file size rounded up to the + next power of 2, we only need reshuffle free lists when the file size crosses + a power of 2 boundary, +\emph on +and +\emph default +reshuffling the free lists is trivial: we simply merge every consecutive + pair of free lists. +d2164 107 +d2276 2 +d2280 59 +d2346 2 +d2363 2 +d2366 2 +d2371 2 +d2382 2 +d2389 57 +d2458 13 +d2474 32 +a2505 2 +We could implement snapshots using a similar method to the above, only using + multiple different hash tables/free tables. +@ + + +1.1 +log +@Initial revision +@ +text +@d1 1 +a1 1 +#LyX 1.6.4 created this file. For more info see http://www.lyx.org/ +d36 3 +a38 3 +\tracking_changes false +\output_changes false +\author "" +d662 5 +a666 1 + behavior of disallowing transactions should become the default. 
+d1215 21 +d1527 2 +d1533 3 +a1535 1 + The algorithm for freeing is simple: +d1642 26 +@ diff --git a/ccan/tdb2/doc/design.pdf b/ccan/tdb2/doc/design.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bfe3350c30806d0af14d308133b140a795f747e6 GIT binary patch literal 185894 zcmcG#Qkys#)O!#8Fq(%HqKRVXF`dICE`O9F0gC^~5qTQg^K0!9WlMuIGj97Wh&^w$`f|lvS_Tn% zAnA{Wb`9lsnLVs$QhJHNr2R;pNGI{hI$uvYef-SLQVa@1$o_!z+xk5jfD~L2tO`gz zeo61LUQhOSpTum4Y|{4Cg<&J%)C0_E3gawE84-Aq$4Ae*8(cS2hUINBcIZ-LChtkq z+FG_14eRkc24{k12G&25XH0sC`&vEHwhyYc-e0eQ^Vhs%=ocMpu}aqNGKBZ{J#m`}zbz(`}&l$^4r5+Av!xXAS0%-OI5e@nw*RtK#%rGz)T z-9kj`Hgfq%FIbFJ&h! zB)m-zWaM%_qxD)xm%1k%ZM(55e-Z#ZCZ;$y6|UQZpz+Ow;3~UaBr60@`>evYQRSXa zI~`Y%a4~(v^6oiaE0LFPlPK;+*E%~`L6Tv3vVh*cFIRF5wRw}J_NPaI%=wh)n37Ub z`B-_%j7h*qg5ZdU2%rL=0fst6(4~BV+yq>;d$Ec=){hE@851p6l3?>sCv4Y&F|0&I zZv3}03s|kwiV^BE;9)`jmWy+u>VR9IEQi#k6o)EF-gKnm9QFcOZ6i2?ZV+vxg8 z!{rfqw@5s>MGhMRm!9JSKYLnoHWola7#}%*^?~Ep%sdsiz0G*7hq&hFt6wl+wTG*P z#R}UGWA*7#G#T9Bg@x&;<<_I7(`xSvErRr3@ta8{{UG6dp|%0MMMk=;BU0vVF;M24e+O5eDBnon+@`C1xRh_(*FUGo zv-RuhG|^K&TrLGC@Mz#K+6u%gS+W0|J|ck`C-|k2IyYba#1c-xF_$odL9<)LzKxez z3dUHnu5Z2DU!tqP;IH9)6rPjZS#!fuBiP(iK>{@nkx9hpC%El9&J#e*Eh1ZJhOZ+d zu9Pfp$?dhOV1QHNqjRUgo$R?Yu*LE~C@!ee`wh>neBqhTcC*>~9N{Ua77@qBms=kO z0uc3fE+N^{LTV3|bB#-!VE@?3PadbnewVuXdnB|VOq$B2)rr}-aoPBUOSfO3m0hzC z(5J9LAu99_4y*BpI<#%~VPkPuq?~ zMo>!C_*NA9!U9#+;jG9f_-Z+-gk9Xd?|90nfL2*y zxLl*Erv?clLpI%RyMfsCEw-#4Dm6D#{-N?V*J>qmAn=Y?uIKzA9%4Lk;8I`(3sR^C zuXu{{i*JeFv=Q>X9E@rg(8s#J(?7+f&e^|;>AZ|zyaX+S3!U}ifcE%Q&i&M5yJjt< zG5-K6^O9NJK3SA1>yjH;N*QYj!&N%axC<^|`56Ae$ zpI~455{15=J=oa+i`>&95Eanr#6Pwk5+ScVTas^tT`~?;sS5tI(P^(S>$9&SG#IS1 z)m?|cW9$gX^*)Rx-7 z1HUHc8y0)#M&K+S7m15nNGqYoqv3HE(=TU$3^!J!i&@_17*zY!V0C<%(5eN4^# zAX5NL(kJm&Up*e8+Z5}EGHpP4?L1zX`Z1hblo`RCrI;8T7nvW_V-;#g{@h`ZFIaS4 z;6U=V{agk3cEY?p2kYVq_iU~|e5)ai5MdzdXCO2)DaH(ze)6vK*i5)1KFb<~tIGIo zvK-eBzBVq`8q<$pns4!wY2pW)zWWsuJdu04iQWcuNbt$vCq7u*9lYHUwUKyWtW`?y zlL;&C)DQgdV}z{0Z=i5cavopfocP{My-?-p4X@DZ9=x-ry}|GzWDF;=^d2ST{Rf7t 
z4ylt%EMl4^wbdMA6)6B|p@}rtQ+~OpnEGB7a{5ai6=uEX<#>jU#3f}t7_Ql*9yW?m zD1PsmVHODtS>d@BynXjDbvz3~>Xg$=Jiw+i_hSmXK8$_$Lv{N!q}<(!mhZdNsCV*H zyC&bQ<(kv;J;7FUPYJ6QkIHia8%tk*m5tv*R?vmwno;H~-+-6Nj^=BWg~siWEbuHj z2L8E=usS8>8UHmUA4lctA&@2Xf!Icpq;t-S_9=|d8$-AX4>0q{C*{gJAE;ed`T%sR-OmgTJ8ZsHP0_~Dqu~CXE0e*&g9{b zGxkO8NLDoaOPS@)Sk|yeZR?tt2A*KbdzFBfS$O6iV-k23g2zKKOJ934oMMP%uSx5S zHBRmfXM~xOW}w*RExHCCohf)_hLwfp9ldyt<{lCjF`l5zq!q?871f$(V6<7!dzhy} zPDGzu#5X5ms@05wGE-^UMiJ+dL1_k*EFsB!d?sd^y>l3$tUGNMr2UoY&IH z<-DIx1J`~gJ2+p!5bT^1P$ss<|HP2rqu&Sv#rQXrQTDJmA)u2tFf)Oo6SA{)HnDYf zBKU)K6rt!8O`Pmp9F0tz2pIk`A!A}}VIXMdPM}5qdxDjLk${bZQ3vXGxa03Q0){_k z6@Oz7!9N%JBkV6`68-)1-|+OCkQmqqnE!b2k0Cn&%Rh&V90aU?4cQ6U{yAh~AYlLN zSr!70KZn1uica3q&Pd6`nLz9J-i1X8=#)*|oe6aQzVyH0H3kMI`u~*IhE%uhHdqmS zXY>lhILc5XUJX2k@sMb2m|(Q(J>|~yN0Drt5+vk`&W%OBo;GhN#9~uiD1grU4ih@K ze7oK*M`yLX3Z5>BocE{R_fBn+Js0#d>nF2nyC}wjBnG=^azJQgyFCs*E@#*TNCt&n8FlE-+Ji(2pD zlgNZD)4ILk2CqebrhgsBd?!gKgN#<7m*J}7`{}OJv8DYCY^j9GD2tu59x&X96lewxd#LYXFCe!COhXYp zRqv1H1Sy32kJW1-;%FXvS!lEQRMJ_)rRAb-D*)TfSHIi&cv0F*#HKq_|1`k8HcxCZ z{!IXi!T2EnYSUg=(K?LY@c!8!pZ4IP8i(76Iiral&cV}(Ljrgr(~RM>sRHj62Z927 zM&j+-_M_6=5`BmNPhok1CPs+^k8q0>xOaunqSsoINnTeRpWIZT;!E0Rli_^2s7wA<0he zV~uHWhP@UEQx4u|E(BPVz}zgLY@;*RwCmSgxI1mU4?;z|Ait!1fv9Uf0Wi{^;oLV( zxf-2HX+dE?nANav;XMwX3igtWJ0|KZCG<4_!3A?0^CD^FyT|>=*U|IfVw;oiS%|zm zD}d0j$~M$(gGDZ)4lTi=8e>(UvpJW6e0~$7-V%Ctg)neKbRx>GKMy>VFA#>zPD(=M zIND}iAXHRSR3%hI!&!VV!`e*)l5}3xN>_7qr*~yLyVv@1u~(|Ml`!|j9Bdcr?q&P* z`MUpxJP>Z1T%hw~5=<+{Li1d*J7`s_Pt>fQ=6<4dOcyHn;!!T3x45rc%VLbZjfGZE z@>Acm){D$5=F2N^8aKb20~$(_390s*1q=m1ce1mF5V{*1`C3mGmbF|RvA=eeCUXMm z$vP?r_9>4>z(dhx01U*U>5u!{@;c;p7mM&H6Xbru(I9jb>cL=d3+9 z*^{HCYt4i=xv?`Y_FKGQJ9@uy9!a5qvDW~P<>e7_T4%>wyctYh&=jBX_LmBl>h6i_ zje50x?!q&Khu3wf&RZJ z^j}g-C;B%5W@02@_)EB%mRPo~ zqQ8~7_F9WWQCdIEYAdltLaSV*NIj*ZC~~=OL=-$|ND%jF#mzQ~j?e=Cb&$T3i|gry zK=VR%?)0KjJhzr@Mz?Fu5_QBNqr|e=LwiUw%tN)Rff+tURxQi(fl$7{Q+=dfGN^vb z7YqC@e}AVIyc&p${jF}<1MA%~H{J^O$EPc{lB=Ii@|=-@FPWyloXAm(#eEbpa?&FD-+b3E 
z9B>~`nSnzIsfrh#?@KM(1?U0{KIjAW{NrVeW@d1?U=B3A=;oSek%OvV3tWl(ww2xW zXErX5hF?B(pSQM-zX`a0$FvvIs2l5`0iF;>ypNmgZ5(V1F7B;MPhA=7@U=WxMXL&O z@wMov$g$K4(o150NJ9}=xpRU8*SymktWKNU;nI&<-jP}LwWPwHpJs=gAZg$1S!076 zfD&C{QQ-LL$KJBT&gpxoBF%1A4QF(~RCZmS0PZrA8Btb)pW)Y^{-}?%_BbdFcbu_2 zxr7?`(TjcPPV|H%$86AT9E3Sb3TZlM{$Y~5GAX*56?;Mq(q3M&43oYk5leaY>)GX$ zWK5*$!y81ZTAa&0$66I#Tsu zMu~UFJWq`H_|Ays)=lAJ1BKJ}s1Sz7dSrvV}jWOt5b@0k%Sh#H8OUtzL3TJE{yJblEceZ)w4#JtzX;FCI(t;1 z?~_uqP)N!V#O(vRkKXE~0I@x6Iwo4<%qdXZ{Dmi+To705IL7vi@fDN5&Nn4oCvGKN z8)+W77gW+K_gD|(N(yXKrBu&oZpwjB4%m>C!i==cIQ7q6OkHQ<79U4&6TQsV_LO_c-4Q z?!T4@;Pw=(C(707Y6$dwg3szKvoHQAv8`xajCIo=M95x%VUS=aiTJt?crjleAma*$ zY3Hri-mTDz)4szZ>qjvuf!*WXZC8Irs_@vykx1d5K37baD#Nr)A$2R^_Vs|wfJ1(L zG*{igtl`m=KYE*4d=x-KeRQN5YHkLo%0%xP=M#G~({RUyK+nDTx}@Gs8omFZ3>%a7 znFo0Q)Chw%wSS$0ZmMI2A*vU3#=g^2|JGe(J zdCe*FW51wzJ8XC`LLQ}ZMFKK2S3#i6-jZR`?ZR(C^BG9IhJX~R#5D;d7-b@n+ z)?UZ9Hw*Cw@Z~Y(lPzGn@<|c=$!?!n+6>ts5Fe%j!Mpw6ZD_L z-MD(M80tqWV8pkj(^j765)sJJNkr8(9UXgt7o`T^s7X`tofHAMr;xfXUR8ca2XEJ= zmjqh~Zl1ec6*x?-%+>qVc&C~E?s`k+UC2kgtGM)jPnt}WlcWLKoNP6Ec-!sfZ)JWj zT}B(^vzXvDmjW#t#Nnw=l`WNjogx;?78)<1<7YV2-oD|3OK0Y6TNn0un*c$TKyvfr z9)}L*;+tbEDxO(-*6tH3veRt9AwJ2qi8BN5OwA|L4)zxup#n@B(=)y8)WKE1VZfsn zc=zfIar&BDI@$ENyxE#%&VfNjvbXs5`HREdIsOWKQOUo7p+B;}zhL3t!_c2e;eSh3 z|4bgC=>FH)>K}1`t@W>T_3zm8n|c0`gZ>7i|01COh=P41EjsUX<9mkfk_%PQ zwqe7ZrCAQ`<0~fjmA7K4TQqDU%&=4D=e=0_FK^T1J9!Tf$S$4lPH!6%Y!>aSgIQFS z_AAEql4tC-D_z^GmzU=sjwwqCc$fG4rA5*^bRjOR9z`9~-wMw9oV>U#(Xf_i7s{9h z1lChi&5OO7-XGd-Ip<^6_{JjNEv8CIW||j(zwtHPj*OOU_=EcrIH)PhX#N9inpcpszN*0iPE63I-vDt;5 zVm72U2gxr^`Nj(Y(G5u+I=r=(>n2!w^)R}E#Er+Ke*2C@qk*}SWesMQO~gGx@(KX% z5}wT+RRvyFeajigyKYM~;)CnXQY+G8q5|rxK)b~?{%KxXW71w94Qm3XO7WVS;H&)M zgKoO;P=Im?VbUale~s1{E#SqmeCbjCt3G7YEgB1U+$F|oAp$ zYWp*syQSOb4eJqrzuQ-sz8Hc`^=Dz4nz4NDpo(gciboe^aJwfvDh&RT+I34Nc}~G4 z8bdNU1RCoUToZ0td?Ta!Y*31&S^V~7IIqW^ue&6&F6%fvSS_{~QyYJ?n`Y7Ua{?)d zC|+7LES8LM--F*w=20#O`Dbu_EOPKoQaCR7N5XeY^>}e4rIBLbM_^T^15g>yjdcWz?lKheL`P0*N`KTl%Y#a{!+A-Rd3X7?Qoj 
zaB!>aMqzHpz!Sjm2LNrq=OV?aOyRPM6kWZ>`z zbFB2-EWo~Jt{j{ZEUuG_gKWFiu(m=Kq!YtD`m>onYF>)r|faQIF5PsP#A0D7% z7%^Lu;riY#4Kk!rY5b1oR%Z!ui2_|^n8W)TWd!3&zTqtKLqx}pvb&HzFgX43sf|3K z9b6&uV1usrdGw<_AgVSd|Ma&)nIFCGvTK{$-J)|g0}o%Rul0TRpa!ZS z&0Q&5wXVguNqn@J!P3VXfvIBYl?ntf#Aofuy7%Gi7vD0J%4grqceq;c;9kd2ij#;G z1Rd9$OPM>00eKG#V}+TlbQL`I@xB2}RRgFKUf;`71gKYdaX@?7m|}-ZZ_O6m#$Wh1G{2)J2jo-Mpn*Og zgb&LY$S!D5Rx^@>@bk^4%SA%-ZlqKV;QmPDyb&$~r_Tf+XP=SzrWhqS+UD-t`P0Hp zopw&hAF{18S04im(1K;CpE*3+Ga>PTO*KD3R~0p6V+< zbYn5Qeox_5kM!vHbsYqr3Xc`QFVq^BdUn;%!DM7m!jUi_zwE5s+*b~0pmzRbM zArW3)it;XG<30|$rF{=1P(SQ)IOS4pnUbP|1V(qmkyfnN!u~-|UQF@eyC%Ue>!-vPAZ3n;DivLMYjDNRw z{w+EEi6sBc=4bqef&afIr$6KVek+ji<=$oh4CuV6S zk_0ldNnRUgd>hYhr9!H*RZSTlOtO3(@1d^h>+=dEV!`Z?J8Pcp`cfV2_ECBc>Elbb3MmrC3gOdJ(hY24?+a;x=Svn}+RVIz|fqS3tq z%WD#>)2ZqE5?UB|)Xl|0XQQRd?j_Dx#V5T=>2^jhc4sA>Tjih5JwRLQKYHwz;8Y^c z3`wyB3V$*xsBP#z9jTzTyKk^}D$+>Ly4X2;9L5q-%QWLjfN9SV%|)7-HkF#c=!fhmcI>RJIQL$0tON3MVM$mlSTaF8RSU%7HK3@9Bt@icRnwPWryuxKkiaz8eV!T^`?#u&N@Ef)iSt++tXE`~!r`uyaYbc){@ z{K%;}El=w!A%>?c?mY7(8>MQiYV4^Qsjvda`xuDxp@;K?ndT@^4FP-U^$?$W$k4z* zXvVQG#$cYhy@k}%{T9xPeaPSmWt%)yGpt1nxFxyaJRomlHDTRu)7dk}G7l+QOCvHu z9+*{ZnE{Mv$Fq6=i@6ODXY1O%V8bhuEKlvYdtY1-sJplh9Q&fuuC1W1l=ztT+C0r` z_dWa*LmVZb*0oJ)uQP~Ya;UmDPL| z!nbP2|qAkLt;1?vvXdO>qvS$Z^1}e&lJA#7_%+ynLTo9$CPJr=GK11Et!< z;>xOr+{87aWhyKpeu$ zq`WRgEBABJ*H~5nhPqa0H`!F8d_e;9;IO*E8;Ck7v^0iaV;gi*!O$F)fMR@VvqE)X z7Met6GT%s_rh0WE_19zTI{Z@i0iLfm?MS5LD`J7%EUo&zxicvLgZ{Son!O32Z!{c3u!2SS1ec5KEZyr0-&o0-}=| z^D6qP0JWp5g(W56xW$AI;o1N#qS5pSqK+`h+I$5gxe*|xMPE}qgNP~b6EQsqBNv2mA#<$58y&cPUmK$a!T2UrsJ#daA_j*e zEDsP}L3>)BtU%ic=uZ%vdIPLI=9?m;Hd)LJg9^BZaOE9QIuYvyfU(^#A$~w22DoDYMcv_yZ$)b4bF%L?uV?3 zU6ER@)+WH70J%4%EQVBW?*xjrsev@<3lh0Q>ZXp8qz9SOv2F{2GGS!j$5JxSNVXy+ z=u85K+>G)1-2y3T-@s1U31L2xgcqeUCv56G6JFZnVJRp=V_2D#_=0LUomob-tff`o z3%$=wvLiJs^h&8d(t!PPUOTk@b)eduDUn+~ z71qo5qQuorRPWhtA>%Ve)~rTg6$m1^>bP31>Cm;}s)mDcB9|SjK$!qr7H8K$JX(_? 
z_UcaBBe*JEaLkfbwR;+D_gq>xUkuQjLeX$waRikh5NcFRZ^)t+B^9S)QEzW+0*6f8 zvtH_ztG))gJXXJtkAaORSGZ*-aRKi>WQ|z!gGsLSq+=AeBgrt5J?ABOZ8n+^K8#9q zk0dJ3Bt;d~ZK`o_8 z1Fc;E?%Fupl7Lq3#zly{!^$&@V{!&3lu~wo(mxRx;~tO(jAH*>V+h#=dF-A9%E2$| z5tun?(?JOkWv6Y`E(2146mkN7)wY|XpwkBs!|tVB@Oqw~23@v*w?IDuMTp^+CDlzV zYPB0seHVvE`vLOYdm}qQ*XI10=NLE(J&)4@a3bWO8QL_FO?QT&J9l*~gB)6bBFY~h zv~zZRct!{_H)9mvS=%%f-Ui@<3(l}_kT;G<;<|_Za{$Dk4kh+I8R_BS(;>PY+SrpQ z?(7_YV)>BioSUu=lZxgxj+Avq!K}bEEXx8U=v7tX%maLJ9*5$o=^k+)wZaxf*ol}F z(^td5?FHUnf(_|jc*@?lT39dHGrkVyo@f1L8RPU86mXJjT#awhGEg(8*%fi}N2=Cl zO4dE*hTMpHv^{}K>U$#?-Bdq@EJg$pV$CvyKEX>&Rad+}nQIra503t>Z~(el+C6^h z1U}b0I;!~~$ZJ13l2*3*ukNwz(N1&PNIK0%Su)M0}tq zo#?`|3q781ORtjY!4ASoZznnCCc#XLbdy8)SEi4`{BFq`By%wIX;S2VjZW;Ln1%FpY@3p8sds0JMe|${h3JntCFM%LY0vfxiw*7>5RRmi5HxT$oz3wk|`u7m{XBF@7 z_8H?ptBe0z`|QuSzt;MfefD?D?Eh9kWFcVutM`M6jezmLnm zI_y6|k%5DQ?LP-asigg3*q*D}Sz6^K$wMKOq32&Cww6e0)zo-t@Ur7eL|TSsWws$Q znp`greU%(UVO)_nl@^_KAxX$7Ef1X@J8llRvyTBgr{rRG*U%UI<>VnvCX!b3RY~W zD@CDau;7?Ws&8D-A7aOHY@2UAhktS=%9-VIFMOgN)$`h|WNm(ndwqT0(2uC*Zz1y3 zJbDvvrmZ7o%GqBc17uQSpwweN>02&Qu`h4Lm&Km*KV95 z`;w~-C_Zi#j~Obq)1k`VTpa#1*@Xs}66_{SR&G#7oR1v%2sY~u`yT2qF9Cu!_zv*t!hhV1(L-2)Q6j4XR)$p{s@8*{vRu3T$ z)@~!cenRi|S1+d^bMV0N1e1=0>eoDblf^tUOSeD>qPqnQnI`vsJ~c|+4B+-=oN5hY z+?Ola7+?)Zmb_(uMNF)4QiJIq_0VZ_;o}(nCe4z_LWxoKX^@2rmHJ>`3(&zQB32@T z#BvVsG^ji-rrkA&!I3Tir~tx7*OBJL5~}C9(=|v7XcRxm4qXH_AJ~f?j(W&}h)ccD z&WZzb9?olX7IL0C-+s!x6ht!xiZ%bi zakmSRhFzDAX-5$#1>?w7(*dyaT~n9Y-bTuD^IflW$flf~BntGJ>!HF_Yegp+0aKWE$&3Sp)JnBZkSF%5jjS|^>SNx#tKpe|s;^MTh$pibNPg~3jHQ4*R8}2Jkp+@A zyvK~GG~(S52$$b3Igi@@%f;CmEM32tGQMFsbUs`G6pvzlfO~-f$khQaq||b3Em&$s zkgI&zs;bQclP5dPG&4y1Bq|uSM3*z07jgEXRmYZ<3)O_XCk9qV22ze+A!!8x zEYaKu-Q(fKpO1x7`ZISB5{5t{mZ4%7I}bP+S8Gapy<_+1b>^>u2(Wg0vii0%1&-ZA zvZo9^W7YEmT^N;Q_8iyrXljD{9ulCRl5!?5qt%d=7U^`%N;#ib`W^N71$Z_igK1#*Cwdh zU-tPCe2*>HJxH_{O`%UIA}E$!qyC1nu_{wXLE92Guo|DdidOorCe$37+ZI6G8#P(y z6O1psLKMeaGeLtz4?P?KU53va7sDJU3$H9ulSon*%pTx#xwvg!0}n)B1PWSP?-VT; 
zxl#wtjA^Vd!a=Wnt+v;>4~vKM1Snk&pTde$Vu^W@M1Rs3i0NVTi!LeC1c#wx3&Z-+ z!I_o&NyGPmpg>V-pHcO*o9~ISgGXqGU&7FhW7kmN` zF8f~~FlM0aYiS$SKlE(IH(U7K^Qm}tU#_1ozXltoG=)a1GzYB@~MMeJ^<@_D#{<6cE894q^%E`f4i`@{t_2~(;69PmV zcv~faPclAtsjKU5X;3GCjK*tVv4#<$l9@NUSh3TgRS4wnyNv4SP}k8>(J2+YnuG3U zV=JiQv$gH<$|2E3RrbjC%Bn^um5~2coMtA1K%UWR(Y5`~UKVKH99u|p&sRmX4fnRh z8S!DT2n%4c=ZDqd!6@|O)#z@*t@gWhAH)5!0w#$`U)L6FH{9=b3b~I~WSnnEUmmDw zpY(2{mEs>^OPt@lr?x$npg{%`_~?I9ao>Xa{i;0mSkimSDUY$oyNL#jcpH&ra*OFi z)uvwqMt12%!B`r-kRY*ywb3$o^-3q{$Q9Q@vE=8GXrw4>$Y#MVQQcVL1RdZ`Fb+dR zp`D-oYTkx(&Gce9xvT?n>IQ0{c`99w_V$;V-IkrG_SwV5F$GAGP8wYy2MvEEq4l*< z$LGOYJE=9z4rIzWFkz#`%NFBSAS7l+4rJMyrE0*%s@F=mL3fR;oZSXgB|Hd_K zo_-BCoSZfd%nYnM4Y5(qLp;Sk?fHv%#@*M_Lx_=3gzx?=NjqGieP?Q+!9N^@ zXg$mTjdrdUI@$YabaMxuH4_x%8ghY=^PVXtMw^(;ap-pe@C9w9a=cfEhVgsr4iZ2M z8sJ~7Tnb7CijeG;B=sXSh7!~ch{*>GA|iyQW$bsqYqq+(TId;HZt}Q9h5J?j0y6+- zod$+G-zy$oA1WjT3_b*3sdRetPvgN$4|ng%$d@QP-?%eJK8$Zz2Cp$!6%8hxyA*K2 z5z-GlP{3l55nR`|pgd7d5N%bqos*T~ef3pK?4+w_@o$#yUBVQ~bWkE{MM=IHQEZ?h z4Oyf0F5_&($j4O|89f_6FmL4E%!CFK^p}wjs%F@>AVk$ocvrH6&WF$z8z=YS{mPwH zi@QEtNj!3@WN0@5b(C#%Gyspq)$Sv0-etGJ2qFU~zCpCH;iLZy!*}ogA#}>-X1BdN z03zLLfh*KHy$=&-Z;wq2Q@U#ztHDX}c5dv7)o<|&2fJ7xrQ|5ACe^>qkrY7MNqVE} zVom^2eGzCiBUjDS`<7sJF+Ehw6`U7%hG6Gccr2L6H^;FI0fqYqmNn8!J4!opi37&` zJ1T(GPhb#392>C$C-4>)zZ2Pd!4XtaGh-flQ+SJu+m=Sb4>sSb{gKR%j)ZDzfPA;o zdMKT#s^b~+Ig+suWPlZ8koy}T=SPsAMWvjAGvBLRr73Px&$BlXO$|-?7{}RP1Jsfq zDknzn$A2}~f2B&@VA@r&!3b>qDAZ$XyFcZ=Pq7}mNF*_7r5?Som$D5*sb0T&+4LHl zXt=DdAU_`0R+XJjGRX!tNy$!RWQ(;p@O%EzF>JCf(*TXX(*(p(Z$cVcn(*Y+0lgo3 zHBN88moIE!-+0c>!3F=GdY>`{?LG>kDi2%>3yw)5y#O&c3q7N%^w^mi$3PuIY9bZN zP6gOz&Wl)l%4f!e#Rx@{Uo9>Nm`GY{v~y)v$Det|v~|xo=YX81=gJ z+ch<@K%nq{<}nR1V}e3w8iUHEDHUxR4A|W@;EHQb+zH!X-PArw6K;h=qA0!m_3OHO z{luY`L`2U;9uv7z3U<&%yNX=6tTYC@MksXU8=3tTpxh>@dtv6I5x5 zX!kZ^c>IC5Um|SU%DtIUwPira zkI&_RxiZh7e|?3?QnWLvchMc8dQz&zRxvS!P`6^@w9A|7wYb78!$H@(`ZheyP?s&a zRZ@kKfbzM~WpqOXagWhW<{nbAWqL@Oj-XERCYk|rL&l!VgfA!LZD2D@|AhG6WpV@8 zys?;d4Lgv^R=WWq0w#*cD9le1EdZT9-~)dRkfgGwwG*F_FX^gD&jZ 
z-zYz2W%1b&0SM7?ej;NXI3S3os9o9mz7gMJJwvQv7}Io{5p4omAQqXx*dO_Hyp+CI zYr11Hqk&qbln7*?>L#1m-xjuxb^}2SKtL`4RMm&7vLxbVev(u}iDy$|pu=|wEKw;| z?=h+gpdTrUl73Om6_lzVF&gZigKgJ8XxiPt!@u&Upe{SCa!l5Y0|I zKZal5kt_n&lB+nF{fZ`p?WxM=2(etzUw{$X4-GZ5)X6x{04e#=Ms#lcyBPB-h;vF@ z+1QRSx7fFUUy_m(F|wBAjOq5NI6Mau^@3a)w)n&E?NYCjA~5?AUNoJymNB}hl!H;q zhe3#E!9S;Xn$TES56?@mz3HMEhs6~Q(4%=T&*}rGeKl|+e?#lh9#6Rz3{XeiM(?9IIe_g@) zGw!do{>7sI>R$VO3F~i7ZU4ohzq$1PBUkS49ejWBDn0#wd=aZ!UCL>n6{%sESiP%CRpS#%7KQHv@vSU8zdP{hB5{0Hv`k%-LkdigqzH7gcB@aD$@FcjK| zNZs?U48eBm23zZsL)YjBcT~gBV;#i)s-9VvNNUXog)I7pdkt`X zu0~B^v)V(kZ5#n(RQK@CIn$>SZsT}ART0sZMMjD4+)1E_dT!}Zi5-=$vu(08P@l%V0S@R>T-THpf z#-2&8^co!aZ#BsXe0guOEct#oLCrBdl%-ObyQHUuJXCm3jsV(~BC>n!mUK$0F3V;V zgL8ogVu2y%7j~@Z$|B+A6hl*;!#HkXJww1N=V|0@rmH!y~?c{&W&=75T5(tMSr2}f_O^53g;r1Fsm)3eZ=nz(2uI4F6 zVR6|TA%WNZpfwq|b9N&Cz5Y>Pv4P;~8ASiUdp-o6(Mc247=y3N3wY`In6mR1L`#WPv;5 zDhSI|AmU;w`f=)ya8xe9YtT3LD2#7#!Mw?ReR3Nj(xp`v7(Y-0KTOds?7`alDs)OE z7gUbBXh>K&(OLNvI)YKzyyGipDHcv(WrF;HaxjI0n}v-@LQ$%h%INASg&lUoD>H!j z17m9FRT;0e-ouS?luS;l_vbJoXv04zyXKpc-e93|Ckm_qjiy=nqXA0JWu+**XM_$2 zRI7!bi>Y?SyR*x;D&}DmIw6qx-9$^t`L zPHLD-zGtRVYXh<1S<&5o>lv79)k~Y$`$3-eA=ZhdJfkFEsub3 zVm3M`z->ygwG0Iq?i#742TnUJ8;BM*(`-6(b6u84Mg*-9L}TA)`3{e~^dw$qeJ&i}C5KVlc@F0q0s#-yfT2#*{kCMQ;23eb|srGw<11dICg64^w08XXs z>#UyEMqaV7tE|LRc(ERFM^UTsXg`P$8U*9oDkoYJUa~?|jV8E~&^{+Ekk){at!8I| zf(wF#)3R+lZYVYR7?zfqprwKemX9YBiZ={P-`k>d>QQku$WoCIMS;krGWiBH*CQ+v z;c6{}$sbC8DIt_QhhazaEF##Xz}T1w4sM}9xIQb|?&mf8H6hqhizxVBCo^=80+RyOXLITXp zKWQYm^EKrkwBnN$Vy6vVAaVPNNKr8OMZ;B2DFUY2Ae#};O&yvNU-=lriMqP>#lluS ztkiQ3QrDfn0PZ2p)};WZII0rP)IP zOE*J;L!W9QYBVoRT%N`*RUCLCxiK&81*jWEIu2>!anP0%1*Uj3=GCmMipJ}Nl$R^s zF}u|V^_k7fC%e@?-=V*c1Xd&zt)tJye3-t!n^*`1N6mC7hvS&sk4J5m`bvOp<^%3c z6_tL*(RYAz%7osHfxSK*nFPRFBPpJtoI*5%^^KPXdSxjoHJZaZj~np~pr9l06m0sD zqO}^u1fC>;s*N8>R#w`8X>CKdgIu5gZk6r5N|eKsH3BUWbGH|ZYciuNDJH(Ohi}W? 
z3+t#2GwgbDR%Q;?sYQ)j1{GfYn%NU4*Xj#Iby|Orvc|FwZ5wfa0EcW58oe>S)0lW3 z*7`WNIVCC?wh8_5K{xC>Md|GK5hB<@8~bVlSM^~PiG>+bbYl&TkY#))h>keN(PxAM zk?K58A}p<1cOGYOFC1Rvo7z{%@Q8&zjFV9iiM3atu(+xvZI{$~YcZ$w* zTer4j+qP}nR>ig|wylb7Cl%YaZQH51Vt-k)ZOrwr`L{LqZu>nSC-1>{uF-qH`~50N z$3JdewdB?9eM`==C))QHF!blt{a?e-A0FU8!O$OW;lEAz|0@jrr-?krr{sVMcz8Zqm7}(1!2S}exZMH_;_Kht&Fxy?=GicF8deO$b%pvA9r>o_= z1*GZRza?o8h!p^`ygUoqUt>tag=v`G;UshLjZ*7V4*fU>Dj2r+ShTd2tq37X+ zwt}1VRrZf!VALU>1RGAR%6{hqkWbn*&32o^419?bqbj=%V3i_9t=%ex+DW%iCCPV7 z7jY*flel@XTbeFc4mS0|7?OL0&CEbEaa%1Q3@7RkZA?#sVnwRlT3M27*$W1(&dP{N z*pK(S8+o^v#8Q!>)!g-$^cmu^leFu14i(QONAUoXi90CN?fbP$Qv%VJo6H1W_Sl9R zwdJZhDD~FHtbg1tL6iA-#W6XNJdu+}s31_ue$tyVdhN#IF|N!# zGY3|jBj>u=+{Q>$RE&K5Lk*0B+8K-|b4Vq1w>aqbR$aL8Aa6)ODgt3Ite8$s64R%n z{o`={{Wv4$6GYOEZ}P3JzH--EU=31oEU82rlaDj`e3BWCk;#4qhL<94T`aMN*Vi*L z_91g_-UYZFO@o8`JA%|*;RoXHERW0%@hI2;6J>NJ{^D}l--BBQO%53C zr@*1w#X0(z^@lbBD^uTqeVuQun}`|n;Nk{r;BF4gHCggqMiPS4`457^GX=oAJ6-U$ z^cQ@3{SFvYJra4lGTWgy`K3iP2u}Q3h2d>R22pYu7H#n5$sRUVnPn5Qs--NR*_w;D-Ej0 zg6ii71cSJ&YJqpXmyYQQYmG=q$&he^mYlMMlXAVvjg+LYS=!jgE$2Ds>%uCE(_3Vi zZ#27aa=(YoQIjP501>4+z05ZE@lfQq$xxp|S!jw&CWBchm;A)8TVZri_iS}m5~uRE zHr^O30W7`t1%h96zm6U1LKv)m5$=`m|4!rM9Q8m&Nob#|QN!+GIA#NTbPY~-0D=-5 zB{^}kaNB3mFsjnL^WAKIA8Xysr~js^*ZUW zCT6Rj(Ij!W(be~NxRL%X6MS{nR4!bQvm}XF^zaTX0gFdJb9jL|=?Phxg6P46O!c+b0|6gqF4)7o z?4rGbKzi4Ioc2$>c$#4*Qd#0~e6<2?s>j`#%N&u)0N~ic3v87@&gaO2Lv3(D&j)ok z&?BwgG#8)q3q+&uM3t+6GV=UJxz`5CLAY)54~Af~r#kk4B?7T)TFL?90QUanxC+-N zUX8DVse#khD8O9Rz=8APNbutp8VR3z4mttE6Tmd`X#JcOcT!b|^c#Y&^*GTqi1vCK zy#H_iIvx%BT4~@0{wf6SMMAwixaK9^yf~)8C}j0lM2}kQC&k22o7^_Qh~rKk;uha* zdl;*$mG~?^rnC5O9I2dm-hK)8Pd6bQ?>sLlY4(MJG>c8i>A*sjvvMOiHrgYUVC?-ipX0^V&CBe1 zGm#czH%F=3*_J(ZZ_4|SLSi*92Xkgt`_jm!7Dol}Veb?rF1^V~u2>2-nj>nTy#m@` zC|NO?V6OUDHbf{3GvXmhN%W`!iaCDtywAA~u6PF7jqr`E=5|mQ=B2e{IAD)`;GgzU z_7U2{hLPB(dO_rA*lrnXiZhB^Uo0uY2xdKC0A#q^g*CH{0R~}R20sZ^%8qBN z_IEp&1;K!7qf+-C1Vso5rKn)S?ok8Yk7cuy_?y{1Y3;X{6t#hQS2t?vYB`kp{F0+x 
zK(*P=%4-`eL6=lQ5K^g|5X|jZ>ol{iWPzK_A#}VSInkdCpmcfXCnQYokFa4XqPRar z1!Km>_TOKuWj?`I$Cu;&l_6RFt-1RLoc)uz%l!Au-G61szczRO{{*%F?l1appZEVv zTVwf4FZl159DQkPF&qD(ty%U|B8qmdMGW?>a@3-0aD0c44MfJkVKpOBr)`w9!27t@ zqAx2Rc6g{uQLU9g8!fBW%@S)?#Cms#8+#lsj~LH!Mx#tR4=0MKvLX8Etd!)ejHVKQ zO4PO{j&GE8GO1cx5mxQ1l3!U-5EN}K=}37mijY1B01SmVuf;P&cfcx$%0e|U#_ z7crXjzC?NYu6L(Q9It_)#Bd9}=usreYpjC@{wQ*z;ns)GH$(lCq0Xl`h_>Xu6||nH z)O9r=QG`%6oaF;nj~!3ZgU6!|R(-#JW%?WVuV+3i%j7|X{(5Mp7_;A9XP!j?yykx4 zQ@@Zo5<$7Nf*AH!0!zhQA&vE>4lI12&;KcpvYhWy5L=XgqTDNbR ze30jNN^*3&eLqh(+kLBbs)-AV4}>xG;d2+ThdYMcpdS^323dT#8P~NuuRFZ(UM`t? z%Y6?0<0UG=Bu_8B;v01R2RW zH(s4>_YYLY>+ZATym>=X{XDgD1FLHfeBbsF%Bc51XWy}_L2cklvmqE+D$&R!Mx6wI zLv6O_VcfcIORkzWL?|SO-0blhB1foXwVT7+4!l`;hqcK#GU$pvh1$*rjpu>BwgQVl zzxuJ?^46^nMVF}&IE?v+DM~sOXg_J;9IeQF&l(Qqo!dc8~9@GK42*p-U`+j z(XYil8ba^Lu0i6w5z|jgbOXvqj7;Cp+MPccm+H+BcuY&>9alstzop+GOt#OH2`@!? zM&rX>q4SQGeJL}&8shDS?n;w068IGRh)bfOD~=`Z3>)8Ke%c8VtjR=Q;{$;SPrB6< zw21E(g~>zCLk{k~8xfgnmu(`$u(RAp?{`o=m!#YA_?cX#yBQmEhC|1W7d_SFa=jTJ zyc@Ky)E5iIP@PK*%ORi^Ym#vPjeVnzLI}_6iVaj*r3!m?XXHBS6c!sm42F6G7Ft@{ zoP)wcQ3x?I3`&0yCy|z|^#=ANr}2a( ztXfPCTgMgMaF3?Am=C3yXbi8+YwPzPOv+GVmz-$?5a~jU(`Y;6BUF`U!~IZEI@QN7 zYMh*|O32=3)9So9183lcM(*ebT-F8#jzN)j_rbz_rk~LU*%NAwj}{-{SQ~%Gw0;Vv zbtIOa@$U_N<9NE#Zh!$azR0f@Lfzw|M})N@a1Qr8I92eNz_HR7q|ke4YDNOO)`^I7 z)q|Lxem&!)xnlG=lY#H5caA^?%>l~e_pkPt*-Y`H$itzlt_#u&j8@8K>zan>IR z2gGJxKXQOeKv6|AI7{jBXC4%Az@5yjO(or+XgIV3gknA<9k|NmAwf+)6JmAt9S{dY zT?QfV>*@#ioJ#>S)a?CR#pV{Pm}K>QVcA02}uwj3&O8gDu8)<)Kqo&12oH$ zO-rJkApj$<~BW?2S&O$ zr&P4>+(J(Y=4;pO4!tg2Xszj?0orYzHHmCYz5cycE@7fpJBKy;M?*eW#|`~W)u0CN zOwfjj2NOW^q0gb;4h&&v0#;cr7=9U4va7CBF0C4lIM5=uP-@xUu{Ctf5b+)4rrL`z zT}K$R{NSpF|<|Sx9_si-el&kWO>~3zRS=RbV00?H=x!Q_DnVLYrw7JgtjJxS@8U=fm8*IOLD^t#(}*3JSpsLsR2h1y|D7i+UCK$%_T8Rh$r{ zGll(getLGlb|u*#GnZMEa86xO1JTng|hmW@poY zT}-gCC)>8?wB06G2vB3L3YgRyFFrGr({3N@7htRq`4sdA@z9GMQHg&6n%roX`z2gV zJsr+=%x;sP`i+B^>{K;jNyY~blISvn~ARxZ?kfE&Qd6{<3s}!_cuRYrtCsI+%811a?w3m zye7SFNmA@`dmM4K{`OjRk2L#Ct+-If=a^n}@s_$*5 
z^~6`LP^*4(3=5~!izPNuD%Qxi+mh6c<@(bv`lJtw0(iD- zkRU5@TZf%kj^f0+r=m`wTXaDASYky@=u>ln61CeOS;-HL>%SLq;0{eaz;RF~erxnx z7`F(}KPr@@l?RJf)EXd@3~;P$YN;EVW+?&n@fyOHL_RQ67GWPK4=Z&?Glv+%(p)bM z^zt`*SjrT&$3Ufy1&EDWbyDQWx;2QSP3gs6on3-MCxJlUXc6?no#7ca_y(edvV%Om zG*qpYoSgMNs%S^{ZniRR2+)JD-+{`|OJ#sxPPRCDDoz1x^|C$v1Qx3AjUuw1o2b$> z(vxFefaP+$_f6lE1$t@yT;h^mN3cvR|HsjEhpIK~xlm|tl z+Ud)WbEVToWZzbF@I|iasg8Oxz^AG>+U!L{qh)a#pPN`gE&h%k1$8h>aij2K3h7V& zxtwO;xeVm#=iQ;&V15_jDS%_cNWTm;`%(Ws4x8X4D{*4@x!rnuz>{4?m{NV;v%Eu# zuMu;o4U6#?$GVlRIgH3?hLZbsQ~VnK=q$P zuN;xwc!u(+r#g^V3Fbd0&7IeV;y|RnfDP)MQ$=@#RA!^OhyA@mye-d-RWc~TNz3wD zdhk+ia62$|`TbYF*289C%V&FhpC=3P$}g9j7?zj0ME4e{9#5mNrjo)7e!BXfemN)n z`iD_z3ZnRtln=nCitE1;s__#Fxg})n*2dMfLNt$M%DV0>!Kx9mY_*%7vv`ynxRnm3L}N1T0ciXuNYdRB@H}>&$lZ*GLxMl$tn-Q1$HZihzmlqV(F6!d41IRD%#l zTz*_WptW5s=2csO8+3^4Q!H6Z5bxhno&U_!I86NBhnhC0!s0xA_i4lTGX+8w7oepq zW@?j#9(MF(G@8Y?jQzy3qA;w}la%i9VW^k|s6Rvc`#zE^b;SX%He~VV=y>C$Z*tFCEA%%A&VKi7*IN0bx?OS0k%*zu_ZG`zFsHu%g(Xn zbi7Oy4WzYjTF>U}D$sBV<#CLZMy&-2m~)#$bH$Blcrf_$6I!AP!v2=^^_y09(4&xE z*ekI86n7-*rF>Mp?nH3A1f5HjGNBq>O}t)IH+7WAN5gcX#sCGN{rU8Ylm=(9Wc?H4 z5GE%qoxNNjhzAHi7#addzT~g+T*#ox4Wx1z>IRyvkGqL~y^>dkRan?9-sN7TM zo(u44-^jBP&al7Y>)K)9pkTW9w2Ml$W59r9Y-_+SSW^JLmf-}yq3@2*mLFc{AQEzE_2CH4Ik2c>ns$DP7_@!y( zCCAm);WCDe%7^CHT?;_g-B&h}VVAY0u$!OXTyYPuBrQWybe!>$MftQy6jLAA3oX#^ z$6r>vJqEP@qEC(G&kpEcx~+eFnE#nE{$oV+e;Pew`Nz4x$ryjjVP*ZZ%=ceWGS4mo)4A>L(*NIageHI6-w@F0&Vtrf4wJ^NABp z{)WbG!6C9a!OJP0TO?0;2QBWG%QW(JJdHs6Haweax#vAUD`>`AHt5W|!4DfOGad{MGTu3!s^8j)lyWj@ zYN8F!Atg+|G#e;>Ke+Xb=$SadQ&kXa9Kh>Pm(PrGaJlKE$`zQ3l<5{3+?19v^k5H< ztNffh$$BJK-|+{0g`I{s86-8(N_Z; zCFh-=>IIZzg&S8A2jfeBv;*0GsJH+EwRk7f#Kbj!>c1zMbpGCxY5e+-!HLc$d(p5B zDZTA_6msh6G2%aXYPV!(U*ux?AeH>coBz}4QMB-S#^O;wmE_UeeP1b&M&1^NZqODO zzxS8bx?L=U2{?e^UEiuDa;SEx6^tx=R?1HUTqnh2L+2V$DX&*Tu|}5)|N0 zlQY2-E+P<_N=~F`s7q__d@@UG(2sG}Au>bGJGhzpWJ2+efSDp$)N;`6CMkG5Va>)g zc3gGEQtI#|64@c8-uiz2gb&MQe(1&<2S0X_-BtbFz=#f(hhW<-7_E2AY#D4$VaAQ- zeXOzLwjLbUG!=&Jf0NwSk-s*NtHgyK06GHs2xJvfn+Aac#qXgG*;T9=izA+xX3fO! 
z(9l1ddI~=y?r1*7j|~FHEp%83{f?l9SfUc8Gbat`KqL#@0hvmLoV8fxwI>y>Lz37M z;qKe!*`X)v?e}LwRsoQ|zytWr!#gHf#R%8Xg_r8P0_ZHk7P%)t=r_mMvyQz9&1piE z>-jrJ>_G#M)V+?PKmH)32dX7H-le78mvV^*cV(rR=qb*p123b(I$Ko{gHYlU@Ol=! zo@9;{blia8xa*1}uz!Mhk*0;%+~3J)_*Kq48ioMlw;l3NBvV7Djh%7*Tm|=j&za~J zl)E&0xari^{xFfNJ;Y18ljC)TS;B3wn)yilhGIuC_SjTMsR@JteB+S2Nw(Hp2*{P+ z5Kw(|F1rao2T?#J#-r-Odo1u|S3v8-5)c>KLi0A8w?+65WNe`t4k{931ABkbayzzr zw`wA3p}kKDQzMc7G{eBu?snE}*a4a%!t>^hD|{dPs(e=&;|Qg=v3>}diuM);6$hol zbt7S~4iF&lxDC40r5qE_4=^^+6|{#la_OfuZ=;+-6sya$Lm8kAq&aCzc198u2!;TP z$7MHXb_^u0Q^^wmDsw^mWjc+Y4(>u95La;7L%wM+;kL67=`yR0xMe3?z*bB<*A|)A zFG+GHK+$dGQAe_6Rm5t>j^^2wsGNR zGS9)=G<8r(%M|t%ZgF5E#O*66=i)>;(%I%*4fN#8LZZwd2(kpUuo>ulQM0bsz|Let zYijLcl_E7F8avr8MRUW0W!0t8xw=TbYCYb2WB_sl#?5;eW-rHLwu*sNj)%+mVNzE=c*$ zSMYiLHgO6N@4SA_dbn0Z9^Ix^Ki!-5t%cY<=ab1PeD5{4-z3i!X?!j(qVmynn-Vg1 z_gIGc7xe3{r4DkxP)iB@(l>%oq?Q+3g|6hJmBzlKzA_i#O#&w(1Ctg(mGhdVLN_v@ zV6nOcsppbXio#6Ez(d|GI2&~lK*Bo3Z6F&Qt})a%wJ~=c6^4<+6FyHK+t@2oqS5zV zoM#_?-+ics^^v@1hQq#*GEPmF)-P-Y;>^enN6qDyO>$KY4(wc&(!=W*48KC0*S(BS)tpE>w1j}ZxUHM)MmI_BQoNUX za8RKo9#LHhxo95PYgD3_=ZC30<#0_-p}l)O9_gj9?}Pn?_VQ;}?=QLN5BT_JF8YsL z+W(k~{&DUvxadzJ%b&HEuN5i(NR}xP{5?kcD@%}>{ckf;lExP!4IzG&VDtXe9+6t^xTI{?G*I*B6j<`tW*TC1}}O8w;MFzx{Rk@xrKoa?v^S zx~@ZU(H`19>7D3dqF`!&#CzT$QA#ikMK66zpj!Gk6Ml4Rz~vr@m$0a%cK^vi=J+Y(2SEm5Y_Ew5@#*1(bI-CL1xb>}wZIeEVZ;!>b|7 z?Fgx|QdVGXCKVsE*jr5QAD@ge!qVYEh^WT7R{EkPwk>8@RG(L-@w=xAvNF3Wt0Zx@ zaWxa&8_1qfyILB2;rH-wf@fk6aZ%+>^ZT-s{b*8@W=!wt?eo2v58y>w1A$UILM1Rq zyO2&`Go}b+G9#NoYnfb_{K_rY^M-c1iu)siYQvXrV3FA*eC1Gb-vG6F;U$hJAje2r z(cZ*cJRtoHFPfb_D-@zej97x!yp*j|U)JYEQZ)58srI!Rku&OS1z#x;T#qMbQVUI) z<-jkqD|QGGJAnwRpcC7d)7p(~%;AfWgK#iEXs723IJz6>`)>_YzpyAE_SPlp+yfk_X%sQ0hQ*Kpp@-eV@LzZ;;BrySB0M`r~jt zmNVT9UZH@{2@qO)prR^95-EuGdUo-1VwchEJta#{a5tPAP(^q{r86kl1J@oc$S!~H ziNv9_Jih8_n1@(d&pC7{O#wBjf`XbhK}@82+)<57<>(}|MGVq7szk3Z%iEnK6ljL4 z3$ioDn{ZjLT}AKnhp)!ev`+;$D5;7fBz&gj&=h$x4TM(#A0Jo9$1E`t2%G|}q@WB@ zk^fTe7kiY6@nnT`%7X&2tB9QY4hCWM9ihH2k{*iii5X^^-4BS6T4(m1xK`T&Ml2ws 
zBPe#YQ0rfb35c;tA))*I_Rr_D`!!!;0-n+vd*lWvQgF63bw>F^fcuw`sJX8Pn@+Tq zc(a$M^vN_huiZvD#}J?mnARe)VXPA@kDrIUqCkE{k{^w|CcF;oWn;t~5)GrI%UWwn z+d~isY`D_o7OMW`kFR-V%u0jk)KqUH%5?OxPyIGzi%XO0F`XpALqRB)7q}lF265c# z0?M{Q+}91&-sE!H44!ox)+U$sB8>|U0K?cAmjbl|h&+pnqn6EevwAI5l7LIXjwb$A zaTh`&lX18-B(KNHy#SMdEF#j&v6g9M9ERhtNfbN6RF2MQ3_0%K+>paZs>*wD&xE*e z&18o$>so^|TgQ}IrHjwsXNqC--Z6#C_)3Xb5}Jm3(Z)dYeIAk%O2) zFmB(+RRHzly%^}<#}E(RM|OtrK)L53Kmlt@Ap9OA4ik3+ptBqzl*w1|mct!KqLs*# z$0>rW%skLVYny&x=;)dG#AU`NhT@>X&coOcAb7anu@)_%b7Len8;~B2iv+8py&Xnq zi@vTQXS0!u^NjxHDNUgV<**(DeR@g$QOXo73Q6(x(Ppf`oYl#X1O}7K)dZ6CQef#@ zXEbt1pwTjFLazCWPH%~Fi4t_1j}&Ui<6^`H6om;2K6YDBc9WYl%L}pgJEm~|7A1_~g^n@gtVx#_g!Qqt>f?CK8JM zo}0e7k>D4GLP|zpgA!H?FOM7vuVm_D(;i*gl};B-X=7G#KJgstz;yQnCvbl62f92N zezFDygCTOe_)RXB2Hsf2X!$pBGdJ;<$wKXTollNZCA9~wi44JI`s8CcH>f&Caaa6o zAI0n8lVxfQ3?5g!L)p)(gzj)`9^P4Z)>mNG+8$tRl~+;IFqdXiygz0>>pSIP@SKv7 znVGbQ5_MLrmMLszTTVb^DnE--$O%?EmZ}xnu=bys#YM8)QMzkf85ulU!?$YVIeZc^ zjK5wC1Afg*>(xlUVK^$(`nVSL0Y7~{H3Y;fOO5b*ZM@sve2-1Uk8%Pm-8q2{(plJq z>gJHxOn9;}H{*J?=|{BsI6Bg7v8jtC0<#L{)W$?X_(2h?EzerQF^JT#$`=5WIH?GC z@SZY}Dk7Md-8pI3w0ePOasxo;N4;&xe#2g!YJGCsAB`tfC2vM(On(;Zuq=)x-Cb@0 z15&7KZR3LiWWmMDqx;}dC?o-Io3;;p$8q}z z@6mT~Gg-{CsJu%V^LfSCaee40cfnA)f?Ln+jJpZu=jK)=WQLMCkQgo+q`I_Uhfeb` z=UuPmaJSf1396=;uIDGZ9lDacA%s+VD8U*I`D)Y@VH62_M5qQ%b|MFurr&;Clr(oLgn>)8;0MVp@ zDcXU%&poK9i7p)SE<|c72mO$$d+%Gb3c`ZE&ruvO?pf}*r8mhn-{!0=_V6*LB$!07TB=TGOcD(u z{HWsnj7cz`z%{U9N#9Z{fL<3z9_!tn$u6wsMO|ADxf24TPCy@Nw3An)6C#c^bfJZN zUtkS16iy#CslbtUc7)R7qE~#vX1&ROWlYw8>+iAt%h)ID-}Coa|G#6Of1LYQ#{4r^ z{>ke5r%}&8Vd#HvcKV8}{qKv}SpN*X|Eb$CekJDqrq}n?;A?yE)!_SBJtM7uZ>UKu zwUFe!MzE@1ClNaUdmiYjKSSG;QKR{5mJyG9#8+NN89y2QR=-}ICV8Z+8qNiW2h{$0 zIYWJj-`a-nBlA~ocp1-V;3S>I9>>@o(?~s$)Ev#qLa+DJ%~#imbe-LX7MY9Ayc2zu z_ulD3n@yvtLBu!QHviR2zX_v#tM}o>EH%6ul-z(ys-q#rgL>veCw+o)@oveQhR zn=ano@W8)(%&lIZ#n;gEE-{9D)}n3i$gWPko2nnLAN@49sV1ujzcZ(MK85Zboui!n ztbVgS^-K6Z%;FwP9KC6t4%pcGxq_eesFgWbU>|kiuVnUXp7cIKn&0a!%i3u#hQIuL 
z;{8lSTt&MFL;NS5`fvOpZyiz-^NeMcv=DsNGZtAum=m)?T~$d$mv7&08!n56hr$(x z3w8;|mt!d2xW+s}Jv7|An?qPm5CH9)sF!*8+TxZsrGi6^jNqF@tqU;Y<{J^WA#6O_ z5L0Ml=RJ#26y=lzHQ?QnjyIa4#xD~U$&WIAtu;X2GY?KG@$5o=d&IV zX%U-cgYA0hm}r=LEXx`eAu>RtZ#G|!8-sWKXqLAb~^?ev}p;ewv|2` zUW>;Ioy1QZM6q&&0k4cynjcW0rkFWCtKvlI*gdt!v2lI~(Omh?w#5Yd063WSUGdZW zH39kn&YJy2+A9rbLj=Ik_&s=ox?{_OS@9vYvjXta!c=5_3&xW%1dv7tm#wJ%L@J+2 zqhp-55CTS~MKoXf0UoIeR}2VC4)4?^nfzhBwi$RApxDg|%cP+6yLd}3CKueW83@4v z>{C2?%6lqNSQH`zA4U9cCIH4R$XU{Z*o&r&l|Ep{l}7;ur!@a}SjTC8c-&r5&i(vC zng^E4G#A>K#GbevmT-5{0ON1TyIJ3xA4nQv-`t>mNC7u@d{kq;4NylH5$a*TJ^pg% z5NO$>Ei!c)25ZvPJ#*nn_De04GA-NzbHSjoX@78P!e(0PPWBgWkwYQM_;BSG1ZL2*$P!dbCs@h?p8Y%^K0;Rg}@;VMq2uA1q7@ z9!o=?n1m9rkzgJmB|#K6d)^+?r)|(`#1JMAeVN4x5rr&wj*Q|5ob=ve=vE;4X2*=N z7|zWR#wuy-{7469svXlX=S(JC+zH%>k5gp@14vwM!CvtZ?^!ph_!9o{3 z9on%c4~fhw@aVAwj<>-sx`0^6e5zj0D9?vrG!9f>K-G{+3z4uoDw+!d(o9E=-Q2*Q z&cO3mC(mo9ekc9*ok$g&33la){EBz04#%N>~Yr#_jL$>xv_!|Im83H%cPH+IY zG#Juq0j7z2eGw>X&5uL{F8WgJ=Y!emPeY})qp$$uxag9DNw6HA0#w>@?#6sSzy8r) z5|I_4s_?YqK-f$n^KV_EvAn1(q`o_}xpuwM+~(wnjQYAwkp~{Ic(0I21L5sZ2n@hI z=-p`OFl0N7(f;OKfJ}us*%}T`8>_WvW%fRae2mg zvd5KX4hlMqkwgGCvthF{TpzlVR~Os0Zdgit!c=YQ$?%9Q$u6UDZb?MYk5gZv+~t~E zSLB;`M*4vpgXx>KJ_E)*wf$XWh7G?Z_uNA|574}%x3q8Yx9^huQwji`;Z%hQgK+c|`4(YxI9 zO+L^`tx>$skYr80`rrr!#a--^hvPN7!#K$gRL9wnLsdUj!6-t=m6YqL%}=?7vzGiS zF_~sqX&jdB(Rb@lgv8-+=a00jGWEcZe^DmK&bAlxF6w4GEDOFU(Tcj%3g@Z^IRw(P zhYO#w+jU$+)^Es(Joy7J+@DHFEnQdfSy+JDB6s@%3Y9{mPITp?9j~O)2IJB~3Lx2l z)X)I2r4Q!2MCkR0RY~b>Q(vYVB&O?TUqZ@2G4G5T4tD>rdmVfie}3>F5L?X#K9XS; z7G^;viv8Z*x?5(SWSSXWBn=3nl(NlrUviBz6~I##-Y`IJRaHQ2wuo)hvEoBpDkZeoga3K@l(Hcvf9U=vEa4!A8c`E9w~-gn1ha;8z(lbVY{V~!qG z6e~%^If|Lx%Z zM%K>tmj2nl3WCW_U$9D1Q`|(=uQ0sst}oaE4}}1oI&uD*vPMd|>}~@C)7bfqkSOXf z6vFhPhwm=DXVi2XH*#cacOCK?>H-eL5OnZ9_9tNGr%}mY(Al2@Nq?;X{)gW8 z-)oUs|K1$xztY*CUD!XBa{SjK&3`WC_%o6H-=weD{$f^@lkGoNDiHkp^wm#QY1@sj z^wq1{NeO62BuTi(KyjNn)ZGl)9}>Ml?B zGlvrjP5{Nz4b{uoql@eP5w3h-e3C{yXuCH;M1V;v1XEmnP=x+67`~9v{ldzmDhW}; zg$^rb>*|n!mc=T?=3K8{#9Zy^`BwDL$<{75$&NxeiA_ 
z+ck_W1qR{)@T}M@1~a+{7PjGQAHD=|UXu_w*Wrjvh4*DRs0uV-5M8mEVEyu7aW?W~ z1v+Yl=vEsxvX%Uh{b0P;#Cl1ZGmB|pm^P|sa6o%K4;Wg^I8d~fXX3&fyKP#<>Invv&Sb!?A7zCzl-ov zhgYb>LgVh5$nlld$g0pPrH9V!YGQ~sR&R zO<~_RfLmLznwAP7J57Ui`kn>lLVE3~xbYG{+VN^Z8vWfRShVF?|8^3BAJ;|$gT`DP zl5Ps{z>p9O8tR9vovn|(k!l(pqUOwFvN%3{f=?4~(s;sTtTe4jQTc}o>)8Z2**D>$ z3J?9J5#h5>$Y6|fEFD=i8apqfrHb=2@owUBQCw0=E#7#c7%=2@YA@K-uS zInWfJsCP4;9BPVflaUQVyk8K+Jy1 zTznYs7sGqEa&mzFOuWw}bG&#h3}pr#F?Z7EdFhmbakS4R#dk=oOS&k3JicIQdfFlR zS&Ro|cR}Fa*L1Wf3O=Gw!y*+6Avu~-jw}1+3%?l5$nA1D0D15HRNe2zZhze`H7p{w zSlm(sNgqkrJMS)dj`9x{1xj{WT74M2<$rwh)Qj|N(hVQ=W*leabGE!*y~43HLo|x3 zs|b2b>F3UzE{O=9g>NtK22^kztNzyuknPXuiod)7|F|{&$pZPxUi_U_qJO;r|7^GX z`wQ^TPu2gr4F8rL@^z>Ex7YHYPfv!g%{PDZ>6!J9$cV@vF;fb)O59G;{vRMffK(Y{ zZQ;NQrM0ZO@FVfFkuLkb^4(&|9Rc|*>b$(w+1ZyLUG^<*+CdRldco}5?CWaP+oG$6 z^;-w_T`PEKis_Z?7PTYwhPdRbhFY{fu<`f5A1`A1D+a5}XQ6DbIX?WLbwy2Bh3(#cztk2lwr`>0o*>p6+p+gs|ql2*4u{cwR4t%hi zKegLp{%EK;x$Dvy5+#7vRrl1bvV&NG!7t_>x8&+~Yb{Vsx>Wz5oe=tV3Z~|G)XX61 z9Na{o@1`?AibJhw%fzW_D|_puC#qj(D?UwjZ7FY%$Z9 z{bMnD(aGr-n~N7V`-e8{QdDO4B-f^H`a8e2>eTJ;RrhY1agx{wtemfP4$Bfd#4Qh| zFl~xfWZw!5nOW~I6t8AEedGD}dEnu!hx^9MMK2@+Lp2W8friujO;Y8w3KXct?Sq)w z;B1RSzUN&6(dqy2?|~ic&ra6BD_K+CAn70JB6^yCAm*3Y!D+8#A_RgP{Psy!;9p>{#p(qyktJ9dmJo?Y~49V;MOahTXq z<~F>3*P}_hG~V(nLi# zYrv(}_`ikipfzoG`KO7Zc||Zn3YxS&=Q-27nvV_q5ocbzozYUaK^MaaVIlqzZv~XD$xrbtL7jqO?7D@jL*%0`c$3L80HBD zoRa@xqLQZ8=DBDD*xqAd)}Cezt&SEfw}CS8Q?4I~z>?TODlp1PR3ADFSYsse%D5;e zkpxG$)P*GzH)N=SAa4ykjWdbR9aoyfAum)XvTSjwlsJT@{xS3T>w^=~V~>O}jrG-$ zl~~`BqPY$T$nLdXq_Z$|R=^GlIe=WGR_%@^kyrG@z!3s$Lu4H~-Xunvj8Tq?DpYI| zBCPLXX5~_*Ol4vE+!U1bsfk8WuMD2F5JUcu2_qX?22MZeTu7UVgOtbO1q=xZC^-u! 
zlge$031&dj+&rqnlZONO$s&ASbosN!;-w>a3)##ylkaR*cvS3yqNJ^3)0Z!}KL7%&V^a6+q_wAEm-2o;O?5cUR zLE3j`d;`yyK8qh5uz82iQgzj-Cie=n!_`1E6xqfF_O8o=*CC&nabc6=1D!D-hUY@6 zHn-5$J)2^jVxabJ0F;<4ax@e{;%q^31rU{gNncfP&r2(}(8MJjs~TOGzwCI(i<7e-j**b3-{-p84ic9=0Tt z%r;z$!UPWV)B^fTSEGJ!orFOkTU>;HT85SPLGv(f#e|z_ zRglzq*3Ap-c>e(RW0r;Ine>EU8dgp~{Ra2c(ji&b5n#fV6SCKv-b8yQT2jl#Q~uY$ z+mu3tu%Sx{ufMQH5PG^%DepB#Z$E5T8bOZy#tJ0uFrt|$mVD73I)M{#b*ZYhkwsKo z#F8T)2$Z2kh85KDD#{=)!VUxBz~S8_c(}6V<+Yy|%jBlO_kvtko-q!*#%>$34MJ>l zw6WiMaiwOXgq(CCJ-(ZD46xt}6algXnn^sNLZ6`k57E;RXKp|O%qT(2)#(?9ccn8V(6eq7im(ZBcOnEq$!A~ zfTCifBS=S7tRMnnqlpR#E*m=54JxX{4OMe*jk zYB*hYI(3cRhV2@+tN1R?`eiyf4uPe_vMg6GSi0jx9q_{*iB0IKPp_>Rx1iDBQ*3k} z=#es>0Qq~K7g+QQH?-)yPS@|3idZut>27$vVZ1!E(tJK7#5c(2%4^|H_g$YBY^9GQ zM%XS+H&C!dd57(M-?B&T1&?~D;G*x7U7z;^+9)!h)K&Lgrt!U+U~&JwO3M8urbcc{ z?lYHppS`H$zZ~@irWAiP_BFj?hdUKdmbp0F%|~h_ZhM)^Ej|WQmuPaf#T|7FK-_*S z(`Al0%)dL2N!fGv=iHSPKbafytsF7z_Z~e@>z%B;=c^X68ZSCtvinV<`3DuDMOjaU z54;gnWSF|7NZ&`%u;hDpEEaM+bH(Q)oW0KPUAxS6PKuk71NOoxw3s1Td{SG|mTPbK z1sl3(ZSu0LeHCXUSiU|{l)juPd!+0uSx$C=9D}OqMhdC_m+NWkz0(M9RoqC^Ms+>(J97Z}#s+R36$3o82ipkY6(x13~$5n0qJmzd~+LO>>2p>m~ zQ@Hcda_FcJ+=1QePBnt@<%bH-{svv?hK_W%TGF@HXf-FpH%msY3q92H^pEIW+?VR2 zl{ccomJs&qe$NHSzmWS_D9{FO*R&IFOKZG<(JC_3adhh;165eO`$k+t(j$VC*8W2-% zlNK08o*rVY`YiH38`<$v>yD?IW9H6X$2Y#)Wy{q}uNw7Lo9t7)S2`TKOE@vTdqISe zd0f~-RGaBm{`d*;%a-&iX`I3}mb&LfPmPC**ISCemaTCx>->JxOLo0bV#il)ncR10 zIrY<5Rc@$%n!LlaN59@LOqC?-bhPxqT!oAHpyv=nq2%|_?$gc3pvlzLg$L~*jVO2k zG^ryhNl?z4ZKr5q`J#x1{yf&M*uY@JLz?Z*T{6vRd*`#tMG`>nN?~fPEHXY0`aW-bc&LkI~JZm4-;;grTI=9`+&R|I@=B%~(pQbEPu@$&})!)}LiGYNeN zy(Ur+Q$TXN?BJOHM?{;t2jrG}nY}~oA)e2TAA1C?*=;X%$Le1d4+wtk-7sHtk8!z{ zS10fR-G`g6*$#F&Pn|pxtaHPgsWSWR>%G<~GwzPHVX#9~<|R^>DR}46OZF2~oFzp? 
z?JaO6m7O=9^U&TcgXqn|OBrq%kg4kn))nt5ER1$C&$2BnpC{*xX$pusj=e0e^q_bZ z+y44cH{X47Bjb8&@d`or#${;^wu;L8tZ2Ji4pI{co2kJ&>FxPrmM{)_2qnPwzb^G}WI`-9$6Xx_b-^(5wWR*|%7 zN54=!k9+6&Ybc+~%B0i!Xl{<+sgWD7x%`zw$*BIca1l3S?8gs%rL$S|^0p&Wh|d+R z9beQ@<}_x>dRJcY_VW1H%PZQ|IEWSGxzBq(D5&f-*of>}h>TaBzrYsJc@8^L6MUH! zG2gO?rRY!S?lBo~e6H;}GQrGGwJv`sG~=q+17=YMV+z=Mio20J-MhevO4~#8hb~h1 zUGA+j_4RN1O7or0YqVg{_;LFyu{*ZfmRGO3bi4|w;EsEEY+BVbgLmHt@0V!B(&4$- zEMYW{=rh*x5iiy&bSke^jyGUrDJew#rT9cVm@^hSt(3&0E?wqnzHW8ufbFDhIP^-P zJcq0+zdI=(oSw+OB{)l7^KwY5%N+C8!6zOz=a)%V;pZPFF%+FhDP1{t z_Qgp^@K}F8*AT5i$m*q~J?Y1>cY84Tde7eP88$heR9#T@YHWSUnEO~Q)tu4Dg(Qch zW58;%lvD0anCW>nHYM{^MIzJmAPc>_?rAw!K38Sl{p_}pm#QSHqmg|Xbq0{8ry;{G zi%sTDmgb-1+vaBHZeEKGN{h@KIvFN0|JE~X>~(o|6chF}^v9)&d!eJU_j~BMCOfDn zNspU7+#jQMY3$?Vn5bCiG!gLkyK|nn9}I`8{t5oJw?yg>KbXQ_ERg~s7(c<^|F;Z6 zioZMkwg4hh4n&V^(?52?);M5u8TJhO;ZCP^#m5JR+xd>G92K<)^Hkxti;+w*=SWu4fG=LpXzsN9U^!@CEnvf9)&oP~Gh{_yY6o`NwrOu`{NDPmlLB-PDNv zaY-aeG49qVpL}uggdujR^9a9K&)9bXZ@ck;?0X!sS3f#kZYs6>@nFZQm+J*xR{<6N zubLyi<}@j1{32^Z)!rKEPSp1c1w~sMAVFNa|quH+8#4?tv&Rw|{*s@WPNHQ;ddEz$v zT)k3v%WlJznUr$jYN6-SiXoHPbcdgEbuk~&Q2BAgdErY2H~OL#g=rGGAINi*s{&EfXY$PxGEZsOl*)PZ z8;g66KzB+9n`HB=^l>zukuWcE8kw!lvXCE!(Q|XC+>bo-pg=80s%IZ#b?N{|^2MED z;}Dvz5H7pRDn=|_7u4mO%|B(FJs-cE=!&L&c~^@c)A97!ZTeYAtpSu>*TT$7&-;p7 zM4GM70pZ7|x@1-K$h*tuJ&vEEk9LfAR1%;QF1UQ4k5eW_1c%qj>jVII7f~6#Z2~sYhF`cF`FAno5>A@T34-VNsVqjXv)??Xb zzH9B?VHRl@<@)r%j6`lZ?QI5!reqfcor2h~VS!_;=MpBA`Xed}$_{-~C-ji`DEvW9Hy>)DQOx84=Ey5DATEOYnUGufKzx-9dbx0Qe;k3AK>l@*mt*4 z#W^xuz3kQ9ZdNiRC6&Dr?IF%$)8iEp8Jbh->B#RIwZmi4Zk^UGd z_h14lPr5wZ(^h12YH1&QN^(!Jyu|0B&uVPUB4aHEjaKLMn4ht8dQ3{oK_HFTr8%f^US26nnUljDYepx}Ln7=Aohby^@v4kbS{g z%$$``-^7odf8*(EEjp|6k+ zE6Z~y7h-qqsnoOS_Wm01I_6^W++l8h%bu$XO^1)IXpS({ajl}7qW!xiGW9_kshmSR zePpbwXMI&SK1iLZPyJYZvj5-(zcbU}29^UgyN`~oQ9uXp3lSOUexHOPI?&-qhkp)q z+gtPU=Ro&+2;i>?pZ^q(_;aA!)+CQ2k=bln-CJg8m^mPrxC%NHuI@g$P zKSfMYE7S+3V(yW=JE6_p{aU4`$>DBCoe%msh8<1eD#gbuE_{)pH|Psyt8Kav+=(Vb 
zD{#xqL7ep;w3{c9w~l932?}sOwM^u85Udu>?47VSK&j_VoRS>M@0f-?waiuhJ`v~M zXjjk8VqCn#AV9=pM`ziwukJT@so8G4_bK8o)1g{T^VGy136hMIQRSI*jA_nm^K$23%x`(+JXg!M1FG}i2y5}a~(nBv@7|C-lf_FS-S{r%H> z`&UVvpG=*;SY-|Fj;Bmn%nG{%GJbCC z#vHu=rs*B_O=q`KcW|q?h7RvQaQ>pZP7|v>vsrNdR~dmZIT`WrDW$UceNNOpY(9#2 zWT@YLBs0D}srde`Rin$m1j$V1ZSLh2v?8ba^g}03>uOz_>&1_f_YNg$Oc=@)WfXiH zzy8&9WN?D)ImtU!3ijer^oM(zSED~1wctHvb?mUP6~*TdUn5T_Hhh286)~dxsrMzu zq48p~R?kz~nTr)1tFSYHdyD^M7=FbXLE1J|3$D{s%UB(A3Jmdeh0EZ6L{ZC_IUWTk zdMG6ja{_KKbqw_P42B;-NF$K42;AH)`1^HyaPuxLEgx_XToEpXgv%+zL3lB^@GB}u zO${z%?(gGlCm`hc9QG(J@m0{-g= zPs%1i;9E{XS^-yWS8o?w3H;Ol$uGj2sbzFL-P~OLUA>)M1Ki4^0gL?>GZvyp>FO63a_!A@ou80$O%iqfq zaU>)EB47w5`xJ$=L6{?~2Ib{!5wbQ2d2<8gq{O7G94winqp#GmyV2gP*nag5b-bJX zDnHs!`ox|?ac9bgt))*?Lfpx;rB86gd1#Zx+DY%>AR~7p4^9yyPyRYWKPqQje) zqu#(6ZK6u)SRb~OsxdA)$HBQys^pp_Dx+C6ccKI^-heL@&qZO^Z$dn7?x$#7FOAWZ%&AOijbv1cGV$MP zrKn`#kUwjzM9hMf-F_$=>iP((!Fo$FZ4h!+wVQ%TlEfv($)&a9XA?;_f);B1R|}zk zt8EOOHeXvLqu277`55L!Gi7`)T`z4f?I&6<+HhJ~T6S78S}I!mE5-KF_7GD6-clm8 z5CJmETRUaJx5Iq`Eifl-D>9lmR_!ZV(lqb^NEb9|=R1Gp5dZfHyX<`5hec8Tm(Epi z8;dq~Z=3qi&DKQ*@#$w*CQ?%(Z~_S;h_(wSPzsMxvbKQhn|(v!SKp9@CI72$r1t*r zzClY%O99ViI>`gi}fHC86!?XfFup^@WfQ2E?g2uB0Bj+|##G zPehHK@9K045U;OOvVYjTMX$n5(P|>}5@(=4Kc$wpBT`b9N@SBprEi9|ku5n}r- zMCi#DwdM$UYlQ6HO??f;^^5%2U;V-a-#lgi>KOl2U0BU=hLbKO5@?0Ip2wNHGH4*& z{Nm+AD*d}ZjG%r9eR(?_K=ug=)YTkejgYfJkPJ_6DxY|Ev?h#f z;HU;-&AAFYfW<;{$Nj%fQcsdV;%5$iB|;;SfnOH5Ew|gzkM-9Xwd3rDNGcacT}!v2H&!b>5Me(yQc!|HNtTfe79k0`8UcD~)Hef*)!Lh%Cw`=z}wB8(-9F1By9 zq7fZ8-iK%In*e+rK;SZMEV&3e5Y9Azt zeZ@fjL&Kmes@ou!xw8YWUkZo|Qno~|ADLoXJ3`kk^;CYk_yr_kFXUMhRkLzV(;+2x`07mU6?U0R zPevTr4w{m;=H66F_KV*`n`!FyCuhJI|A%poRN8(gJ2h~RJ_f-^F)!?ObDr`wi(T{K zR-KsK(H0$1inkSM^xOwa(>u%ODN||b8U$c`rq2)gADxXt6EEaxIVCyo0KKK25bd8F)xVFus9py{OWjil7kRT3IEl?U#3{fs% zzEWfb_$%~(V<;#w?(^KO{QmOglU==J?gG7ZqN~@3cU`9O`JUiED-kP~!@KL|+<4Gj z4GUzJw0NGvI`1fvz#C;E07zDLYoX{i^#A$6y2B)zIBlcc)xtVULt4m@7yWWxhPIaBe;$uDD0=y@|goU9g)JP~oVo9!C1a?&WmSUIG!+NER+9rb94vaEB1 
z+Uw{Bsd6pb8l*87JGjFFW|zc(4j+ndG*~T?$B4|Gyv;DtS)LsB^3c6cy{bnV?r6>- z8;(9!Eq}rpiiOD;Gvt3{%J-w_yL9wa+R4blxHo&ySK0dB(+Ufj?;;{M#84QpCAX`b zQSwi&A@%|tLhfV*#z(8o0f+?!7wG+M{K?Jr`;X%f0hVlFvDnyXsKJIi2D8xu$3{gj zP{6)WFm}(wOrAs=h9ShqAVMpGlTgO)4Tz>}Bw8pk%0SO#k%3Gk-cw9LsPW`Fq&1FF z@rCfp6vIe5BQ|F3bu)yDV#`>8j~^v6{wfmzWk^DB(>A09g?xGq7^67ySx~>o7yj4y zo6ETk`e7{s(8Hs6XGuWb^XZ27Lk>)1`@qjmE z+s%)BgaSy(96{p5;{1z#7#Sm}zb5lfgM24B&LFoSYLJ&8@un#@dYmB_G<@?~0%&(_ zy2M%-lRrF$!K}8F`4c>b%fJ9mNzU3zqGKJEC_tNv#kNr*I3yffqAt>DA~G})SST|* z3{JX*wn*EoZV;iC2sV^OZY4I_Mr#)hHkq}dY02kh9x`_KsJE_=!Rbu%7b-SRcSVu1 zz&?JiZ9b9HVYOUB36-u(V_LX7)?!%t|J=}Bl$2pOo}O+84}{4!DV>{{$=$H61Zqq+t0vojQzU)dAu7{q`nV*6?oCHojLJv?rp(oJ`Gt!lak%K;*8HZ?f{xw9MIW${5a)O-FfEV4;-#XN z1*pkeG-%n-_RY(&Eio&wSl%Bn@^4r=+SEu+&Rh2CjU>$0@2xMSI~SN!AGe0Yh?8E$O;E6|)nw}Tl(#F6%4&6~eA(kzrx6sC zuu^sDT8go_OJ(Kmoa(dh&Wtr4mXnseo?dDD`exyn@nheJ@~ee6Z!UgaSZ`g~m^Cu$ zXIL~hTIl+$=|npsb*Uh)E6zpeA-Tb{xYsxjZ=VZ7@m}7U5R#j<(Jdo7ds!Z{e)p2& zTk2XV>=pQYZjIjU6UVVqv!d?ue0`aO`&@1Ywy>Rfh{tO(Qnj%W4T^j>0?I#sjGZlR zzxD8`l)wsuGy2FJ5oQsAi9eG?Y@?r}kV7c^pa!&>_D`+mO!F_T&Upa%LH>hP_1mb{ z(hS*|iv|0YBQ`iOF%&d{(Q&m=ENt)ziVO@X(on`j$Poi-C z**_qjEOPtvFDTT|rkje~2_^iKDyx4}mAijCQ~^^FgN+==3c&lYSO_Y2qq&*#f&cSVS(Z2s9V6w3Xpl)IBJ zhNoSsu`*2F@v-kpSw}bmH8tD6QSA0GE06uW!<7H7j=)FSQ+^@B*YJ+jLDdB$pw zKzW*iwl9}G*`;{1{rP%N178k_Jhyr)R;KOCR#W_5WL5Lg9+k9+A(xWrbBWTi6dF6F zYENB<-56iLD%@ouMYBTg5ox`L7^{fjKUu`~g{ml{jy?uW8*j{MaC@F|iEQZ>>7XHV zyAC-^5pQayii$^|F8?Y0H%y#diO=_wyJhy|90~i3{XXHLvPY~UapzelV+$?!6vb4L z?)P~%X12Z5a$#4>_XTj9)|Eb357Ep#n&UDw8Cc88U)MziKMHMLxtr&}Vhz!pl84av z-Wu%jQ;A}>r?<*r)~JH6J>+;YTz1MKbCL1p-uowu>RFSbg(jDUz@Re{ofvvohtGaNAQy@$Z;mH9 zMX|4)vYxr(qyp&|0r&zIOCi>><)pkmxSt3@LgHu42-8Rehm=@Q?%$v_82=(LM zyQ{i)Vx;yB%}@mt&-#ZvcITM(M)=(_@=Fltl=6!sCIUoJ4pn zmCqI$GA~AOP#1QI`{xN}vg>KaFvHj0-jCpg+`V+Kx|}=e8mmPGY%vRC`N907u(5OX zBYSr}N4fsv>qDPx*B*T+dF(V;MvGXu*0ahW&iEa+s)bghQy)}xino#`#w22R5p0>b zZ9_tS@R}SRFgmdSU(_!<@}G{(?HH0Du&@>evF_%`s1<#>43}jKh>#TAg(NX6DuV%C 
zL3>gSA{63#nihV>(KmCE0o@L~v?U^2c0)5{4>K|9h``FfL0_)@$ir<2*3N|iPN!hj zyKX&q&_d!#MmNy;Xm!@-7qzKK&!lo$K9}dDR?ndy)Ly3KSLakcb5JOIFB{Bl~o)tCXSIbNQrCNgA&;(nV|r7-|e^R!r+& zTHctxA#ZTti?JolFZLFRxuAcRI+=MG?I7gzlvB5lsf{Fs9UY2= zSHt>yv7Tu4!}bKH2z@ov0CNJorY@jV>aci;D6^M6XEbwfpLK)Lohq_u zcn!x4$t@4kQn&~6CsF?WEhZ(;`I0Xtn8A;9?l$%=`QZNV%);2r(8DL%53DAGQyss8Y9@GwS*R>pfjSQx2poZ+xT*2bq#oj^ita50&b~%qJVv~3m9*l}i^D^AUC&kzNM-lX4eCI1q1Jtz5lMq_ zBpZH{F5GqvF;-$#_JX&?I6d>swVyp^IDL?YEA{C7-HRXZ9ag+NTwOzP$}0a6&4xE} zj0gunh`}Fd7^$>iuo3m32P`st5{hOs{`kyU5!p!<73QjPC(nb}* z*kZ=Cz+k|O9cylmC|-fWe((X@`$>cIFejKY89~-+U}|9(Gbvc;5HSKKyzprB3>l9r zgZv7ttBj6TgLCwI)Tb#}a(x}_RcrHdLUi<&QLysD^d>Pj5P`Z#h3yN%sXip|h%=H6 z?z1Gl&`np^808(r0aKuAr}P%|a22#U>{;p|>X&{)znjT&e}(U0>yrt2Y#X-UImw*p=@rA41jLV{gAHdJ~?0-?4;# zTsi84eny~p#c3^eF-G$$9V)Kt5CI0{TIqUHHr%|vT8lk1<6*)F94_=D^QEYU^JS6 zU#|)3QLok&9G0P?%4k6$x{;wZQu3j}IVw+b400op54h2Rn}b0C%YYb(gfK5sL?C>( z;uaQf?Gqrz1Y+RyFBI)TLgNd7#(&0>i);K?sHRXmZG+ zab5Gt*hpj@ce{3)UBIC_e!rUFt1~((ey;6x8`?zxShtt5 ztFG+3QxhJ>y8V=NgVyus6iF505b7siQ9ZiAtl9IrXjT7_ToHG3)8a0&PwiB1_Bn0DLkzU(-#3_Y1EFpto{G{=S$d7HN`PZ%eGb*FZoRkp4wu|Zu0C3i zi5`yV`9e1OhmF?)-)cW&ttB(x?-;f86t&BBOuvt`>d<5Ps>8GNF646ko@NZ)3tDJm zd?AW2w^kuzMS5_y&xx-_ED8?aAwv>s9G&5>}@hZ&?R0&CbWScWaEtS%@g zPmByA=ntv1{gyh9Q%VRB>1vSB%+pIoI!0yk!r+*H6fV%xR7OczGmAGKq~@`_O{BP&xo5iLt*Yp5 z69>jClVQi*hDQ{A&pmt|I)Gd|P|5f(lNzW}qL>-R+oAiJ))+Xkh&8Mz z@W1|yz)8Ap+Cz*oVrWTz`zY~etlcJ9wm~RZZ-HfVh^Dh}qRG#ws5)rQeG2nj?N!nD^J0M-ej?WI`e8s!<~S&o(rA)lrQ z!dP@N$OABiS!RH>321R%V80;X!cO@`u~qWHUL+AzgLCFznH^xAJM$uz|=7B8lhdxYR;PCBYQV9|}MKw}d z(Bre*w1>1cJ4iQZZ{q7?)H{|1EjWcUVKdb*&lBxx;H$D+wBNW&%6la;_jAnr zWmR8`adM3eIx7-ltRRN0{wB&7#GlvUr@9?cKQ)@L^S{9&SHSGn6Fbt#0U?Rdem4%IE+!|y{?jtRU5km|PDQq7m?&wyfCPA|5z0t__ z<#}W8iE5UTshmNK3ElKo{9hl|5aYC9-fVQan ztT?;eg<>zr@Rg%?bs^^JOP7EcX zwm-M?a~Xj(&h7`R2#{NNGL%H3>GmNa5STUd=6T`h7NGMYVOx!=8>6aEj5K1%3AO!3{llhJ z*jgZgcWH3)X#Fpj#_yr%BtM|>0<>6$E^Vp}SgHnRvy+?P z1bqgk2`ZecQNozYfFbR|Y+;H+!KkN1=tdC(Qp$e;0SSI_Jl*o45jy~sT-I8P 
zZQIM{`Pc{Xc~GM#23>W61|e`y_vV3Eoo@(4muT$(TF3{1fJkjb8PzL7_@cE)=ro!M zF&ciM`oY9_LJVgqZC}p$`nJYr4ia*HCmHxsQL~+Jgd|@zVN4=pd&R4ErrbGgSlHzg zd?b>0QF!jfsW6!6iSGHA2IN-CZsD$qIxo{5C3*Ypv%GFj#Czp*)l1p>H>sO>rF=D! zy%F%NG+9q9sOB2O*eH4Wp*G`KG_u_L@AtQXaH z+owy|RGu%xGh^&RLnmK;lvO%OrtKxEZ(7zUS!gxWyQ)?s-?s;J)vqh|a6gWk&c90* z4-2iFIDwtm=}RseJvvO17e%p1#gr&2i5e&SUTBS|y9FuS@#u7hmDKtz(4u-u(RlzJWadx4VoZmwg9sw-wh?k9AZ%JFXHp~9s)l% znlDi1sB%%x?OefK)o4g^%%_AuE57%c&3c4nbD`if^{#d@|a(aq4-~(Hs z^h*9H8snr8jnJI1Kr&NW@`f|-5+wab-m)dIJ!?<(iJe8_vnb~dY+j74963-RX zG@zc#1(s;s5mF!$&x8c~r~K*WR``oJEW#?8p8-u4o5+1Dpy~H9VH*mB_8ahXZ$x4+ z5$0{p5zT4QO`65g0+drAtQ$Q_FZnQ{guI4?4eN;6KvU@{TuO^xhxSS{lHV?aPSR(D zp@ral@Z&Krd$#7Fvxh7ci8VSgq=wwS2jQo+0&{YPk+HCB8lM2F{Uauoa~nvFGnkXp zixar+LHy1dwr#$Wy!`^p>H~8uR)EoO%QyR(L#mq?5kwH0>|ZRN0js4qS3Mk(B#F)C zP@0`43%q8SW;~^}CA*|^4`Z$RX9rJQS2N-V(u-NkyezrqO+vweNv7UYau8fJByP&l2MdF$DJ)*!4sout`7^=6IG?g1O4^VYTSv zpZ9$lDYcm=*Q}%$VjUUM9NK$AcEMs5?zEmx?ah2&FjC8@-a>^*N}N;ptw|7-z%viO zxF>WsD5Rw>?N4=+*MIm>dNTa>r+wKy7e#6$_KB7$dhHTSu2PJe_((lh{2ji?Hwo2Y z)S^}P@Ff3im_MmkB*L^k85?Sv=~i&u%+B|a;k5>t+GrADTWCZ6h` zXzMNDIQHBz=S12qHeXhM-Y+{#U~x&3C(=~U^@QrLXZ04go$R%H;xbHiF72IE-bP?E zG*3{59Wf;hY=?&`v?*ymc;^{`SYRQL+ccZ{rXhnvbwd03vX?dbXwU4=boNMheeqi*2TVl<1cbT?uK3(UF{$~1##>azxQOQLv zRDEzP-3RVcnV|#i8D*zw!^Vc!(O+d>eK#Oo(=IF%mCZ?2ojcCy)4o^uwCWD>rQzZD z2&hz<=}RJueXOoVEW0#_GJ#L{LB({YWqP5-*oz_k)6hS)5@Dg5#vj+ zgws#>LK)Q#w_oRHXo8sTS}mceY&Rsb&P?|A-nE^K0SX|T>0cQGH15nD+Af6ph!EfL4a?-%(gMiB_N`u0i-?)2_%F-Z|O%^2$k}Yf=qvWM! 
z72&^sBmQx=c?DPY_j@+W;(jH(35Ajd6LH{NR$2)KGAu{~EhmqV27zSqC~2@PNlsB3 zWDfu-48VveFE0(!l7ZUEOUuDg3eq5|89WB;H!LS74V>i)a=1!vK4|mtn}xx{k6)3(0xwfLFIVYAH%F?gGTOc02F zrE-u}mT{B?EI|Cs+JI7$aRlBFl=9DC)~2}Mvs5tL8_CBT%w(yyEqN}!(j z4=AN12Un8+`9>vB4*yYvEB#7*p$wh~jB#a!pZ9Gh#{j>A=PQHfZ>7cnG$9e-;Yb8{ zG6<5yeaVA#8NUi55ujFEU!W`!fh)Y3LIYPAi7Sl66-MF;Bf*Q2NKhvv5?2g~D}lt7 zkOdq-%Hm4M%Kn~tW3$&2gaX&-_#TbRwgH-32?wcg$6%BIQ9xVAfRQ+ri2ofh1J@bw z$MS$NgkuH3KEg3z&6c(Sq5$7CJ@5sT!3m)RPF0|A$^>@>WckCbuGl&Oy#v%`>qG&Z z;4~5bHbtPw2(Jgy0#w501t2G&nYNBmpk)Zhpo)aT3JP%i;zrzqfWrvCz7Kz_1Xm_h z7obP{N-o^*AmbEJGn>x@vILrU>lg*vnQ$xz^0p9;LHiTl3oJ`8*S>W>s5Cz09{*6l z51^{Ht^j_3MW>r5fUQ6%2q%Ca_y}5D89*gYNVaYR8#Cd@7W@^!91vr@bp`hxLIpr= zf#Tb|0BQ)Nh;RZbfDd`Xl?Qc1A_=v@=WPM0sx}MZY6;}*=NYIFlE4jIJ%Qx?yauWS ztSUk|AkJVh0pSe18;o;=Gu+DwHNn*hh$-P3s1%UbpJ(9hU~81Ea-dd7pfd?);O)TQ zPB;T^2l61F09cHT5Kwg0`Qf^!>d zDnM!$#BK~GvUwg0ZQFw=*q^upCjYP$QHfi#U9~oHe#C$wj*0&!0SNwfN#N_oP4R%m z4!g)`G~%xC>1>E1JogWpR&7pqm6oK%B*$pC?A|YMK7A0W4OeJA-u7zBe?P0_GyOiL zft52uv02==mmUkei-Jcf8_Zi z2KN{RYhtwHMCK3Lw-1V?dhoFZoeNh3!zk}XWgUl_4hPO;XR+;{fS z!1q%+4F~FkEWTLOsaaT|cD7rcYj5Ly%lo96E6{;jF&5^Z&IGneSS|PXDfS^80onF@o&=qVqt=5Dj*%rs)H59jF;bw zYt@LpQ}9Y_m!O_6jpij*CqZCK&OrU;d}LoY^R$cDDcu!!Bkz1+NO}0NV*iB?vBiZi zMUJSx=nPVF5J`73>Wj(U`zSf-$cdNd+>tA8rDi6js+B=9b>>2h$3;r6J43i*-QW}| zQLjrlZfRKxx-M~^=uAYq*mY(@^|oUKCGMm<4bCp{eVGf?jy6)FP-Hp zcX!2v@bjhfrAtM&G;7miEln;xrOWGSdB+I(kiPTu;K<;~bI-MvqMBqCU9WUKmn5Ny zN}*Ob-T;G3J~d3&M1*H*YC)eGZGh?QrZ?TTw4Oq2xlm zM|EdOS+nNg>t6i@Cf=0R+e~Cl?<2=`TEx9 z+bL41klE`NzWZR1Naji|TX&Xia;&_oc-tk58)lcnj*VSTP0%WtFQvnait;dNiRh?jEEJ5!!)?^(XCM!^G`2`A_8x zG6Y$Ji4%mvmhY2<3QV08{O>Kq40(%Q3pC$)c#mE)NL1r;>E<`<~oSJhr3Y|L2*ajnH%{nn8ll&*K_~DMw z&f76wLE5>08LzSkJf;9llSUAZ|7pCUaOv)F_JZ91*>;ut-FB6i|7E)3^Ywvomtd*D zk@yY7!7sm=uK4}S|1kg)()>{$Kp}7$@3!;(%TmI>;-6e2y#HU8k{kfnxDPOHezR!6 zF+k(;^8Y~E|JY<5XAo_o1RSOS5(!dU1F6KH0DK`wz$`cjdB&yjcH}2{=?lAmb+r`5Qrhf~v(uTah^l2aq{$W$hBy{v$SlSe#f?8AuR^mclQ> z2N}j_*=WIR?<9z)`q45!P62}*Wqd0DX0;K#QoUvL^uDo%TMId>zlq%^lwTC2g>FoBw4 
z`3ibm!~6jYJD1I^i5Px|B|Q{2hv`1#8#?Do))31)f4JQsU)7={=4HvrZw_g#Jv|F= z%C+PVt0eHhddV}ZuXnL`5>|Lf>D-?^8HwN0^$$jDKiCTRG^t56Y2(H)f)xuStbO`;^b7tK2DkGio+BGr~*6qK)>IxbauLrA?rD zK2sZn^WD8lU%sVEMZmqvgu0(ovOt+cUM>5-H7q--lS(z$7Nrq24CC% znsMWdqqJ2BjP1D`88KcELn&atqU{RBu|E7d6|zAp5HePK!vMK!em%L}D3K`~pZE#d zz-rAZc`)C&Th@|tpul5SulQ^@=ey3^j8g2phlMEGIBqEW3*2ehXg}}Xz*KN;e@V|n znW>2snN{JL{k0q!`35EuMT;we&rT=13oBGGJ8(bQWyEr0C?m9SSMngjhd zYkG;CVG>1b+7bZKKQ$UK-M7mG{K^D#FvoL0Pb8_)JYo7t3SI z#^Iyoj|%`YSG7K3O?rCXbvcu9;G^sMJA3 zDE_fCDnY~lMGiDvsxG{S$4}#u3vFfp6AmaTiH@PN-l?3^)17IoVc*hKXgmfJ>pegB z{*t++b^gQibFAYvn6C44WsFCwFpND@@5?aF&CM8CJ3ulTQ4=r#WE7)(p)gf#5|{wv z7X&XR1L{NrmxfeFK-zJOnxb*wbszR6z%>7qx6TNlut|EXps7n!3Y6O^UZQk z{-hdj&=VBlb}$Wq6aT_ANJYGF0gq`2$N$7MvN%iwjDFy3`oH?)|0AX$>{d&_6mTSA zi?-h|4e)H@#`fQw3ix7xZh&b}(!jN|o$p_m<{!Jc{a5erUtk)1B?(P|$29T;PYe#z zY$aRAK_hVTJEj3a!du7%U>e}M+d2W52H(Z;I0#@GynO$Y3B)iBaM*2E1H`2v>0`_p zbrQ@oyxN#XX=W%$+mlI=zfy%CjVt$K#pMW3`~hWA&9v?7?>9jd#M1;lqX}mI;??ii z>z?jC`J?PMukPFTf#>B!8{Ef48`z7i@4o?=1q8Fbb3UDZxIA7LR-xt$C*RL0)yr-J zyAy9EC~g5cD5HotV%N%Z!@NaY-pR0p>Fr13%4-{s*6)JNy3Y8UoTU~&qavPtm5D2xt-=iiMisc9IT_2-;9v#&+yNu8WGxwQsGT$V4tzW?D@HO{<; zGF&BqF@LGYRDu{Ui1}^)#_fiDvYE<6&UR29EZww5G;D=B%91DJa=&W*%8Q(uii_f$ z_^-)Dff8H;%fRC|)*LaMYcm>W3ZiG~w zhnn}hb&99pzgwqU6vTYMVx2KnJZn|_)0o-9Vnh(bDu0pvts$e14meeG z_$=&JjVIY5qO#<}@$fe!R1(xWWLd26sx}X<9l{kAt!o0p3I=a%U+!D{V*0tAt!o@n z+{W%pQy6gjbOWzjZsTa-73)yhFgs(nJ|tID%iE2+8R9pMCtW0X{ay4I*|IF7)9Gc- zTWU&C7Vao@48O&DkL*>+V9a&4A{!Cz(~K-klomBB4GYDy@i#w2zV^vzf9iT^wP|k? 
z_niK&J*ulD%rXCux9@v~`Bw=Ee|`=Q*FT|`$l z+E1y;YRvJTdWF>gEF;`WJBtoL zNGnKuBTDm_WmEKmDm$+(>0RLP@psFWzRkz_&|~1{o}R^XfQ7%lyWu*&#Dulp?d6Zl zE=|_AMf6q-ja{Ra1+?IV+_cd~V_%HLbg~7+O>WAkwadR@NY(Qdq393wpi+NB2M<@a zAbi`)e&C4RbBMM>>Nf2HcPPGPzz4g^jgSukKGkHiCWHWEz^XjI5V=>t2&C-*Mb!V9 zlo;48x)aufX6_;?-Xd{hrrviA>cKJDhOM=1Ya-pnOWS^MQ{-XfLnEZvq0Q&Uiv+yH z*8+a=5=WC`rjG2trj88&c=85|JP7y@EWyo709-nTeTr~uy@Y-F`^}$2wdkj@pv-D* zP5B4dJI{dD`LCeS1TAPZBg1F3zgRdqHM}jL&%ZBl5w;lE0ZYS2(WLlrJlYztcRft2 zpxTbfXZ+oEkdE8wBC_3fiGzSq-ULbTfQ-(BtdgXXY`=d40f|-%vU1EyWbXlypU(K| zG$KBTOgt!p<1Kb|Tt5JL|AAOk_Mbrzy+y}^%J8sZL?a@UJz6=&$<+b2$enO=2q8i>XXfy-{D2wO)rNM4J26)c|DUX67YAp8YD+%Ybrmc2iZg z^~~U1089L}h)H2b&>u?xW(fTUvbn7QC8$9Mq~)MSihHacPp5-Tqdy>Aj?x&ucO5V$ zs&o+XDZz*xkh3s*mQ)!S>MKl1yqk>8-eH6cR*^=T4b#{%J(VW8mr$*R$}{bPDe9N$ zVc$@T!^Rs54OOYZ2`5e7dPRqk5@S8zVI-i8W_4d8ZBoALq&&WiAWRc5mxga=jX5bgK!SiFEY%)GGhpr*gyNhhS@30(A?-SG@_9m zRA4AG_yMdwngg8)3^Nq1l7+h!NDe{B#6q>`YKwoQ>D&;~>fs0k88 zSORX;#SR8UfPKR+BIA&zsCXG?@^c*A@GRd<9)1d+)uqBO1`0O~9(=5j@|2nuR>he%JH~=s$R9Xz`2|(QFoz%M$Kq5)@%b(E{rB zB&tYa#{r`=D77R>%YBB3R6mY8@FGR;^<(n{ zRF;(%530x1gAK`gjz#6G=Oj&NEkt(;t=h^=5TwM#TN?H)o0}CCts58mUkQM(!M=|~ zzK7@zEPUm!1wsr!EOtGFv7C;k>YD@r|H#xsayt5Z|5A<#S;eg3fUOwtI(P%}H3ok> zKZbsGBpOalhE~<^fyWgSQq2+igUWw~dWy|S7p{Y}<78NmJCMnU|1gO09B;qt#fqT1 zucgW`w|T#CUA%4{@nr5>C+B zTqnV5gqegRLB8ORNqFTqEt%g2c3eLKkh@0*6Ne%=0&#+DykR{4XWVfe886~qvkMpK zj}D`j;>eC6!X3-;LmLyv^|NmlO?5xewHZB&v8A@9%%j|Em9O?DsQ5=f#NG1~l|30&%*K<865pL26o#{LZCFr~ zgNY9g4Dm3Ab)=pg9sDv~zN(>cvfC{Aey8rO;|}`g74OAg=uLh+@I9)$GHR6&<{>@= zU&nn$4zfWfhXzB!9l#>1gM?pFth*ggZ2HFOD$ zLB6 zkwXcN#tE&DDXH8lJ=ZBP_l0_RC?{zb?DN}i0q}{d|9W`0zcYE)psI}jnxzn+QU`21 z4E&o@1ET8o!(-+Ykhe4Q7Y7&|d z^?C$%fbx|X0TbaZVyTuMweDAA1t6EF+3%7@8@Y!v^zho?nzY-Ear_gBy-n165<3eX zdZ>;ww4{cOd$jcAyrB|LKN#J;EcqS~OQpM(azy24SLaRn*Kj%FaZpX#2+akWUgm7`bf&@n{1xVG;iRUq6tsorpz7m|t`U zIVQ@$m8QX^uLl+#Q)2}3g9#$b^3A%fhl$G{*cv#E9Jr|6K@$Bcmrl!#`M8Vu!;MAgFmPXuu&`}|9B4=>}Plr9rP^$J9^ z2(^pN=Y?1?!zL9n&FZ&Y3D;O`y>U{5dGKHAJmZSr3>fJ=CgRyQlJk+kic`BErDNQ2 
z$)6x%yloVH7Jd8f=(ril?T-bCRdM4QYv|Tr3<=p3VR0!F{lC7WZt-blww% zS|!&3s|?*X7r{q)w3i0Xg*2p6rG2bI{pjlGkhm1Aj@CXVAZg1(jd`&Rdt!P zK(?On6E1!aEV#{9(Otb#DpAJM|EX4X$F)9Nmb?RRQIkYJMdxhmnH3rr*cDW3>l7%L zc*=2H&R8@0o+>?_eLz`>uc?1UQ|bIldM%9lqsK+4&VzF2c0)g~wORb%&(D}C4NS;c zRDKMf89vX@(5WB+4+sZtVf-)x$_mAGrI@T7)Z5|T~u?wd0+GYsW3!$2qE5C2a{HJ* zd*NcYa%NIXgv}1cE`3Yz1b!M&&+V`fPqS+Ta4a>xcOq{R+E#32w50D9j zAjKDh5*NXIxYK#z>{bC6NN0mNF@vUpI3n=nW~FY4i9V^O<<)CxIb%Q`R!IJZBd_@s z;SYy#x)n!-yhIWo{EswpCp?<1O3iBJWUQSJWxqF8(tMmid$uzF$Fz%<{JRbT^~ae$ zT9Y`>@HGc;cvxU$ZiQt-=4!7~t#uMxgKpP1!c^#SZYX=l;?vumC}1eMIdu8RZ6?i> zpb_*8qnX9AmoBi^Ou{xxCe56-z9Hd3!A1HCBKiX(%%?=H_CVPIgrg?5i=0LVfI5IB zBIIN-W&SKt8ck|&6IT)?cLgT7Z+<&3B)hP!+clBJ_L)EDs3A3_d`T)|{9E-N?cO5| zIAKL2aVI%%s#Tffma14DWf4ten!CipJ9d8{(YtgUbO@jFd!hbC#&7RG1{kzd;=^v& z#tF_KJd%R0)}@sBbM<7Ylz2v%c+inV3qM5ODVYqovhLfp?}l>r1>CP^VXvxPKXD>U zf==X(A0;F2)9j72`NZ4R)0O;TU$YxjLI6@qWEX^#S7J7n%i3JZeBIM2vH{nMnTkoa zmP-r|E)JJdfZpY34XSXW7Dc>aODah=(ZtqM z263tfJ{tRW|BM)n%~H9mc8uwz#eGMKiFUmd!VyV=tQU97txfh{ps4gSI=l1^ z*W^~0{UntX5Ny|(6{Jm)opw{mO0ee3^PY+^XEu19yU*a$Ky%;5vRRm7I#QLj-|!9n zesx$0+`q|>iZm0=b6M=I*oRA$9^W`=xt{$PQCf&ofRE#_EwEaoISuob!VQa+N0g3F z>yVC`*j7-rx9P;}Jx=y4K<;UG8T@Qo#Q~ugWJNRNDOE@Gp?U$OgdG z`YmCkUScJj!5lZpKBD|+5Jk>KJ7p7p3#m>`OqJt=m;# zTAJo``)Xg_D^o?YxxE(}S7iBY(Sq}aYg%8l))0?I9_QUSkdm{!@>ySSmh}B0i?6ws z{0|8I8*sbck+VB+qe|rmg7lOcj5NyBslqR+P+4#wd>WF%gQLFV9j{i;B*~SosOj3< zwU1QdLj01*xb$iz)mdi((hfT~5gUrqb^Lf8&fNPpi*+2`@5k1mdIrkF1c~XLPN%=T zWm~sj&hp>-W!TW^KZ5wbscz!J;b;Vi@kfL#is?kDHe{W^r_8l#2=X z$;0>im~!w?G`FTrx(^QLJZ^f^SmJ4sO-5ww+VPnC+KZ<%2j}=bgA*-<7*7UXp?@&- zw0Itk?s)aG?Q$dj%{iCK;i{3(*Kg`w&a1MK|5AGM)hOw-MfEWux*BK*ub2M?+vlSvx(OS@`=B)(jP0;04sG$PoK7#6&UAGwQiuLhLxi`aK|A2@x|0_gv3gj8)2s!%Kac~=(ROUPY zJPoe0=0B`V`C45JF7M;~6NJVvVqS<)RqxF~vDSY;;Gc~GAZ3V{#kMBuzlS(8I^;2K zB*73bEl;mA-Y2{{kpdk`8Ucf{2S^NaardE$h{MDD9`NW8OUf2T>SKk?zHMuz^I`SK z$ZpM>$0m-C4Yvd-;w4m~+U=@?BnVFisFJ-1Hg%p8U;`}HGLvV56;4(xbRAW_VQVay zSFn;r39zw7$0dj-&#xL>cp_=h(ii;_c~N)|-PJ?`PJC9k(`k=q;^HTshoIx1@5vZ= 
zNT0L+HfrIn_2TuA+*|epCsgUmH4cAPT40QJOKqevuJo*f>K3R3C#c)n^}EKmqv1GF z5NFqVu=T_l+_9iaj)?8--f?VpkN?l?9&X%kI5EI-frF_)VvmcsOj%#25FHL8pJ-{= zEE-OPE7Y>d;qleW>M&FrpghA}Bx$Q7H$1~fmZ^N&t$^o%)3{jdch7K<$XZBxhG?9~ z@?6Q3O!^CWYf$X>!}N{ac;YN$fv&;L3<%lls%Q8TmX9v&Ef7wlDH#)+!5=kYl_8Q(aHcOiv_rKWDirtMWSg+F)A3Cr*B#DISaz~@to-aM zVh$SvDnP?|z=$N|@dG*bIwLf@25KE`GYI2Bv<9^jtiV+g0@!ud;aC0QTGv8}8V;$^8{7k9_z|+Z zy#ccY(6W4!Gi@w`n-^ls8XEY*&Kl+S&8ISLkG`G&`##>%O#RSH68PVHD5B=c1*29~ z@>Qyb1x`KA`!sq%^XT-U#H^ypRO8EPmNZSrU`7FX#BAA=a;YIR!?NcSL_VvQQ>w`F zXW6^(-h3>rGP2h;w&JI#;$V5NcB6>(y(TkH#}OJpU*FoRy6|mXb_#dPz}=%sc;=(($k;XW-sb8%2nRUyk^8z z?A1zEdQvn@l660{hUe`p{K>Opt$BFg@2qGQb}QsaR^xoQ?>7b2F_1pq&cnHdZP+?B zysn#i%PBKA>)v0;X*z|iQWGQvUXQBgmqkZey(g;ewDm5>Ip11#1f}Ut+elCeH>UQ5 zvp)MUlXjN=@z+zAredyp9XegYc-PbF$V!Nn=p*=DZ&|v=J8@U2s!tr#@kkI1z?1A! zhc9#!$drvWa0l}An5Y+J(Nv~~TP^aROE0M zI;{IK6^R%=eyXTC2{ukQTxl1{xE2*Ntk5wS50skq?a#YiX=CGDqvB=>g@it!O6Spi zF(+?pB>3-d=5vi%X}K7v`1$p^bCs)g@tdfO;v=Ie`%B^h=F#@*`90L7N7^})Ct90L zT31{PK5J8|5frhlNR#{M!@r@+G_G)Sy{0^2VOqeNbvO&nc`KZs3&-fY8kstQ?e`p? 
z!fe?)8B(fu?!3pL0#{&hHh%sYjzfNR9D^Jn8^5ml6NkVz(^3Tw4xdN+ zkpZDeKYs&{ZAQCwIkjM@wnBMlpaN*uZT(-*_#I&D0w#BmoD8{#t|#MR^h3@d_yuA$ z-oo3I$rdr}T>%3vGwehR44(t+;X#V2iD*AI9TcDi?A*!+H09VsINcbsnSW-OcEKep zxk1}kH+xu)==>zrs-ewlURN`ywIu+xwwRs0ru6L)>yEx_3}1ht^a0uTyw}plxyo*z@svQc@x99IIOfX$Fh@IiQCBH4nnKA_JCU(E zDNn9>K`)8P`scNyL8fHh^}<2~1Qt3l3v&j!Rvc>u0#}Obmg)_!>pzBtCb+a;&b8xv zc{0~_DxbkPIwL)Wo_DjKXxI!@aO*Hf3d=&$!z*&(8`T| zly6|R>P>F|HN7u~3QD<7<06oNHAMLq5DKmj#&Fbfx5f^}0)cl}0y$t;X1^JBwU)hB zoBJ-J2FUFeC@-_CN^$7?2=!AjxdFuAw}4M=8E3Phnh)XJ?M8-MGa3tJ);zDnX)sEC zoo<;rC-7h$B6TC40TM%o)SMyj9+bM1epPu!%Q3?4|x!Ku7$;o25`;OTtKfV0v_+(7DKPQufAEi6%@Hx162tQpHE z>SXZ;^a=X+|9}+^ID6qd|HSI}|yChbQjwY4P1iOdB(a%)WE$Z8uLAz7I};#}PiLu7U9VP`ls|*L+cO3L`N)G-yv0 z9>`vQIbdn3U*I{ldb*k-Ue9gywnY1muAXx(7hAhtpPG!qlXHJgkKo(WrpOZW<7?bk z9^TpK&P|+%Wx1+R`?^`-`&^gv%yT#qG$|J6924R_$2~O0jYsSsFxJ-k+-ox!|IQMh zHmBb-9QZ&W+RgJgVQsQ zgbB6LmMd@MFW0sY?ing)vD^n+3Otj@IvS(aCeM_2(W>o<&I#8<`H!Znv^CT^Q6{9f zEM#PeYka5msTbn9X)ivLcQhF1=umvC2mZ;bEwXS-sk4uEtccOE9nh}&=@ zj};RcT;&E-p!*F>`hOSyaNL^?=;6GA6X`i!t_$NPOAS7lw^{PJ*3{_U!MRr8{t?mPt~eUras3iim{)T|^Ha z!mQ5{mlcM*#!CvO{*iRJ@lgR)qKDi9sXYbbn3;k7fmzx_RyO}*uW*0t%{902OWs#? ziS8)p;ht=ljPmNs1@Tp~`ilqWzp;05-6Hpa`CPGYgz6h8PjWXBbDJ47m@Nhuuo(g2 z{s#c)$iK^DF!VDrFf223n6pJr2UM_yqot8ZTrqjd*YH_C${Iw|JTt?ovG_g&Wf5>V z>cN+C9WuOgbHm)Mr3b<9h9SNC$Lfj#q$N9bAx?ngDqzCXMDGj9bMg-EAf6GqCL9-c zh(y(nl&657@?d5zO!eCPoE0Fw*iPF`G2crWETnw81??Amcv|yPUU^Z)>1mP0@_2-k z{QU#Gq7T14VB=AAdZ$}8Uz9s+U8__($WzFEt}MQ+`;*p}J&aP39x&wF;P!hQsnb-? 
zW6aq`qHD}TeH$j8b+Q&;chJ7rbS7qm=wv_DYrGd!2U;^zQtK3Oy>HCum}x%^Bw`W# zdONTD*0V4TN%Y|bvuFeHwSDw&=&~vlX&;55($QV(fz?8ff)v^jGK7v0a2FJooaV0; z7zGN>cG_K^&HVGsfxv03LTxKuj<)wY6G%xVg^Q);vT?y@{w)0 zh2qxgnMZYTH))@ijGBEs;qzSfu(z>ulABNSeS9CIBd3iXee|B2GrX4E!<{0TXGv~) z>f7>{`5#w$#(kgR=dmi&T~{`Jd_0)O-|+H14T@F`ExdS|k<|U+a6Sn|ISDw^t-~V* zQ%Nv<(kt-0js{hSTHdBN8KfIBiN;J;2<>gHA;X}tIpvDiBlM3KuJMFThPV814e~6PF6KxyE z6RE|gBeol%=d(D%AXr}3>N9$v0wO(;Y0gG>*QLf83y(_`-hJIuWeCDOgRmw}YYO z8cS6CETMLnxa}wY04E2s(fu*a{AAIhc-7ofsnjf2Z1A2P5B{EV2o<+zlEI=PotH3YsX+~2Yl^mCB4X$kL-L;d=k0S-4531bq<>v)u1Ciq_5IRVA`@T zW^jZb98ZDjABf;1Y8SKY*%$!wKvHV@C35vhRN~wqRiRFxHA_y`L|#rIhZ)r_$_H z>g??3VkFKBv6q9u;aelc4d;YXNK%r0bH-y8rA! zETz`g%(RoLR|yd4{)>hZ!~c!hZDD9AH-G-K>J`+y3Ij$iz(oOSGXLwkSL7c&l)uP4 zFzXJs&~v-+6}3g<^{em|waMl6tMGN>GSGAqO9{X!2_Ema@5s%gt^a@j-A;bScE^9X zBj$I%-{XHe+Bomm6;MC@ey?BW|NaD9f8p={|9R#A{K?k!{}*Tdr+aMujqMWSjhFeS ziEh2=ziCsp$s+-M3JP@cHc=v$JW>K!;IB^ro#CHUk{H4iOieNP!V*Y={{aE$uPY=0 z?IHF{mP6_n=_CY!9x7{6pj}P`5RClF1!w2GgP#C}iC{031GG7rAi&(!^rh9GRdfd!yn6 zb8EmRCbrGKCQyOl+Gc6SNW|G`n#(&`L}d!sY?!&uue9 zg4D1Fpb6U)nHbJUEJO+O8!Tt!HcKX!Gjf|J6GLq&w#}4@<&4CF7dEZ~54g>iiKRf@ z=F7w|T5e=?H_im_cAGI1!x@PQ5N$jphR71N&6$Z|vfNnXu-5_3NYplKCWgBbwN0Ce z;jYBEYFl@a0!LdthvBY7ZL?)!SRyy#l^gfOnAVnMV7Mz$zxXnN#h`xiWrBqd^@}eP zGd_SFZ2mpKRw)Jy0dw^Gng_9C$8_3Gy=rmkZgEikZB%U3C5bXcio;DdOBeGwY~lE( z1!mjQYPV%mYQh2q{><10a;E%-WKsL+hqUoG0#&q5sS2Bbg8V&_P>}|(SNg|(6Z7hK z{M@KsaY>9I51}2mI==cA^W~p)B!qLIK&&qJ)_pZyW2b#7HKzWhe?BlMApIG%idbY{ zNBgT%cCO=WYb0^T^Mw7=xx!R|{Nu4vyWoWL=_)a7L{j<>M=6+B@x~5$;a;cZt?SXr z)3H|xjKC4Y;f51Q^^JAU@lP{rd`{r}Z2RLaCw;w1Zs*dF@}&|POCzmMUj^?MROG&9 zQ7#r5P@1>&o1Kff%QVfMg#Xs^Qsl#$oco>Mq|5I9INjb|G%0Xvit&pij{n<0q4!U9 z!j7bhX!>VdEG4wv^}2QL^gePA}h6IXz8CeYhX5SE}b!CbikECf-yc zW@OJQ{I-odQ3QNKfEqEFzL1Ga`?&lk&D$JA+t*t^;--+nCem5N(N7r?RI~5)TBWSJ z2VNlzvyYolSNY-~AAISZpTD%E(vYZj@eN+64#n8_*8IL%O^q2=JGFz1Zm=Ih(!z%H z!Wt}HF>0OqxARQ14k3dwuLfaxdfdDuVtcsVr;M1cF6A+}oZ|2v3;W(^*%HO0!M-3~VLxfYS@NvUKb<_Q 
z>tu$|oUVBA`GUQoxkQkYyo~h4hX#i8(lHU|=hD{9I-@_^KQN>4F<<(AZ1L6=))Zs5 z15D4X596<&mo%1-(MGjPFCCy~U3}f6@YzM=)%Cu^=A`|NgcnR+LDZw5uw=7;cM#xH z*=Yw=$17q`?;)&t3~kR@IBQ@ATqV!Wspp=E*TM1x;Odc3mhouM8@mE^0*1obXlY6p zm==={x8>-~@5y(27iC|TWnK+UK3Qho?G|*t;GU_+*|b}S2izIHO2!UbG%{2%uX>=p z>x(O^a|s%g*_iIIm%O8fl~Iup@M~6>9paL|MH@eHq>(E3gmXiBj~8r4?`NKDs4t-r z|A{`6hRHN@hovcd9pQe`4s(}%^ZK3BO)|Ge*G0d_#jerZ@_prJ=Xw2nOHlWci*lLI zdz*!z$>I8i+jR?7u-y@bc?3sX`W03Y3}!11!g+2RfF|+3#Wqv_2l3{u^x%4s3LFkkUB}5o zH%`FG$QFSLJ4m4QIt!r@YV+u@t_~F%6$x%5IO;jz<_#`F`|G_!tK^_n96jAv*>P5t zuQ;hKp;a#^Uk_cqyH0qg)^->t&vqD(RCx(qFl>n?WLqy@R*AT&Y{bOiw^{v)I^UKC zF&eS##eW$a#--S)v4QCgj7ETn5~OGl_JLRLMHaqwCnhNmWZY6~#19L+Kb4wA`7Spk z{}BA?(~quawc$*) zQL1o9F*nNO4$kuAWSZK|GaJ~6lX>IS6vF5tO zWB0DE3s)}5Tzh7<(k)ta&ZRqB@zxub>5_r$vj--I*JlZ)M;78vmFZU%+^%Z6sM`To zX?gOtF13ziV)}hIfL;Yy$YN>`x6s*)L zL%28(v{D=3-s?VF&zl%npEJR^CNM^PB=8N4t=uEj@7&!c2>J*VmH_+jUImW#PR*-4 zUJO%MsYz(ev450%$_m!8FOobdkCMkqBXA$S9v#0ap6WEqbHF8QRaJ~yn{6fL(qnw~ zxN|+{8zcJp=uxG3ueA2Fi9#upLEai4T7g;HwNi{6+i2*;V3)?g%^QU6`n6F26=38W&DNd$%P;@fy8cpN-+=Dz=JC zofFLj5%*+VmzVnCLf4=B9cpxbdm{&N|9zgv#Hab|Gxq1uwR|#P;v+s4%a7a1-cE`e z{7_ns>~q!Zy|Z}G{A*XB=c)Nus$tf5e%_pyyMFWLUgeavmfGTXtq?&T6tphaF3=6) zFU22VC_4%4#DSZ)IenRm|EW=xn(p$iYNG2wFfe(o!m`#?3((}4$!i%POO2z^?Ed@& znM{=UO&sm(2ox_G5wd!KFT{@I4!O~3&KQ3=;whEzy()d|M2oO{40r_W_%6H`BRA`Q zkTrMsp{6GYXBT5sJGGy@?rO~RL^wkb)*!5;_6=X+iru@yA;Nyut~OOXRUjst@n!rp zCPkqg)+PQtEbxMng5z?0@YuSO&|70d^(i}jn}K@kXl5L*aB4|LJ5e#gFRTmVLvGR3 zL&qh_6|N14gGKK6T4 z(_{4KXHf^A3l^eoE@wLfp-OKby%d@84A7(#2eJw}_ zBC8^|`{$3h-Q9cYd$gG|fkclePP|K0h>i?b1~qXj!5EHbtWf4;v5{f|>=O=Msdj7I z^u3!$)v6rXS>4If3{DD5YMku}r@JFEek#o1Ox&HE6Q!DIh}%*G2?caY)*p{&-Txe! 
zrXd-o_;vlwn%w&2t*bAdde0scS?BLRmOxCeOLsFiRYEPcq@+XrRQF|>>g014a$VAU z2(uv66EUgvf4Hw0=HZ=Mq_}vmxER>R051Xn!8f6nruu)?+ooDT!J+{?9FUT1c@b0S zAXT{GDq$lU0YEsP5iqCFRrn$~hsC-ID?EHWtXI+|oz?KLzmvoD1q9L0A$Y1oldibG z6OtiKLYbe*xp&huxTIY=4rM@4e)=v-QhmVGjfu z3#11hFYT|;i%H-P7SS;do%NvCD2*3VeiwOcz0={sL)RqvPu*4(i!F*Tmm{C2zdmTy z67-YwoV?Jtqopl#>6c2T&JFx{_f$Oj-8=om$1B|s<#*Xi-pL-2U}ST~1=;atBABok z;LuhRrK}?e4ZQ5=8M;OkN+83-U>g(Hok$F8Otg{8@jo`Fnt3!5FS&_wSe%Gf164Y@ zCmAoar@1>GwyqM(6DAjv`CW(Nz41)nmoE})VN=(7q_2ryNHu0%8KR-BZa;IMg858f zvRn8lN8W6yGeza)+dGDm`Y&~;+?F~sLog)jw2sMoA#t#=bQ@40_v zm8zCp7;isP#l}0dRp)xjvy(?nWWV>riX^_ykPG%$6+oCER>I})jsTIOosPBg4RC@M zjR{_U8x?A=b14t!#Q=tSVZ1`2COu(c+CrN1$vG3`Wl?eV*GuN^)9e|qeW4!%0Xwj?qXwn%aoF2LD7$#USge%m-~S$pV&7OhNdmEW%2f z0-4|3*~-uw*J2D}i#if5_zpgK&H3zB&sm`qz57&zwaK1nE`yuq*FWeg&wMP8Jvvp# zXDxhaYP{s+(;IY;EIu#Sp5s~RC~M272qnmOCggC;tB2|oh@NZ;EAob-7LDhK7{zHv3rW_kS_$_CToZ2()2tWxjp3fBfLqew4iT8L+QC=3ED?rdtfKin1y0OWu+^D|hgzoI%4mSJ3=KTttcZcj^h(#rwE9m(XM#eOdsO;m^H^ZO>pfbb)FL1eOaE z^W3Su|JD?7eA6vfcJ9B{76!(D_wg9k#n-gerqGP3gp8mF1HrN-KHAQ7cm?XoF%kB^ zUjm3S@AS#vW1yNC5~Xl;&M0k1!=cw)Z%G3GFv{|t`qQ59^Bjr|(sGG4`OR(haG79* z6;9&S>2v%y+$GfZ2b}Rya_;J_61iLN(3z9^+$50g+U<*Pn#5&UJLRQvR?NMn;_rFS zzGBf<ktC*evYrIOLLV;%WH9Sts_U3~OG;c6hZTntf~TIuzx{K- zm>=z&a`+jJq$UYMJEbNO`*)mQ1DrX(+T$^~!p41901TU_|JbIBBs6V`GUJB zJud93#V}?O%%oHBJenc;KCJ1BcD1kW^*0L%I1hGF~v4;BT4b(dL81huoocpOh;JSK@5hG@?V~cp$CoYAF z@dF6H8>ODu*uF886wX{d*T2<~{T1YIR0i}=Xf!Asqh3M>XzcfCMEfZN6Gwj^M&pd5 z5h_hMGTla?AZd!x9}f|31EVIB*bBh=VX9l4gnMKTLlKN)bv6Dm&+VM2g*O_^b4HZG z(z%omXNleb{(-O-k>8*^#>JFZb37AShLbFVqkrhk*1za@QDKJ?k4`1qebtLOBO9@T!6CW#osa1WeHPf1~=6&7Mh8I$F`*UZw z5$}xs-)cm>ru(*n=)<#x;9@{wWkvq(3-IBNU$)e1aS4o+8F<;;_@iA%C7il#XLgItacP7AOzYoNoMLA0RB*}=T$YqRXP zj1z<@B7G`>nIwibd6!=}n?N=b%HP^W2)~_qmC;U`~kD}onDw1$L z6jyu)GGSI3E+2ko2(TX(bWlu;tb!>lcMXj|tL|f3M{lu9&qN16g$w==w%Ovi-Ngc4 zz7YrZAI8RkgTYkBlx`$PcM4yeNKQ7~6zH0{7Y#+N@cRVEu<9LuLWb{hVI}lg+>Zl! 
z^Jb<7kq!xoT!j<-vw^zfp<+(c$L@&J6Ew#pkTthoF;2zuFGbpwazte3R_8AJ=kpK= z!89J?7o0`d+=S!7%~KbQl=E4~?iYs?*m7w<8zT>XuQNSSTO2d&x3-!vYgx&wIQk+h z%Bdp!T}6VIkL=)(8@Uw|U>V`oPGN_b#{LlG+3JklxRhH98zGidi)OR-`S{9 z)?b#5EhVDRScBVLq%=nGtfRGgS5>Lk8p8{KL+J-+s^3KUswG)5LNx`#F++i%|4s+# z+0ITqu!|>{xlb$9!5iD95OCHhg^lzJ$r8szTabsV;$@?Y^+9n;7Hqt+amNu+qte+# zBb>EJKa_6htoOk3t9MM7YZ=#SuI`!CZYc~oE%ZF&o3>L?YLY|EQrPR!s}k?7 zkq+vX&VSE1&u;(0vjPEANGCd z(SEwY;w5$5`>R}mqkDo@+CK~%=01Fw&N{?$<5M6VsbDhW`2uQ}_&(A!f4Xlf<65Z5 zj!s9BEA7r&XAQJCbkV9JCo7Wx*Qx* zsK+R$+g-4*@VLG>4zj+U!NR71I)TVM;yIx_x^}*ked*DU7-kCZURR<4eBbkvQ5OkW zr3Nlsb6w9JRapNwp zyPt;`oD1xDY1bNOb{}0YR4VS4Mv{he6!u=(j@0F@+J2S$PDv5qUWw&zoEE&Uz! ztfutWn=KAR`W`CyaR0&Dl_#8>4vWpar_%ajFMc&M=e{MZsbN}P`RX1QWN#ct*w(M#ZtOv?saU z-S4LO-Ep7eVCNZD%kK3|cWxqOmK)ZX=gpU&>>rQM?U5NQK+>|AKczi)CP}UIBPi6( zEAB7sKcuhpR&+0lS3YL6oc<|0V;!$|qpG|1z6n)!0%REDJsZ&~qYK4!)mjT;vi`F7 zZ1KIir#0}W@7Qs#_MajUH*Z9+j7ggM-Cs5hA*OLU>v-VUd7CRxH6~a^i@(-MOpwG1F-%>D%OA=0bxuBilS>}j?>|UHWcjXVNZ}G+mAEb_+}&}(aFmAoFHkQrz;Iz zB;v$Hdi?8}Zuf@6VlJ(b_eli1L75PQD~ST4z&q`=3!NYfjs&pjJ$W#YI`R;K8J5Ky zvOHozA3>{QWX!P-Q=TyGT*|_$jm)moOtgLQIFpY0`Ni@$}+(R3kT_edKQEY05} zw>M>yD%4eFDiSY1H>7UdRjpM!$L^th^Ex%^j7bOO)kE@Q9$Me#IJw76@#bFY-a0u| zewsmoLbv3|+>Kd(Jo%rT>)Ok<19tN_-dD^IIa%9wov=IB`uMA$98uuIu~ zMU%|QNofI4eFp)&#~`J4+IJaX_yyX4tO*U_Fg<&{R{%=YEdo!xm9yYBXZoFzEPluE zh3?C=?DezEO;z8b-EU_x`!=FwzP>>GuvLMR6nu(jH$moRglR7*Qpi$Vd$Hv268SaK z8}Xj;=|sMp+)WWL_tEBHnf;E>#hDFgm;#DQ80bc>3Zl8}4TKM6Q|p&_c)^R=o$;Mr z&*EjmzQTA_c=r`@rssWdYI9&)ZOvJw7Ylk-_0sWPnS=J-oc9G588@~3CR=G9D%eiW zyQGg$AA9uD{Q*SBK&c|3b`iYS?22Jo2L^+H;AIM4!4bie;swbxON8L58-!5 zDXgsKtjaVO?|x)D6sKfTv`~oS9(k}s**X6KSF=_p6EW_?<=T$9Yy!@=rh%sP_x)^m zNvuUZVwHV5uZBQ%CWK?Wn+$0MP4Se2!$l#OeTkCD_l;RW*9rxUvdWT#SvqBx^P>a~?w zolRvD=oebO31q{L&hK4T4m3BSj4~4#H{eZKvqZfVEjeX)Chr=~I{vLhDka%>2~Q+5 z)+4mc8OT}J5+{eU75m9UGNY3Z7aVLqsI8I`n+mJ?I(fqvc5pg5=AXTXMWJpj3OfPq zi~kp(eIQi@PSR!NJC08FJ6N(2i5 z-a2jLe`yI33|?Cd^Ib|(1grQBj==Lt0u?Pu4k;-S2~2}9|FAdy{c>XT6bmKR_5cJrt 
z{nK8y`u%SNJr)%Yx^o+5Edem=*suR6M+#8!;40v#0C4;ku3c&y0}p`8(%|c#$anCG zH0BH37Z{n0*!H!5qS`rRej)H>!1=%tl#$&&7XrTy?k5Yr0B9KV4FHjmzX0$c9*+4C zKx2`=aPmOU9`hmg8|FeW%!OhAJTHb(g#UNU`tNeAD6GkDbu|V|jw$oofaC&(1 zc>qAZSo_65elhrQfCu0B4T$xkekq&*gzqnfGk_n*`h(bO0M-~7#O5^sYWz##4E)he zg){7H!2ppqP6PPy4M-~H1^}Op!kR7i0N|0a&_V10c+hQ`dVnFuZ22~B0OSr(+amP< zCB6aC!CV7g;RebCdjOsqcv~CS0K7Sdc6ajtJT*26jy(-?x2+}sL^ldxmN!lVFzt;* zCgwB@{2PVM=4~8-{sCD2jT15N5(U8gTSwrI*kQMECb$b2TbsYbz_>vL($*2?E?ZB9 zdC{nCJzOk|8@#NoGckW*14xZA3D9fU*cJ9KU|`%>P2fL3){xq102jVf52_RiVp|}P zSqxN)fOf|t4Ay#63%V4B(eF_Gr5DyA1cZwIRjo+94$co^R%`@TVw+L1&SJS1@VQU~`NbET9G=ev|XY^9l=R zhCtwHI~9+Y9moJCft!jqT#>fQW6mwwntM!)X)d!wKOT3^*SI?HAX29&JvG;Qe)_U< zyukx}!32SCv|e640#SPhs9i#yg#N61tK8dG7*XglsP%*_IG|Vc8GdNp+9#Yol@RL) zUmW8)ryKj_8DV98Wm%&0CneXQ+6@tm-wo^Kl{hg#p-rY~DrV$4Otg$wMu*cxor_D& z8fRmtVo=u#3zSu4{A_#~uZ1E!){RPu@u-I!?H9iedoFI#%iQ+dnF*?AAhuE%h3K6cv$#~BIFN3_Rt%JxAyEgB5(hUF ztR&z-{Ep$8$}eLkdk&2GU&QaG2*;HB(FN#vbSK(Rg$REUJ&k^bKB?SUhX|e{b5(I9 z43;93BvWSaMf%#T*R8<{N^y9a8@JeEemoE>glZ+kre{~l)2&v*?6_(0)lD~ob{uD6 z!73y?@z~ ztlANS$`C9ZNNRAU#p@Q!$Wyls`fCK1w{yo|)X61Jy{F3M$Dz}L(BTO!Z>4`AooN|$ zCW{60^P#SGi|n6mGdS-ps_;a8Pdm{+p*+nVln_$2sE4+Lm-Rrmjyg} zw?VSr&%enRiMF+|A!uKN-#t!XGKXg950INe`%At>!%P?1Z4%ee1fgik4`{-Efh9ki z$XUWvTpSbrOm*hr)zzJNe1pmxDeex+ONR3nns z-@0EH)g$9^18+=q+P&r%SAG(- zEe*HW$4zy(IrJzd%5zWkT$wp-)@^u@{ZsIJ4XP#a1Q~NE)I2Hc27;8@R%y&27L>c8imO8yn8Yh<$h+xc7iWrnX|+Oo^-xG z_kxtpFtOI=cM}=Q>!~LJ42T3n@e~bK zm+?LkTi|yq5Ie+$Wg4<(n_ieQ>Z@eKAgUEEQ$9L+A)598NyDShF=($T3(K$4N7? 
z6TJ&O7Uib^4^`(j54Qcd(+;c5gEs6nvvK@_zYt(NGy0ff98+|=~ zsWUd6rif->bkEdBN_8lEg$V15@4~NZPS~ui*-y1mTf-yJ>Tq`()JVb~4^!25r721P z)@qt>-rRrnjgX(BDbF#514~iURWjBoW=5LYyoH7Z7QH?Zo#rp8g&gV5e`Ry&wej75 z@XAVMu-JK#iN%l8IP=dx3M8eTpVXp$Z&%72*T^=c`+-s1-?^bfohyzcTC0xO;I_YY z@gi3m&+4!ym$4w?nPTvLg9LvnXD+I&u#0={&J=WACU1B7Fq0X#vH+_NTugp^K5jK` znUvuheX6gTy!zZ8Jo(QP$81ZeuCUgL4*9~h!}sgZGuY+OBh}C3PagN`{V>HDQ+LsY z|ILTEAW!F+BGRmgi%5ht<0l+-ri&UHX?(|%J?|K%@OX5W6IZ&&t1%ue6j`rLZY(9Z zKSPsJ`wR-yhpmhK+kAHs4sY(+gmlvbNViY$2bxs+si%hue&N?9&afB8_Gmj#8C~Wp z*&iI<)uu9>63p}Q>&@U^9yRupYx>^N)CvU70WxE5vUJR0;`aE(!Y!P|yvOhl-J7{s zebfKM7g>Tj*8%*AZ&|JJW5vrJUvBpa7~eLVZ%+K?(YNs6_VkH7iB>x8T$VeR_cz%J zSU%x8;iQU6sOi$UeW{yff4>MtE%`n_KE6Y(=sf8EYqn(uV-mGz}irkwK+Ki~OU z_O(VYSa_6YEK*-5r10>Ge8auQ3Pe%3QZu;sm0=K^?kH?f;qPACMb+Gujb4Kd1yh7r zt`dyjs4Jtj+)G*>$huvpvv;xEo0dH%h=F;kysPY$%FyFrt96luA8_`geToZ6hLKl6 za6DVK$8vo%*A$xsT6qaLTb-0hP#MFM9v=dtKOJ9C;j5VII;x(Yx%x2r!f11?z_W1uv;`~FS>{fi;1%RYi0)d4nKWTq=`A{G}WJP`#AP z{YEhkGL2wn#fofpo_@Tn0vK(QM#Oi~I-U@GZ&*>upgq*chH`Ru<3ve!iHiXz7l~sh zZi+9y37!L`5|ntZs?=?jR)#mMR)VQ9McgH^ z1f3w93@4*pUyi`>O^^cYdS@I|>mi(9@m)mxpZ8Q72CIOG|9c}V6nU)kJT4G|Fps}} zk}D+Q5dHYRatY%c$?tsM_bHIX7JsDM(~`z&Q@L#Hg}+iFG&RssD|DdO%|PtwRkCN| z9ilS=LDKxL)&?aEHxz2>?u3r9MX@BDwhqm$sRxOslRAeUrKGApXbB_hCGR^evBrI} zLz{flDSosvYkj#8Oi6`yT)&hjXj&^T}Ma`xWC3*^wUM> zEx7S9m9MG3Z9DGt^x*qU#{|P?hBRcR<-NJw8S58~bTo7#_`)Rizvww2*Ev}~Sx zOKuZ@`iey29E2G`YW?j>qPb688Z(FpZ1uMr@?SBiuEXo$XYhj7{0KsgaS;LkkG1yz zYif!5y%PeV_bx~;N)3c22!tjbsY($xkOb>%uP|MAKH=Zp=%=AT_a3;^o`Z2)x#0)i|dqQq*jbSH5Z?Q;LBn9XgWS480l z*cr4LLU3R%=v&o%#UKj@QWhT$14|E*?5Zo0RTx#E7uA{9&~*t>|Ez%wZ1&B4!Jz9{*Cliauclk&6Z!} zW$~b%1F|!Fo^KQPT;sU2y2eVZa#vk8Y^JcNTwY!ab}hMk&k}3Lgm*68X^ij7!lml! 
z8fib1=PzQhs z90~21u}Kl_YXZNWlbPs#9+s^KGBaXX=$=kkj3>9&t@^%6x+s|Bel>WXLfm8tY| zsV6mL*R!)~ML&#ojm_o^&m8~`1kI+)Li#y0UO&toYhvZlND_0LF5^}^b!DejH;DU| z<#Sh=5PU1b=UsX{XKq=tzO5zFzC7)3urtgqfXjLynvTyep_p!-%w7o&iB z_gu{KW@Z_fy}@7f(!UDmi4%noDZvENlYFDVm@eOYcMem|3TPxMw0oE@h_hWi7o?e?E-ypfnx^sWUwe85IpWVkhA3l2dE`bvPQTmzj34{qHMfP^ z{fKX+_ucPzRr!lQmdcFZ^jBiVv=jdJFu1NRTV!ld4Lmiuf^ zXru~KJlHOVM9EB_>xC;#QiunL(!9x4mNSPj$id#b3!)@6qnnH4N04; zPQh*F@8!s(WfY0un&5g-EyhzO!B(GhLreSStL)1Po7vUY_a9c@#$}G_3F#=65tkA{ z5H@a^N3Pn87S+4q|5(OK&EC9=&fKU-`JXc+KzuprpoWVc;Wt3nT;zUDknsOZkO}|u z1o^XW4R{^_)$f5T;N~rC^F1eD0R|YX1){eaK;cIN(613R1RFl-1iL<(&|naJ%WtQb z7MqE{ohJ-?z$)FyuT%2N^?SP*bNrPTGrY0=ATTH-b_Kgx3?z(CaPKT6ks;XpXAbZC zjqWSxEP*ai{1@r@g}nmT1Av&t)O~TL>L9wUH{T_`AKuW+q4SY!Z+6bDRzy4Ol{8P zd}z*M`gVQ^o|W4^wR-J&(Qr=n!5N0s*EvhqQ{SH&_mv6P*}tWZtv$I(gm*=!Ho_@! zh&-Hgc`kH<-R&{PQ}=?K6mm9S15XrsC%DrpBns==HRGc^xtutvP{mrWfpXy8fYZ z+OMFT`#RgV(78A%=^7K`;_BC#eUuVjJht02iKoHmCoz@IO_GhkZru(MAZ7@e{9s;h z+{KU&Jzs)rBe|*-Dk%bq#PzbNq2$3F4`ah%*u&nX-6yq(51te-3tjFRzK8RabmU3i zX;%Hnrwcgqt(Y=*Tfe>yc=M6oR=s@)Ad)=m<;Aw3f5I5~-g!}G%(kV&kB#tV<_dKy zDa?Eadsz`@2zxvV^K~D~i04cy*OiCQLT5Si3OaFwnL>IJrPDr~%^gU#xpawGE9dp~ zvFcs$k2SQjC>qCK0NBDlX$QbXhf9Ty93L0J_dKq_fd83<9sj>1VIMm_Bgp%{*zy7g z0?SAz(*r;~9vob6ip)3w#WN!_Qo$Xjim(MHf;nPg5-2Ox1+2h&--s1R+tv!WEsyD5 zJ;)LkCYaZ1ip_~RO>-Q7)$MENLo?>kX+i!CKxQ!-$iW8`Koog^>PVaf{4X3V+xZv= zzGr_{1v;ufoYOiltMXuStAw(X*P5u=k8gOM7&`@a00c~#EHO3rIMNxnbm6<~L9 zV{%JBt}vOGMxHT)`*m_8wE-33CA1y)$ckuN1xb9GMwKYITJ);#s>XiAwYxfhQCcOH z{j)lCq<1v_O*$?+S!UuU+dK{JFwLDzxQQyMie7at`>D8_x}PuUgw~anWf)d0@?b35 zj#>skyE$0EoILg@HFuvU0-m}CX_|TW5}kq2MAYGbL0Ss$IYwC!p2heuLwel=5wmq; zCJ{fG6F`nG;2lYSed7CJ6?HqxbvSp7#UgohR8LMbxo(y9HII*ow1iZe3?8R*tc*8a zKRp9{KXZ*@E2+!k^OOO0KNXO|NB`l6Dbk=Ro@%i4-AlnXH>G9%oOC2r` zoGKSQ#VZ=(o4UUzTX8a@-9%Mk8l-K2w)^X@%(LGWTI>DD_{#G)rj-l;;rF z3f90f3uy`T@A&H9@U>Yf?w%T7bnacY@^s^DE}evn53q8K*o^yTtFhGQ@e_gACs(Fp z_C&Uhv~p%G`RI%6QUlRCnpSp!$JPN4+xul2W)-Gab$f1j3Y>m?89KdcrjgV?ntZ)w 
zMq48x_KSt~`xrG9SHZKrt=VrL-qA;jR>|SM@Osneb1h&Q-DB#IL-&XCgUW(9=`4A- zyBY=Y!d`xQUj7i`1$JhME+NgQ2q6G`O(qW0Z@})bXPJ}=LxuzGv%RWP30;h9#M0Z} zcAY;Yi`wVU$adM;()BjlcP@=xGJ9RsA<8fDah;war%-InE?&-uGmY`86TQffVQrU@ zHXYtU_z$BDN{+tSVj2i6y4h6gWe<#;GU|>l71xG3E|+s?f7sP$MK3WMn=nrct}_zh zvW2SMcY+8|HpZ%$3*GtNGfOm{O!IKYTTF02Tw*HSQpYP{<yYsn-B?Dq6#AQBZPZ~Y`aPiD2~~j&;NgWGuhr?a)*dkJgtHOW>&YG+v4_ zaLi!zOE#zRrFWsR5UOuHs05Fu5%pl7dm9w-kt~xrRE%^lxTe9oVTX=BCE!a(B}q;g zca+swv`I75)EF(Ty=DB{Q5MTYp667tb7_>pgZQowKPp;C*R=~rA&gN3vOL#l^b25wzq#cuBTcQDHbXG&Rv;mU3PCDEQ zkA=)R9Y~MG$f6XA0WmUQ`bFt)_Q?WeV$oTPQQEmUwASPb--4znecYvxFA|72bwXQu zm^k3f@LcmSY9UtAER7pUjkNvr8*v>t8`>5F+o9KocX8cbrRs!5-N;&a?lkE3m{GHXQTobEWj!^VeD}025mi*jP~A$X9~|yPkX(FKuA52CxHm#)b-a_ z{T9P_Y5Y2!sji9LKQGtR{=$Oz3wPW3UL;dBj?^Q64sl=_O30AK@N!+HNh500qfdx< z`c6-yu>BqPoU-A-q{*062IHh%{sXofopkvP;TX-z>fhD1-*a+pJGxlXBk|4i`qU6X z>0`>&c+hidlSH?!SJZ6MW;a%}WfQc|U|KQsfl>zW zT=9#>khj~lkqV6{%E6y${h*IW(MpweX{p*X(Q~EyW4nCr>J0lEmH5{BwFEg|u_zd0 zN;f$_m%9|??UFQi}xxv zFI-4+KwvVKK;ClHvQwFkQ{6SgdpzFaWwB?e#*--}SY3-lJ&r8$w(zm`v{o|;7ZP|^ zky{!Hc}+n5e#l1EGiE`BfEem0OL=YArCe((l41)I+LMUouD|TC%q5h zA^-)LOIga!4 z9}Fu%V8zyqRh{U+oROU;^hkB$Y@I=Ur0RqM^Lx#DrnvTg1wUY!1tHl(UW_!mrx^yh zP>h7=Y$E9p1E{=^0YDJM2bBMb@f#A9zec#qv>*im1Y_NZW%vCwQRG2LKJl3Wh{J$i zhCER9U%L&wee2BeMuE!gu;YJ682k{l`^VHR@E2u;KU2zpEf3aza$s2h(8CQ=LnV{_ zsfT;-$_^i1?>#(d@s(6QJRngl48T;Ncm~oS`8^~f6KgP&CRH9ZFB+654QNkd2>@y+ zO1IY;DSyU=Mwfl$(X|PU-;RPkpLDip>Mp*1h(b&!P`t!9!*Qc( zE7Ete)r~fA?&XAUytZhgMc!EwvADo!-qot+k>FC1d9xv^kVRUafs8xliFuB3RxfwEYIaPs8~pUt;+fo{r4|kv`ECO~y(<-joeZk1A~|(&J;Ij67qU z25oq1%riy&JWF1RP^hk=?#cG^VkH-1FMeB%%O<8H<>-usW-9^(WIm~}ej5Ph5G?pl z?2696adb2ZCzx6nh+%ukh>!%o_#2o&@iS61-TZj)2I@RU6W@m)Y90JBcu^?llV1B` zuQW>Z3AzlxO4=o4T`;bGWi*uG(Z!NL)jG$Bi=V-}F2$+nlumu)i^S(l@8cNR3{~U}&Rq6UrQSua!Oh@b*mR z_x=uc>xWutlQz#V5VN2Tj!Bw_4l06ADCguo%(@E5f<6y)>5*A9s6NB{Fmg_J8yfoB zlv;C&TYn6!x5{_^O?;~k*Gf!2%u|50d4dMNGtFtWlse8N^lESb4(AqVYiW-aUGJiC zVE-zr^xL#%S#YEWY=`%6JzyOjzPrTff9L`K!Eung3)z(hQq_=j-;kodK*cj7C?Cjm 
z17@>L;!Pi4U7vtGM(7-Jjdlwu06R#-&e%lcF%o=3`VC-HEPzE8>NHUjj1vIQMEVZJ zaT4Yyqo;J}nv5nsj^cEmbWr4=e0D$yjFm_>jv=GUw}#3HMJ4`639M_wMp7jLa6|<- zDh3s<+#!+LT3d^jp0b$Pp6QLhClzFl$KkrX5+=izsaRh&|LtPDz7QQAix7TP?+Cjw z|4Y_H3t?79brqv+oBJZ{pqEfj$+;^qQ$8%d(gK6oBHuNJ2^rP%``CypbqFngR*{z& zPcV2dYuyT_BzDV}&{@8M=#a{NMYQXjGokumDJsftu!(~B%ao@Ig$plKv>aRVQ z61C?Gc6nfb2gA52!U~<0&^nm@Jx7E%Er zmiwlDrv&EC-V>pbp#?#w!Oi3^%=9U2cktySWImBa?|tTeWNlcpNTouac>ar;CVvYE`_wclV*fv zXu$NP=f68ljMkhlexe;5V{R16-{k38^u}U|gmVg+%A5WC)e{*P%8(&N-B#YbL@`@4t2v2bo>I(j6CBT zJgs!Du3%G|No&!o!RgE95-#@l-Rrid4x)^SjEb$k?&cyPL~^yPFFxFX_%9f}R5lmA z5qcj?F(kR4fRxYOiia#Kbc*g_UFj@lMrS=VzO2AsaCzYZgTEj{=2+y%Y2E*KPW%Gh z0__3PX%7iJX6BFpLVgKw<(1ZfTyLM6J?S~*?hI1kU;@J8T^B-Z>W?J;gv9}B?o{6qU`cb24F;M05ADHM5vfBV*6;(UsT z5&7cnDqWL@Wlm=#SFKd%hb&Y&Px}BRs#@q30r{m&B#9z1qR{^m_WehpcAsfT^f*VhXc+?G#5W@Zs;%KYO zA7f?BiWgw@dM~=7%7LY(5pMbE4cBx7}U$x#y8Kw?okoz`V*-5Rz zV^_$@zp|qN?!x=SAq@jG(2K}A^l{w9>WMjLrUE5{^RP*gewgrJlzZaS=);4lkDG#% zek+*M_h1j{w+)h$3^bJn62<~*{ZK2x5m5wcWD#TlZaTxy%r8Qo2v>BiN8`1kggZ{! 
z`oT7U*n-Liw&fC`?PrBi`!0%%zvswdd&UF~VmcElk3jG%8od)}Kz~CJSaK&~|Ghe$T+c8eJEV_Z2 zEOuoQKX)b@O}ZZ(cryR@CIKqDPTK6dosxEq0C3KfmKe-iBlC=Wru2dO+4xhdkPS_x zCj`to?~CQ+@Mtkwd+)^3*JojKEc(N%?o#h`#a7Mb+~Cxg`A5Tq z)<3juyPgYtepVHQVs~^6y?m_gToH9&c2)7Zh*?<1NC%QMfwvbc$`b2>BDNuH-(Sz% z+@zZVjODDjcouq=#y10#E&b*C**y%RE6uyy@LZ}-xT zM$ztGEz>O-2OF8PtFmk0MR1JuIyHufUR9g!j|xGzfWk0ng`g6oCT?{rI>712qRDr% ztL=eOQ(vW;eS%+{s`Dvf}7V%(xu zB$ByaujBjx0>E7t%E~mm-g^o$S~5VmRoMm?h(G5Oyo;IK zo<{A}GR`{IdL8H?TW|;ghxj=fPB+({F9Z?IS<5(K1cC3f2wqhtUa1+|uNE3!%Jle- zJ)@&k>YItxL~4xj#=J7N=ZV-*+x9)%hjWN2vd#-;LfeNDn(D8Nw0t?TvV_F1rII50 zFzNygz6cJJ!BlmHOt9+)%N}PMYw*9j`JOlL}K&oX{+36T}3&>=7kmZjg zhf(B%+KEl!-nwqjLErb9aJ$yxk17TwS}|FA!p|yB&ylTDJEweNm?-^F2SH~eG@iA< zUxdWr$Axo6m}cboSn!{|;h&&Q8l(Riw0WHT0MI7^t#eaTY|>SxOa>(w5}RxjF^%-$ zc{_!Kv9pGOynwaqFUM~LnwW;JIcQv6R5|ZSyWH_eY6v>{;Bc3cL>jD|o%=MW;-}J( zIW~#X>BwotwX7}D^X|c3d$%%uG3BJ1HkV4fKkZ~P-dR%Iht@Le_t){i>AUelZr^ik z&1(94*NY3^pTpY6T^A=zCeo@tj5+Mrjdx9CbwDmkTx3fKK6S31iA;bw{^qjM#&l7S zMPktV;0+p~B&*C*9Pyw1o#SL5Q8=8t9k`y>(ik}?k}tN+GgQ{Vzv2E}HT#BH*>2y< z#Y`wSi7R>MXec{(dacH5Injz%mO=&DGO=F2FWHwKENJ3)vTElkb0yxlVen43s~4}* zo_du1#Yp*iXR|}6X;kHAd*khtAva5^S0)0#Gt-{UX8Q2S7lW6TIul6bP55ri<3gJ1aAB#b@+roK^wmD9!i0LZf$hMXxq?4i*vxtWe(WMQ_OXaD-a?1fP6jt3m%0{FMNa_fKW99;a}Y6YH9G>MVBH zKok`^L!t4|QQ(M^8Y&0iCzY_Vy7n2-fhH(K(@=Qn( zpZpz@V#`Q|U6C<|X;y)hg0eZXd?`Xg@wBJ-&6(31sozgaO?9nVDc-G39h02)M%-4H zh}cYi*-J#dzPh*L>+a*JLbT|2y|Raw2wM=;o?K{I1jUbzJd-Vk>KQ z7|nMV4(i#x021#e2Y0JY{0(IT<+Cymxp5y2$b9Gao_6aoJL^_0U3Gg#z5T#&$Zx+Y zZuBMfDqzhzZx>H4%H6^-n=GU2FJ;RREG+j;!f6UYB$P@5y>@Jm^X6MELr*uU^}Ohp zz;|sosP3hdaB)lTztm!-Hb*MVbyqxoB`IiMLTnscPW}l7WU6rZt4FsuhLZSo!TLF? 
zZ&q$k4U#?}Z}wdBIfX3Www+Lu9QQjFUG25k#!Q_)TQ)=S&{1O-ky)+OSW$*=6BoA` zd!K6S*(~<~$dhK67u_&Lw@~+@6<>N1LhT!-Pu(H#5S7U3A7pb( z#=>-SzZ6DObR(45J?=-`;)T=FOld53*aL)TLv6_?85-jszxke!v_XW8TYwcLP=NbF zi(Ln`pwtZ*jl|GpHN4r27eiW(h_HV47Q4IJwzdLbhLN}kz(1DFKYG36eC>r9mUYE| zVW#miZZbU{NR(Ws@Ug$VOeRzBOZwU3@Sq{m@i#+oEA6$Z&NCYkhEFs0$!c7^7Y;I9 z9$#MIpBAe>Ck2+C3!_12CUlDeASLmn<4+HGk^pMzl2Vf-@<32+mYyCGW zozkAZRMTlk73#RhqmmlpiJYEqQWG}Bj5h z_8nefzob*Y1sQc+E8N}N@)90D_})gCD^aOPBbl&08QgF|UKnnZaG9l>;4F`0E|-pR zo`$Z#AW1^vnai@Z`m`ny4)vRFJ(|@MVihHvYk;%16L2svNnR zrrOjb^WFJN>W2UlH*(T*lKwK~?+|WmeZ>i9`ag2EJsMN{Zj+X_DB7W~W&Z85J1#xq zz1Z^sehsAQPE11XcWOF#g+K>g3I}ma;+U+yWG|*UV`XN0p_SIEcwSQg-%TW2gL$a{ zW7x-!MAuqJ;x8e?hafYE+M67e4WD47;sgOH>pIUN-wYb73_m0oN|iaPbS->-5a+Z- z4*xv(>A#je6dwKQ6`ce^Ciq_n8SFRZ@Z;bAtsD*rhXW)cl+q^v`TD=5kO_+YP9YNn zuH%?O20x^50VJQFG%P0gPa@Jk6f*cR7wI1g+0n;<^!r?0FW8r zGKj+X08)~e7z$4&CVHrN4&cjB{sjz$Jp(r20V)cg)D_+7{U{&PXVSq|fD!a{riXzA#aLg)AZYShv1!hnGUnBV{k z4SsB*qrd+&obWl60gsN(!TA85)bZyI&3XL$;d_LH4-s&{^&W}1|NOS2GoS!=s52e? 
zJi3BkXtbmL{`V;UIT-#xqX`0lFko~D{sSL_1GF$eZb#@JL2e;H#v7gRIPg~hJobN< z8wYq@!23>$Qv={>0IKYNlMe@81>kEZ!K?uUFW{Sju_E>l`t3xxHQ@Z`Q2ztg`5n84 z!gc}10JMcsKTtB?z^MB-bq)9wu<4Kbffl_6ya2G*fFEcz!T~VbKOF&xBm-|e`hnuM z2@3wsWIMWnfA;ZVAO2GN2Dl@oUSHsE{ykr)TQ=O2l2;CsY= zUNwpr_e)|N#f$r0VqEZdiE%*y@^^G<;KPEyONvL&9~}Wb13bBoE)f{zz|qgHK}n1srIH-J6WFyw6+Rum6E!XVCL<0Ekl$p) zff4eXj5zAse-1^I;5htn|L^FWz#sw6^z)p+wH!W=j$T6z)L+*Ej79k2)BNx?U@*cD z0i#Dps44hpbp7@KM5~4i^wUY5s+0&6CXPhGrwB(aLlou$9{+|HjVKBx9YZB42l@h- zI6NwjvAQw-+B$74Pl9p??}8McA}Igs z2*D5Wh(G6-pU}L&0G9u>Mr}5Z8Yw@MICW3wa~YOkg|JB~6z)AI&L3F)C>*YS80tIO zihDEwXrA@legyHuH z(Su>a`LQn(8&8kMx}<};5CpEiARCA`l%LpFU4`G)eD$ z+5Cu)z{ggK!*T;IA6mxw)*7eSuAFymlDL*-c0b zhaMuEA<`T-%pXto5keSI-bfH|im)8Vj`zwcs07m6DS@c4#6Cs{V3u-Yp&J5d0t5A)gE;qC?$ysk!&#|NGFVu=?WcA6LCg7b2PySd&b;8Lih#PjA1lR=B&2`yC$LL z;?0%|a=iYA-1on+|rjDT@A6~l~WPgjB>&m;VzBCdEL)w{` z3!Z`&NweJ+F=|}e%q|J7vd=K9+6{^xj(o!B(PRxn*CaH1RRVtzlY%+`U&m z9fZh*q$K1B<8@>`vJweiTiPcC)S(1mBS=%zNgq%>lZYk(8@4t?8@~_Ihc5`Rpptrx zxc>9*(XEhQMAv9^n@@j{0?pfsdVo?W^h#q=m`vg2deHSGFouLUhE1XZW-K_p+M2!M zrDdZD93&dIoNBIYMFVxCuQD%PS~exITQVvgq^Y{p!ZS`-GIw zrwr!#aR>`tS07XheTh~V6l|3zFO6^2KAsoNoSiU$HuLebD_UZE7eFCz^`WHS4i}s$^JtQGaznke7^3&)F z?JYE2fL0&$FCyzf9o0l&8H%PAQry;%Jj&2DjE={aU(3mKl=3K8qS9lT))_jpJSc9J z_G{#B34fkDuy!{TJ4KZ&rdm5Wwqmy8`?BOzoK$L;s4m#+qEp6jiQIj{tKB~+U%lr$ zYu_1g{xqo_SB_V&CP8K~O^9~<7e0;Ni+7Atnx70hwv2UK=*+8$JnFI}pT8pVNW+5i z5?Epn>@mQFE{E`tmi(V7@8r9||LPcX_z`mQ|9Z-+GnEV=fCzwesE?;QD`%EUbp3tta-Y>iX<|?s3M8v|E)+u=3YkXln;bp z2;KrCflR(4@-^KDCqM`^TReP={fhmn{1W_9{KEZ+^ZNWVe3||Hj-|p#G#Nl(V|SJs)DI;srff6qGGK*~&F$L}HU`*nfrcn3(2 zOX$5JJY(pt9ZjtNZ}PkKI%EKZBn{T2C=M>N!41D2i5dP-zh4CwU`0Ampnxe`PQYWg zNbmS?n)Ho~Z66E6#GIsuoj{WP!`aCJQ2u%`&se|X!&+FbKc;xM~nJ{N) z2jk`V7v07s3!l-QC>Bx?6z3AoXcC%zHe$M&nT2dFV9!bsJ7$hOQVDC=4|JV`#*u*k1)4=TsvZTn zQgFi6$1!^eArg^JUnHowL}O*DTZAujnNOZ%ASXR@(yVUB61o?4NA|HXMf0r~kq7ij ziEH0)^InXY;}8ux$D`)y&{pM{I4^3fdow<1m2K>N%@?=AQX?H*@z=5DDT9~iKJBj= zSbzNrh`EneRV5|tnGAo1(zSRY%GizG`Vf+oD|z#z2Rb9ZIs5W673-UUvoCE@E|0gY 
z-F-5%2#s0Xq+T6lTYQYTf0j7tHA1=wpQ8H7sd)@@y7({$3?0lg-R!G^TV7J)XT4f= zwD@nG+lc=0$ZPIN>b_?8L7sK(fDS@(xD7j<#j&w7ljXLn&KAxh!_|tY58(S*SJcEU zDLb_h!t~k>tR=W=q@qzdCU}Mm5K+jkpD~m5#eyh4%@R)z)}%>w1A~QUP(I}*Mo8LI z>xPl;)T({wY+`Bp`F3K(9bE_G9V@e;KI-zCUL;4}J#y_X$CIeZY;Zri!HULN5&+_JCvJQ(K%7KzpfSyK%zrgr=Z_jVwL^#Z&l z;e{SB68Hcp=mlgc(%RH?uim?M8wA;fG$Z%0X=dm~kgd|DMb?u@NcO_y05Q`Bw#Hpk zC*W;$2jJ`^zl8S#J|tUy7kV2q!I|+$SP>G0g=)Way4G>~C$ZBB(X{_iL*dUU3ednf zar^&Yc0Ll-`A7^Xw3)Gkk%nf3u?V8P4fSTS?FO-Yjck$DB+I+Ef(?Ohv>*|_D%!($ z3}ZU#VB27Vci2E?7J}m#yoY%<23<>0F}Ock`Zvjnv%STkhdI{Ls2xZ~<~7Wh7nI(r zEVG~r`ll-aCHn6@Lzs62?qXcM#(1rrqTY=ROR}r4%g?fQbl*BjeV5VryE6~>;*Gw~ zmTJwx3P7(~vZ=7~0{y(<+b*sV91M!Gcj!?y?Vw?qWZ0tbHzKhFmb+vfUYcpF-JHLm7+O#_1h&-x zKr525+ym2LnuFW~4oppvAR)+G(_HFL(wL^^J^i!+arJxm{Z7+JF#1OA1L^y~jUj3( zcpOR6Qy8j2XALx-;oktd9ee8SFa&^_TVBw28JmlevHm@q z`gwgH13R-8v$k+N?xq4r6QMu+ZC7lA=?Za438}K)ZRlG3 z>+O1Yhgq}1C)ykV1+VolY;&Bpl615>?UDIZqz(7f8e{`8okcX=2xuP0iG3E=zL!M^ zE_q1$Bgsf;;@roQsrkjAMy!V%T`e4YhVOSc3qR?1r97{HM|O3Yk-f@{=8`DLK8f^) z=i=JJ@qDR-Hb^PLLZA2!6^@Y36iQsj8=hP4D_U|uL6jBzODPN#38e;?r z4*JaBNNv|k_S5K*%OgrqkfJKM8np7moZ-PTxBE+r7i%wO7q1UYb2LG`L6+jeoFJpBN%%@rSh<|CHO zWrcc>G0&E^KLzPG{TdSLPI^T{>f zL`#=AbVfxNl>4~bsOgaZIe{hq&xypp=!lSdz;7YhfXMEf1C!STNFKol3D}3gnvh^M0YJYY z49KWzl*ZAEkR268Ssc2r}Hsp+b(D=2#-5Oz}xIfndk~_om7eP>;?C~Fv-VL^%2#9e4kicP;1T> zh(1#&CuX7rf(H8rw$eAtFIO$K7Ts}f6lnfX-OqQxVz!0!?DGvfn_&I*m3`%K?WFc6 zDrvp6Xm4}_1D%Ka7dhE*;oie18`Mlk?$^K|clmQ*#GeG4Sx0OUz2XB2*dxA&Bp?Vg zjh-Toe+G^bG1Z2K%+$-Q`YDTuwT}3z37WDeL@Bw$&JwdAk&2wm)0)C9uV_M;WPDZF z0L5?uQ^nH=_G5=f9qBQQuGwgOTa>=p-%m0qgi!>*byR2JFfIR?Whu_fOHn9lI%ThJ zJc#f+sh!jvVaPHB^UR)z&a(;ZJ=5^IZ&5xtp}ZZB z)9L-3{01w?E5TDkUomnl?g!Vrz*nx(S*6`S1%qFUN>FouttudedA!_pErz}vSSMqG*Jb>l+jGoR4y zfWK44en4YRbh@Jg!1n)1&54qX0haIpSKADbjQKUc{Kr8`{6FIofO`1uEx^0i4F!f0 zWX%T(Wis0n6+6pRmkoX45_Q(*iy5KkF+YL%b&e>y)}V2RfxM8Dx29yE6wr~x(W0If`w)V2^5eNA!(*jwW3ZSG9y|WZ5BPP1V2_vvk7kAurVndjG>yDg;2@wC?MHV@^@ z=_QrkEp{M#Vl6k{m2`A?^St1ve#F107iKw6E$V%B6#@i47|=EnHw>&J>U57IeCXWh55=Tr|cs*P_c5 
z%01FhKd^3dN=^NNl{1!4dOQ(_nWwF_n-%P7p|>#OAQcgHtzFtdK3Mx^ou>sC?)SkB z9s9AX_-T2B8UxUGKhCKQLQi=(%}=Exrz{4L2@e8Ywri$%kr3g~N9f$Hf2l(L%n^F2+)6X~IX+7Ox3M~)k_lVP=E5hB`^IfJTq{WebC0&B#GcUAc)l9Zl}!U>^IRj; zEKPr07lWlvxFI-3aUMUJz{=9Ok1F^I8&#ch17t{;+v|g((2q-%2Ey`q8ioj~%gc+| zP)=;~vVlH|cl+2*ZFO2<}@$_8|#0(8QQC@IU68d2|OWdc7YA1r7WYL!-}t}!GX z?ze`igJTWQG3;zJZ0xnSuaL^9GS=%3<+CY_xZT}FY^D$zLcb65N+80Rhvpw_eD-Q!YaJn zF@3GhFZceU-1?5*k|4mR#R!eF(FcWeg6s zuMpk3es@gY(!{8%*_n~G*VQx$rKxSu#UKS5-dgy%ULdPWzRdPqF`4N!%@kK7-<{!+ zgkn(2psO-cME;&Rx|s@102n1BdQuw}qN*N&?~EkcX0|(1!tJ z9vfFmhH%9xQhHh(kIV0Ak{uJh=f@{Dcdk;XHPCue^m(X|x@cH4I?+Js*UzlJ?wAi% z4*j0m02&v*->WXVUMbE0h2sO(inlU$+8fqfRScR6Cb{yQblQP8L|h&M7ufUY>&{xp zCO3W|j9iHd4SH4}^OUX^k`^iUS)s`eqWE1e$gl4VOP<6g-Iy-Xv{9$&40^00|e{?V)1O!YA#DP2Ab#+^{ zeqf%qIwmAYZN&$q(;>|?zzA#FB2K`#W>Mg$i)SEF(5tzdULiM*;YO<|+A@lysT;r! z#w7?yzzE`dTOR+D-t$n^w-#Ny(fO-?fs%aGdn_sy=kG`Njn#uc-Ihx)8PD7ImpoF+ zV%GMdlz_^oQC0j-7LFjs8Ks*@*USrCZ$a%Nq%JTG zmk|h#PJTLgn&Gs<4S@9@ce&Ub6p5w4*g-!eQ^RqLrymBlz zo(y^K?#+G7jTl*fQgu};5s|Rc`E^62k6Z1zNZN|!tizgNl}{q_X_7mSe(j*@ypN{$ z65aie3Ine!Ukv5%Xh=>+Q&vHqV!AJb6V4|!ps^M@!AX=x@k#G8z3xNl1?XgL8_8K! zh7518&27o=Z)LMTp-R&F34?KIXc0xsh95g3$U>;UTwvD>ce#~ba!2KxSjFcDjRm>U zbFQ$g?XZUr=bztl&w(SJ|ORriKL%S+VtLu!2jO(P;?$K5FbCOLE+^Cr9vpT z67cU`NK^!7xMl>gR_a-pF8tcy&KLMUQUL1@q;bw8vWQ?v>qsBdcXeA4e%msH!Df4! 
z;7l4R3g3JTT`sU1t!WN}@7V`H-~fZW36x87fuQN27Od13;D0K7`gnWpa@Wlwbj?KP zJp&oMCvE0z&vA(uH6}H*cSyG73PLpGS+mMAwi>ey>H&8)&*tg5M8#H89-ZK?4wy}F zeV&O`tN3ZR%u5t5b&l6BICwd{dV@QUXjY=B%#MF{OGrtHe}txwJVcpON9uI9Q6Eqs zCfCPma*zJm8QS7ugKw2<$?<5%l$kZIA|`Mi+t zzR{eu+C&#`E`izGJziZJPk7MyvPy20V2L9AhOCPhc*Nt{D|Gfi>sOz=+Z}rj)i0Y@ ztT6@7%f7L~nK4d(18mJ z=udJLFG034bJJ{2wT(t@=}vPWfbkYBlF%6tO|%!4DSgrb<<={p3heiSCNEQ}jRP^!bX!-hQ}pK3F^BYg4cL=Q$G|`zT|xe0g`3c9y5z%kwMS8W!zq<{b(Z z4NvI@%kwVVEY=k+ve+ZZUY?qPA~wV3vau%}<)_)Kyt7#)@RS_V!L>QnVu-Bzl+ari z7xbVsDT&|mPO}?1Z)cm_AU`v2sv>&!g)(JubW*Rn{R>@a=8X2J--@;}lZpoW-R$b2 z^CXfn@)53r4mdf=WBH!Fy&OdA7ZSzwmMA1}*JTpVwO$OpOSmq~l~G&x3i%cr8B$aj zN$@r~j9P)ykA-tlq4zvVSxxS9+N;(ioaL1y6TQv(z3BP?o$n7M`k%B9jC*BJd+yLl zY7BB5xz{N{J;`;sSnv1(f2vC1Ko#2DA16-9 zE#P`Zu0r4`PBAKH%%IbVuZTXJa)?0WD5#-WGT|YU4P72C+f$rKIM2viZW4_*zT=^V z{*F!oiOEGOG>!MLwyk~EbYZr6(@R2y$mht|;B4hD}G?-caD z+#%K`yE~4~&S*l+f`5@O*x6OA0zskkd<2gPUyW$hlDCcAdt`mq)U+w7K#vx^eU*RNbk~%fPxf3u^|YeqBN0SL_`G? z=?H>|VgZHZn}njTZ|?oU{eKS+c``XCCo}uZnb|XY?X`r?PiR-?S6wshYeQUeK+Pbz z>V}V<`%&MG{mMUVH+!{k*EuM8n`apzyAfIv>_C&ws|!y-529(*k~Cpehj59>iHud! zqr@ANj$5I~5sFJ!rbNz0X0SFY%w1v3n&MiHSz#WYZDuje8oHR4U2|^8O_~2$TXVz@ z=d%r_(eBM>Es#m3$Ncx^OP;m3(wvRDL3vwNo2yHhq>wbUhKNCqG=*VuwIr&XK}gCj z@Q0L%x(PzYWqGX;(YNdBh10&m3k$cLG)`UGn;?!@Yz$c)KUb=5Q3pM}WcZF8Ov(8_=m~!?F@$n-ER#FDp--trcaDVR`q4}lOwfLz=}M&k zz@820_aJ+Z0p)q@%l4MQBplC~~PY)>c_+ zn7ZI>>bI@5Zn|?E>6h%fa+)Q1v@goS@kA)=$TV%Tyz^cv+2p}RlggZO%H@JtgR9g+ zboE(lapM`Qv|NT54Tmo;gzNy5=gMdImNenVnLSB;k%d+bXLq$7JSN*_k=Szm)+>pZ zF^5Y5bE!M8nnZ7V^OEt(ofmNsB{B3_Ag~vA+1g>$g2G8YeU0kWwTuwRHE6fF)eA0? 
zM=A^i%s^0>@_*?aV7Jo(0kA6nPrUGoIrfT5Qg zmobnf^5~z$^Yo$BVa4#sAYzjWm28OH%^2nr@NrnX7S9dQ@4iH?smhU%ArJcF`&HW1 zKdB-xXk~1EVRS84HDXm2gQb~Qi}8PW<~ZqbD8%Q8s~#b<5c1Y{fL1u#gHL@T?S--0 zWNS#l6@aY8U6CR7UW%&>Rd*T0@3B}^rO!M+p25vG#p=MVN98xxD1%((P?||3`mQ+X z-BA7PWZSRM9w$xFpPo7gT}~D2X*hSw2_my(a-14pcYaiJzMFeoO?Wzy#7i`eV+fkZ z^x(eCv+FjyYzzIL-IICH!~Ek8g>Lh*X`I<-jOi<5%yQN%UfFij=gj)Y@6Ef;&=}@j z`M8Vy$ylVt)bV74$7+?4hrU8Y^kULpZx9|zt5OwQ(;OwQDQ zF*#fQ=g{XCcH;v(pv0F)EW&~s;3JsO#>-d$+hW0np#i9eoE?l8De-_nk+^ko<{AqU zDBJ@=VB(~Woy{U)YeL)s_&mnHp*zU!dzgvI_4*{z3@!Bx2t?Fp+LN|wm5QGx(6n{W48e&_#7Z7mu)LvqP;zx?e(ZQXMPX^9a@ z!ZgA&flitSEl^iFu#N9biA}B)Hz)Nzgc`_0$>pFieHgBj$#6tTWIWB$4mJzXV;R>; z^xo{(tDJotY!B(DQ#?OsY;fYyi-S7zg39{wJce52vZ9Qh2X5p+j(!g~Fw(2%exFd^ zAmE=%%kCg$r1f&A&oD*+9kiEadO)q|hQj^R6a=XL$#huFz3cvUYmm z95N~^4GpV#P%R8`dI&&&HMGIlCu70{2SY-`=hjg> zTW~&l*l8X@Z6)9bgEZtFJO}GpMIfpLBE1_Bd(D4wiCO*U=U_9cmCePz3`4YqsHv+l z7AOQatcH!L(o}>%{H`YrgaEGd5J>n207*ns8%?Mw1YAw&ooq*JJkq_grT}<<7zezU z-2mRpkWg`nWOFr5ZlTi?KvI-SDAQk@sOtd5vUPxB3o*?+3;;9>-~|*W+#oE`xCo0L z2nl`gRa-7W^#gmzs>|wPK1Vq3s-vn&Q*c9cei#-;yhKm75r@QHJN%7MGYNQx(jWtP zyCc`V?u~IglO@dUI5Bd!FLPpw3)8kC7GcDq20Du75)M0G)P{FT#}zT z@kH`$0@v3EurD3gcu09CbmLl%Yx?P*X$jLgBY3Cyg*5DYtL$u`3_3sHiGO}AX$hru zlv&5>IAl(>%HUgK%`{{fIS|e@eY~-Qgv-N=A)L81QYot&75}8N-L6r|F8IufU=~&>2GFw=pxfgqsqO_Ctb6_+N1-=^ZHVZr})knub5A z;|2-`$^7?dle3LSP<1f`oe~0~XAx_#=$|mPpAjk(5qqPtxba4Q0J(@8aeCk*V9W`_ za>rs?8$!snz>I`#YzTn>J{L4LG!fI#fI(CA!i__T#@B}+EH>F_(>D2 zBbcqsXurcM{dT z)n#Psj9I{}q+ag2%DtG;dhtqVlVZN?ut+U0r1ieu7Z&f zWTU##8mzFw)6m+>w@srS@635opwf{A7EG%q=ya`IS5|Tj9{$YR%$c@-52ZgQ>H%$zJwL{?_kA*at&>!#pMVz3@{w#uf zkp8%&Nzxj53dBv|JQ!@igN0QMAKlo4-(TZQDAWKziT@rVcY;HZ>%-^TLhKzix5h|; zf4M3D#a-WYQwCsd$kh;&kWp+?D?LWl8{2@*T_avX!h^}5`eefpSbJAA1`ywetD3M? 
z5_ba;4r1=j(a(KwekQ@}!iBHa1!8*qW`31KekX%Oj7A}Ruh-=Mk-Amt3 z^(!lz52uVAw@s^mnH}Wc`pE}oJURDGmi&wOJ5t+s=9LbyVX3eOA`0qJ;!XoiW2CIZ zT?ML~8IWc7Ov%iuw&ECOuSt3a+S^)Q2U(eI^vkcXI=?b>py!H7Z5oRh`5Naln{tDq z#?4g|8>*>OLdYfrow*&X&%h18iwo&m;1=-!MJBtEXDCCAqqpb$Gr79!|4uZdVWOd7 zA0VPU3?G955TaTa&C_|cJpjhk6e8jr*&{eAY=yu-kuC{2X^RD zInZRn-u&60@<<><>b!SM(luHZ8(+MbMtS(4$*#{oKnQ3R>~Y#bjV|y@4718z4*g`J`~;=FvB1Vd!h`%5IO%e_K1?XA2fciQT+1-D^G3B zVW;O5oCOu1M9SZ1KVf~Yr@{7>xWv*f=iin?5t5yg0lY!m?Lme;0PdhhL%tXyDne3t zGon{H%*%`MemHDXI#XS{QIERrh`f$nqfY6ig0BW2`XYX%M%qZ44lJA)c(3Noa(rOw zgxcA7)DPL0=zAr>8t-zigg&mYXg?qvq?{`^`Ry9U;%QrijnSzn}r&+sdjnxKQ6LZ~aSCIO>jLM9^MWXb^kuI-xWU{9kv zK4;4kB$c_9@lpYzSns=5APEOx9*4~7wJAz<8d+=Ork{T|4T=n8rirV(t2aBPwA!?~ zsxr*8zgVQmNTI-1>u2ICK1>0x-;L5c!*2JQ^%B0E|4K2H>!0yC{Cj;2ZAW`+hDHYR zC!L;z^kq}as}tkNqh#vSE~8vh#8SKD$zyZFxH*%UQ9Lf4>{dx9Z-{l#!4Yvy_1W>_ z6GzQs9%yQ}tC{+*J}ckg|1@cQEsW@ip!y(fzK$WQ!wwN>`TlI)3R&d6P1B{+8*h)bOwQSTa#v z?3AzGobFMx`<4v?-*l~uy|`vY$a#0=-W`W|$`~J$P(3lKNg$&Uh3EGAdkWeC3)H<0 z-QEygK7(`tClF}ru8B?5NI+Ami&Nue(;GD$lBxl|!R)HpYfr|MD!Dt^K)VoQLKo1* zD8A5otL2s~OfXN+O)23>@>}PSB0+%)`x3o9^VwR}lXFui1ygT1@6juMoXP!Bxq~B;)ZBc1@9%%|G}+nd z0X~{oT8W{P_%ldgz}g?-6_c|lUn7qG2|SMm#6~ozF3*U-5(|Z=W&7*4$^ah7?G6QA z>t1EcXP-fSgrV)|gG*qi=m*hZ3Oq6s2wB5%9UOh^iz^TSAL|fLN?4ke4c87XSHfD} zy_I~?@pizhovqu88BGbPw90&OUd0C?X(x8iTKQiHSh$~7Lw)Y-xsZ&iI^~yEefxdf zZ+n-#5$TW!4JHa73{|KW(6{X|okwWCb&Nuct+25#v}tv*k=y;uzx;xu5&DUKdGCn- zccWGN_bjV9#|7?x1kL&Yi-;~xp#JsIK2NdAn!vaRbgcS(-Dx>esL z!}9tz%iV{+&|2g>1`)db1l-P@Br%=YEz_oGdhZ_dOG<6# zIUXL9Z`yL<&cz1ZU+E7$5>9OyUF@RkC*^SvK1L!J9(wJ$bNaWJNlU3;EGb1!vy4l* zIUi)FrY2p3l{eclQBXQ+toE0(8SZ<@aegiQL4Hp8=@M*cc+$NGGIlgUO(U(B+zH6d zqwrjRe|0$N9gHKHo71y6DYbflgh=-8#j@&qc^;%bI7~~S2@k1iqv>KZiQUuI8TicF z`P-G`E2)zddZXW(pyK;U;+y#Hs0^jUa%%Q`PW2IeF32{oBe?tO0W;&300kdi;Y!A{ zg66hoZXGH%XVrLGG)@#xMLp1P-7SK51_I+&r{g8NIq6(3Jr_#m8ZPM@Mx>My_NbBQ z`!4hyVygm58wO%hnMD$xrFhuLCG3;aIV6>`s8^_K^k3G{-wAaj0^Y974n#UKpVi|f z0~bFrc-vFFrH<{P&aU-Wap0qHhaaZ6s?QgWHlyNXnfHcu>pavcuTps1m8-40_mZ8o 
zQpk5Mr)Q`0&3S$AJo9cl-ILGpv0YL0YHs~oJ2~r%EKaHeig$g?gsr<)7SFBnD_I&S z3Mfq+YnhFY=V)zUPH3f=?yGqc+3Ra6)-2B84wn>%2D13?s*xYd2z;8E+Wr2*+r?Cy z2>7{s$6sKg3o`1LL#i^(C}}ZLdszbx+`e^UcEQNPv2^#Fl*c@AiBETbDRiG_bhF5i zi73-5c^&?4mYhZW2mweA3eN}mSO3`wlH4m9FFX`wJizaJ>2YJvIcT0!WyBEsk;90n zg3KriY8=pRZW5$xECJo2NS{2?p%+dAU6pYfI(X=qNtTA`gk^--Z%PSGA0?$UqFE5z>NtvIe49e)>~;f+5YCs z!IV59-H0_9f{!~~l|)Qq>3nds-c{j2HqqFRDg#dRmu305PV;|%Om~?FJ(EbP)Ok$& z{uf%qpFSZ3x*Z`EJ!!DU-0q;CxwWs>s6loP`uuU|yQANV)1z+SU1Ua*6D&7G9t?uv zYk@H+;-ZDs%;;$9uV_`lQv%3xsN3CsdRM2mq*>R+aLrD>*0KBk3XZu2L%qF4R=x>h zmVLmnDU3gCE>r4UC0l$dyGxfi-z1yD^z!?%o;k}j$`m4$J$*0JUJVL5wI=EZtqNh( zoZ(s0b@Sd-5{2fbk5?JY#v~LtZZA?frQ1C(dm-MYUu36@sp%{Ao&n z+Knq! zD;dxbxRL-#>m{*Q-KPP&%BkrdzZ3li_xi2quUX~0znD2EWNaqSa=^Dzk*WcmPl@o9q(n| z7e)0Y!Ugpi(m%h)+uFT5IV1o2y~UN0zN3aORjRGS9Y`QqQIfk6=It_KWFmtJXbxdt z%6E<5H0Jqw0^sk%9*_2gEZWTs+s&AjI9?kd$q{P5Of7d-PV+OL-Yv)2^rxca>aRnX z>A$LmzJlKDYE1unB*jWvuW_K0*hrjr*7(p|#tL)8q@QIRo5Z_Owx2X!ivPCWpBn+& z;Gvh6{WlN2%pW}T8}I+gLob1XO91y3ENkRs{%0O~X}SNvLyy|z;@zYkhod(6cz^TI zqc%u+f79@;e+@j4RMs-`l6X#M-0N=&_RV)F1xb*Pg_6S&vai3&DN4$~@h^b)7%7SO zIdEtH<0B+sdj~Yd_*ay)q%5wJuAjW|bNDZ=ANhZI-MA+HYI1-_UIG5+E52Hw4hnEt z1xWzI0zB?uB$h);g6t?joev0}0K+_JjI1Q!DaO&l<4B$*0YfuNMiP)-uU`s~%>!b5 zU;uC;{*;Z6@!to{#*_80n`Yzs>u2F#0f|0fJKgvm{*z6^f^XvMivTwaXP@;eAmwoU z_Zp->T#UEX-`1+X} z=i@7~MRvd5sWu1j^+#p1$NfciFD(aHuyOzLaA^f8NpGJ3r(@253>;A49c;oOQNKR{=YzH*e_v&tHXqQfgKwh%UHkuEg!zDF9n?z})JqOr zSq@woJgaiZt(wRIf_wZwXqOzQl^i%*4xIf@5`9nxg!A&?TzPQbPU-Ujg+914o;Y6- zuJ~IRKoMLIgd!B>wrYbX%?Iahhd3X{NRGQ1|1(=Y_%5K!|IZZp>toAi&j5EG+(FzD z4o?9NZXOst)(b!|1Bt>7xVR(WNkigEyzm7uQs7CQa0S3fi(_6}KMmJI@M8!52qkJU|2ly-?lhoN zjRa%d#t~ezw+f)8Ah@w{1hf>?Vxs_B3LsV+1@Jqd78?cJ{n@k|$l*eQVRropVEIRa zT5J@+@8Agsa5Vrd|42}ajU&L;;DOpGfEvhc)&M98An_T_`0s!k;4^~p1yBQgW-Gn` zY5)f0^$P%WDRN6*0gQggEky+|$0G4LX1MPF{(t0_@B$DIKyE2F0RDgEmeK(b4?u1y z9e}kma!cs|fI^U4N(bNxMB;g?ahC>@2y#pI0N`4YTe1gW?m%wI9!P`jawHxLi~ABz z834H@d;laaKxxw^K!dQtBliQsPV6VimAPexxZ5UbTV%2IZ?RI@N|Z-$XqE1ks0lcmWQfc%Zss&MTPt{9}{vi&+Sp0XF}eY_d}90 
z#tysRovGRP;CV#ALL%Rn2UW>gEewh|@*VKk*_sP|lDyrlDIUV2aCw7Wm2DljD6e{4 zSbecFpWgYcajBK2vHKxk-F;rZ*W?F0$Q~Yev&_=P_nK6oJoIgem-6&jdP^6E=zY(A zk0p`Y<@L!ZMtwEaUw7KQba>mP79FVw1c_02$&$^%f0La6blC0Q7vv@vA)qHPC#4qn zpMjcK^#UkcdjkdwG66&sEp%5a+BX#O`#^PLcWRhIWx0^V>F1k{1r zajWM3e*+q_|BHs}CZGWm(Fg?B`poYoPEFLWJQ5q9trEl_O-=?}cs zrzBS?H?&YfS<7K7m{-hRSI$0t8Wx=XtKg&7^=IdTJ+4UZeGF$_TlmqJ(o0`jZeCc@xlY~VBhhvG?CyM+-JRS!3wKH;gCvYgSf0Ht ziz?-y6gMA^FggLLy}`-!k!^^%M4&{ZZBHrmZJH_*-)xNTYJkzTdVhPL&%6f46%s;@ zg0EEFIvOnm&Iy(kyzsrT#5vb}be>C?$Mjjy+v00EinH;&E+^Gpc(GTNuDz7suj(IJ zEIEqx877^};tsffYRHA0OZ#f$i5n`O3<>Bx)6n2j4g@GFh-N?lLh8EvQy1%jBA*Qq<7b zvHFNmdPmEopHiAK@WIXblydNhw%_H08K&wkikVp!yS`>mP+wpcb+$gK$rqOJV& z`Ep@JqecgnHT&_o!d*fh^pH4RIrGqBiuEi#!o#9)x`m!^%K;o$5wTLk0teHK#@1vx|m0ZK~9{WkVa*#i`^_f)_ zD(}lm0E&siD^>k9w(Ml5`5sWSYyx~6QXkN>tg0Fdqz!RS<~8W+EO9it$3BE+ySCE7 zkLbZ9lAj1Tb)lyo_=nd7GlIH8(^9slvX?)?KlSJ7W4EN7zTl zMtyWNwzl7*)}fLwloT79+jb>9$M{CYjSGBqGWuvH zHKov9^gm3JOxl$1vMWWU>bSHawG|z^Vu>D#d3GR(s}z}s0-&oQ%ha4w@SahcVd%VD z^h@s!j=NsF>#o-;i58PDU!V}(GZpL5Hj-_7eqP@%=lXE?wX!Q`?s-06ENpaHCGFmS zo&en^3NP^Ww+(jyTzc78i?bp5q6G*4@VHJ}E#yQ|u=jB%VJ`V78wI1%w!;&3Yue^>_8s@j2bE!;{SAFkdAg>&tv{z{ zGg)Z|&t5iavptaG>E8tMOr)IH?jI;?FIz60{*Zccm&;g3MckDX7na+InvS)TN=K=iQlG-NGhgkO?4p=bY1)tjxhrf_6m&MVsC z=WKBf5V_MHH2SPuPiZ!NqEM`Id-k?xVGCHk=N(rT~%JOdv}&RCBuO zT!!+KOji5WoQD%%O{7zT7N@dQSI1|^^o*(Z7DZehmu}XjSJ~%i@zBIk>4)(H?(#>4 z1CFQoI3JCrfjv~-3TxZE)PY^Y9XU$ehe$>hA`((noxHMLOj_NE@7qAkqWoz0raL{2 z6l81_F;Iz`-U8a(cUGSKC3rq+NP^VbuZZs>M6q(IaQ&j=Wgc` zG9n=y7l>NA!n(4gHozJ&8-`Nsi+P%eIdN^upK>wbl(T zx9-6bB|^6rx61sxX<+&JKQxV*l-hBlX@3*^uv-$!$oi-J5&Qgl-E%x`WYDL6T%NuSd~y* zH|SttBo(hrKAC(x*_ThdI_1*pdy8@g67lma)JvC=hb3ADTTgy^cjQI0-}lAR`_6279|4iU*oh5NITDop2@%mFPor^KqVCw0{F7fvQnwHvn@ET@gL9?;I zcp+4*mVQUEH^Zl!eeflwB8Dq$1AVW{4ptWz=-yDXN@{{BpBja0L24)U}fa3_G4>JYN51y_%%fdUB5J3}p z<`g3v8*f8l_iH(9zY{8hIgKSpUxQPpivDw?pQp2%&e;JTX>pTVqDiJ*-K^Rb1)VEr z)KVp`u@tqrF{izCNM$bO3@3wR-@D$|=vc-bG(E&;@}p&p%#C`qqKpsCb#{hs*Rmwm 
z0`|Cr4nllLZlrKc{RQzim|gkl?grFLas_eUC?D$a<%pjN!n5{Rw0+9w6mdT!(MCbNDSHFT(z6g93gbaI+?Ewx0b|F4H|FyHVn2=l3df|MM zeysM7)k+qb8hKdQG%YHI6-VIWYP=>waQPaVy947(pKHRqPKqa}O$R0*l+{zOFfgB? z#+ZGei7V|}M0s?Fin*WuC2)0*2-g^Mr@O@UE;r0+FOOT-Z}0aYZaPE~B?(;Q{pyPFlFmk)2^z!HL_`8y{YRQRY zrMrxWoq(QQCXy1pH$cd41UzSHuwS>`4)pSw65SI74Wu{VTc1GxZg^taA%`0l@mgep z+3XC=2$@i{cXEsAnMey7r&9M^8N@fpk?-lNmbtSzmmT9Ticg>d&3sAI{Z*^&{T{2i zsW0?S!VfExy1fppSLuOwb`$MtqxwPhf_Q*Rd#uQo;bAtXT;YY=?G>8iaeM8SWA`3j zdVZ7V9N9>>F_Rs&lq*9Vb&FR@#81jKm>$dJ9HtAb)r(Mf+u~1zT8`V>`@6@0z0&Qr z{3yUNfi(k2_AV($k=8+#*dTA>6Ez}A7zAzLsqsXnP$5!^puV zI^DN$|9$eXMb?~$V4CfI^bP4J^3v{j%Sy#&7IN*uRX+S$T66o zr>{9I1$OS*WgT4hAgIv0^GJs;nL_{Z`Tj|PFBEL${S%>4L&S{1tPPmb zQu&bYyXo3ZiQ6)&Aht>d2du z(29^val4X#*;E>AhHlrUNZEcQNXs!nY(n+rH?m8maZvs2OPaq~RR3?NK3Oy;8AF3H zrqM{oA*D-KP>Bg-2~RZ^t7P9IPsgeYBC{vKyoyzg#wzHR{!};mB5$JKLEQ8n7M_7^ zMUx9+BXluX+J*r%He``pqzw}SAsqp+bLBl`fus(=+M!;#R4vg znb-ed)5-H>-#Ho~3MhqU@{F!UZ$h>p;C15EY`1HRsLe;M!sf_Oi_RiIKqxda8t#dL zj}cLdQE8E8u)t^dUs*i7;FBW0Oy)Vu{{>N58Ky;Kdc^qR(bE{rm#=|)8ySrjOS2s; z$JRG&UC2@Xb=FE|w(#}YC_(FfAVe=tRzGKW4jHh8-=ixOz z;wdyLT@jydkyxF+&(uo(p|+`~%{uidqodD$CLNoz=2rJVv|Q@FYN{GBnHYy2iH#<{ zm!sl^eUzz$nxiv(qw4vjHje?roJ)RA$@1n$i#+$d>ZiBAxmUfoK1B6R&*Zh$wK>mg z9-ba2W@GAP1kN4Qm#kv$i#F>z?N!#zX5U^*aKt09M=uTHZQH#KNaRbTJdV%-1!1~6 zO*PF*B^qrLWh3lN1!sh_?fEU=>d^9k%eSJ@DUJ}R5@Lm33lX6gjIgBVAH)*tO=IC- zRE1ZFi>K+erbP7E?((vK#bQVKH^>|g=@n%V>R<$XUz}L;c3XDtxuJeDf&nrrYDsc; zmNAWa;DV0bQI5cUjglmr;H9~*t}6OTDaY7KCRSrr;bcR1+?=nb2aM#?Xu39Cz9UC- zZLGpys%+%B&DFJiImQ{kw%~Pjm4DkGPN2 z5uG8v@8ta$gBZI6eJA$+oW5A!27k2TME21jM{S5y{UrVBE>0_^D&3O7{V#4^wKTjb z`_3b1kCoG_dA-VG4yaytv|-rS$w~$nzA&{!i-78zXx1<~>o$$Fqm#@OYL_|z%9wR4<0FR<_6k$u0u z$3jH4N3UIpt51qGf^b|RZ`08cxWpgpN9O9RyWG#9Q1@(h(pkGhokLr%xs64o9vj&# z5!;g{BzRq5{$9AQO~cE>GtN!L3=bicv<2O+E;&&-jxM~zHEz)l72K>aIyceR{l%+_ z+xN)GzW!?C_7*L(%eu?@L1`PAHd0h;QYseXVAYr>Et#UA6G0P^smFDza`xZ}-V>M2 z?u;dN?`2##(l7kjXSnL)(N}l)4QqOaN2)N5Sjgd(&<6eaOdT?AYnz?X0~{!f4F zzq)=F05SpC!^V4X1zhYDK$vh}1=t~cFn{wM5Q>)sQTO$8*RO#)O%^E$ngvizAT|tM 
zK@9iL-v=Nf037&@`@x%O)8+cFxE~ZiiQxWa0fGjHton`YSx3*n<$fbr??>n1z0(Jmh zZWBI%!-j0ZPH@^6zdU6d;xc4Gk6Xn?Caqk+z}99 zM}hG9`UUW~wcm&Wa0UDhF+Lg(CMy(pzBZ!qAijiN^0 z1XfSdI{@!Xv_GSQnnI#JUlIKvXpHu|+_09_E+OZb@yO)Xo??~@R25ToE9}qj|2p?B zQ6uG`C~QhUvDPR_zU9@epykAgsDnj=Vtsq8OZ3E57M>vu?_KVE-an)`sA7Iw)Yi3b zO1OS$&ne8VhiYNGHS7h+e8LaLA(Bk;tB~6>JY@eKKv8%s&Svk}43h)9Zg&ckXji9( zszaj3jLrMAR*7Oh(Ib#bgD1~-S{Yc|ZP>CX<% zjIW>vFPgK~_|;X!SDnqQn!DgOvH#BdU;5TwSzoH|(b+Dv9q~w_zZh~|Z=_1MRx+K2 zhHS<$lCMR?x0TXHdtsTlA&X}>p#~9HegJgIwi|?$=|=*2P!j~PgC>akukQv@|E<^z z_)`9>V)LaMteS~S(?+1*J%v?_P*n-64tC_0ZdTPCa5@0ZTp$#_t`dG1zi7!m$4!j?hj)QDzK+G_OaRX0qPZiEY zsJaBaXz3l0EF|`-(1n}OKt^@gb!!4i&Ju+!rYoevh3gu%c9TyOC(&`{eRk!J0-Iehj4ve-^WH3}rk=Su<`#Rw~(9!Vf31hI5OgtSS zoBZipZ4L}u57IVn;k{lY{vHC}d~;HB&GiTAUs|4tIDUnHX?gy6|3cudXf(tW2Jcq| zT9zR+30M$62sQXcjE=`bvmLOItJxu;qAjXusz`MUp2!<9Cf{k>GFG8`Sb9y~_LJW* z8A!Mg656dN(sNJeGZj$w9Ff7O(NVfSfM`;3JJW*Aa8p${3nE~XHBUXFnUFaM>^I8* zh~sw0$r2o~HEz>P%7%iB*-mZrp&EaC`wb=vZmyyyDHfs{Hw2HxlRRS91^F{UB$;9f zQ#qD{EpC~T*r&D~dU%T+L5GR_7Pt-Te*w0}T1kF`?b9)EkokYm0TfS0t7gX(^ zd0wS?l5a6;z&PtdRb>Zf!tr0XmOfV3d3dQ`gGd}WQz+2hvgm@tB-%}01wr*%bS549g9Q|6xcY>tSXvR!pp@O&_vj~mj8)RLB;|_$l$dghHrVOEWF;YM`h#Yy4uWkbfD*S<2#>}8SFe#_Hye56)6J-N!hnS);TQi5U6 zSwokOcm^@KiBHE)72g$pIDHGQNH5)f2G6Z}*T%>){Z`-=ORwu??BkRJ^1YO7r>p$9 z&W=SIhgUJa`_6Em9#VUD@#hO4$FMLjgf5qX8F{8ct`=ADN?YLp&VgQ5UerxDZP>iH zVhW+=6L86PvLjs5vk|LfB1@Be9QK{`s{$Rx8==@Z`K=(8_@eW&h%(WYChf=Pb;n(~ zf6{urs5ezGWTMjTjWlO51 z6HobmN;j<&@Cux&w!J+joi;ZOP)Y{P`whpQ^B|7Fb@k@HF9>H((UD5`f8{?gN&NerO#pm*&Olrdp zQ@xHIdQrrfcx`RyV@};wUjx$j2kC~?U!Sm(2n{JYNNa}fU-IbEPEbCRBj8W&h$$dx9rTDzaj<9jLHab0tyeIM`L^T+JoO=v$7cCRcX#x>;d>A6 zWg<~?ZVs1-*r>ZrtqZHzRi$r(pK}jf{%m@pZVv?`>%h=V*(W8Dxoo?ko2x$} z2Th=#x8V+H{_7#QNJupDPj`s)I~j)f3q3pq3pq(YqPHge=pOZD4Q8 zBb5ooJqgNq?qHR0yL&_bhE|m5#UxR zfOYkBU8;M1UgNt$3LBjwRp8u<)F(&W>4i8isR(h*3dKmzUSCb?aSqyd^ki}RO7^)v z(K|ukUuBedX6cJAohP0Takw+_dNv}H2>p0-RQjA zyr=)fA`QS+$u%+L^Qf>@7$yR`u>fwI+E2)F#iv01U2l-0wC#E$0W=IKq#+$i9XCc} 
zBrz!i$#He2_J=k`mu-HIQ8|;@{NzM6PanwHffp$bNAd&4s`!p^`gss3L&Ou`_O_2UYa&;^^MK#m_ zaMJm(N20(G4cH{H>=-PdzRzF7($Ana+P-Re20>u3#%Fx}-S!={5u*2L;RRc2hcQ2muG?W+pG-c8Ww+CJa~5MR(Wny`?K zsw!%xR|b;2Cw*}H(BMUrI65Zbh-?HWh4?q;a~b=dWYR74K9J)ppGi^ZzL*6vIu@dSKgq*J(NS0-GE8)0?$wmiFJY{I*Qyzvc zjB9A(FRxEmZCE9X$1chv0E2eu1bu(wyjmp&@ZE1>V$n`&1Zcxi_$<`F2elo5 za8$ZB=H_4}!f)5q;UwIPWw3RM-c9kNn8NazL=*t(P6$p8<;xe>UH2s~Y^3?NV5Wd4WZ zuvyuEV6zeowm}z5Mk7)RiQX5=Aiypi0;V*e0g4?7K{$||XGqNUgP&}Hd8$2Kj7csu zidM&v6aOOK;7&!n+((G{l8|(z^bXPkw&t&O=$QpV?vO5CZm}8yn#X?W50=;jiT}HK zT`=}}4h*0`PSz?9L6^Ef4)kV}>Ii{JJvy zxjE`^61ac(RO!F_@(yM&!>&iT84PeSvV=usztUKI9J=6rh*Vr%-~pcdDI72$51l!U z^5gymY_rCO#y{e_8wjwG5*Rhs-a5@|mOO&hJ$|uW!Whj#)P2a%r79k!6Yld2R)X zq@v1O>JRtlVg?wWiY&tH%wn>Fd|zW5g8f=iC{zg>mzgr5WLA8F_+KWF0ol;ooy51c zPVrqB;P^KJMKZEPxO>d?M;A80@5ukn1pLoX5f{CL`L)bsC?_m#>!LrkgTJTJSM3VrrSAAKC=vik>G;v!$6fQK< z5?l^d=Wz_7DPDetP<0M9pAX5d4=aTwn0$bx5Gj%6IRrJBgsDq&LmtL%EW7vFSeg>* zC5JwS*p~(97hf2)81$pgGcfuIe1i z5*hSTM64gHw^Z@Uu91fP*$;1&Ixo_WWb?i`Tg_w3Zk7JA^TtD^Qlq6)oUFyRgWvTM zkL^Ba5cp{2NE+up)w_JMyqa`93+=%;7c4d^?_e zCe|AI@De2kf~vjKR%bIQQmjLvs&PC?_b2QT=TYQMMUo)F)M-gykiADz7Ijc5l1u}W zxR|VFOj_GZn!9g*Eg>rra0qw8l&jKFuMaqeNf()ANb07_IO10>H=KQO$hDBYbXOO= zzg<021F-G86`Eht>AW(@xG=-;__9aun0&5k`rtQ1cP#4Zxaf7_yJ1b0+MBzPWTI# zQ{6QmC%du3KRxsUk(E$6X5(8^LYkM-Bw)OE(v3Px z)|bpz0DU-eRJ_K7qVSi%k9*4P^l1!-2yuepi5C9#xNU!Fek>UTdrTO#bV?aKm!ueO z)rLy6s}iwA(^nIlGU|%4@nwFLLC3J$4DP*{R3uPp!FQ|c+3th!txvjoWIWg^b-G`E z2q>%WmwA1TZ+8W0vrXq|-;?=wZwS7*At8A7>K@B)MI(0aBb;u&zGpT(qlAQ+d9k=r zClD9;Y4z0%!M%hLABod?PJbvqb>i(KVY>8U-7_RLp<|3MLM;>71Dhkyed+aPUKQldN(2nrit)eXtQZV;FEb zqed11du$M$V0e~T?rqjx0bb$9#xokP^yOk*=aVuN`^2j+d^Paad!#s0_JerVQ`){= z@eZ>b*Kx(Om&Iy1VI;I(4Dtz$#JLRzOd3LnlCR8^VR;89pN%2$-O)+S;1bH{aRC+nYP?un})ySUe?kG$N+Jni3R_|t*h1`16^;a?~;>2<;v09lbl zY`*^|3QZa=0njO6tqRg8|5p$i@;`#mHYr1XgU~j4L;e7vZLo*@frY_;4Lm1+s!LiD z52L|>ZUCPaz6AK8$P6`4PV0>cyo;Tukf{XYAJxG+romO02B=W4}f5x z(J1+?BY21#9_9w1Zh%(oH}DKcAqBP^0gKcounnNz?14Q1Yt+^!IPxe!RHnG~i97(5 
z0sl+>Hx3P;-f#pq3bI@0{8#WBz@q{58+gH?*Zu{215{D}kNi(?K-scY6N30Re7D^+ zv_0rkDB%0AL&JcVybgfEVZlHo8insc_yVvhIJ$ls4koun#Djy$Z4vR{D3rE{cz|_L zTU1TJu&6B-C}3067N-<2CkmkO)-Me#iNeRlaX*R!(BVVK_yYJ2xcvHQH~`%iHxiD# z2en0xgd^`kZLuTa$a_#*^hh}J9@G{;5{|rQeR~~ueca7J;rF1|OW?P+D3Wk=J*X{? zB!JcgrOm1Ua3U!ECgB0UzD1LSqw7I!@g(8sdQkXh7yk=@t_QWnlq3y!|JE0&>&Jk# zwz!h;FuX0YB!F)OHr_-M;^=x%TXacyh~5TY(jUVTp4e#fj&DF$b`*Z;#i#Ixj#Vma z)s!TUo+Zj znfrVt_kzn+3f)nF98 zZgb?iLzXC0v+Ub0?qdMd9`e+X*(>xk)%(*qHQf=rCqhz)4BqM%X?n7TkiY&oT7FeqI%ePh z*V&T*LiKfTmz^j?nMjsWvzVB+0eud9NdB!fbES|z@cQpy)LZm z5@ZpAlwfY?!v*7B?L@hQ8QvYU1sT(z1Z07x zv68kzxpzRmCUL-?Ck!rZ1raM3JIAwxqb{(drm(;i6cXCc68Pv@r>yw*!$h;AH2IKb-LuD40x`j51dGVLed8yLK|NMva#~OY+I-O_Y{bB8UlG*kO zZV=$2RFUA!jTQp?hD*cF)Eb{rDMQxqq`OU~2{{ZG3(dC7UcCR%={88aJY8`se}~Q+ z(#a#QpMI~q`f8K4o6bw0zytf5$`uYjO;~8!t4!|bPn+HKwtr{bjM%j`W-E78HrZ6J z5LcJ5Tp$&-ZAz=dt}lBC6ZuxFXyzZu7i-9mh*el1*0*TR!)m#S%a?r%bu(yA<0ogz zC{3yBX>^XdSbAn`i2we@-}p;Ni|27$Jr`x^U$4!xJ=J^;jZcrOUhEM-F=8Z^U&fD6HmeU#yYby=0o5K$u zzc~H;bh3x%>3N%~%p#@Y_1kjy2CP`Rn5I>^W|sW6GO}~Pnn$mEZ)X@?J9Bm3wL@}? 
za#Jh%0}LOz9bTTYsl(DTsQi+8OYX`l+r7>i0PZC12rhG4u?v-F?C|^T@j1 zWm*=xrXjZP6b$!F51Lvx&cre_{bYX?iLxVIqn_9E@hiz-GxL>&+_sjR?(#1|2(*Wk z981jYWp6GhJ5*ZnpUl3)S+*416cmV_5@QGWOJXdCt49Tw1y8Ax9H*f|Q7PWT%A!ND zht}XAh+$$WD0~c^0Hp|r{mT#UTW~RZj$GYnloHSlOg!9u7uoxIZ`wv!J3_Wt?^IBM@-iIoA42n+CWGh6hqlk z+uwwb5Uij(l<;xH4uG(LUWJA$C!=K;6mkRQ3Y@DembKLEvy0jcLNb3qjgRye; zy!1bncEu&Ixc1jvEA3{mEA29O_wHsvtuLNe{L&YK35Sb$!le`>LkO+BcZA*uxd{0S z(S+s-DGG`0=1Z9}_5}1A%xL!^ z{ozo?8#&=uP)75oKt@DP^T#)82EmNa_)-I*+?%=KED@_A6&CsdM}uEBV^frlu_5hUBHaA1>HDzohcJ zfV%l&{Zv)W%e5ZZK3|3cv&&{cq++TiQN_DS^esk&N z*U!|>vQxVx3VHg+6g=m)%Ut9g@}F(~-*N9Z<8NUF7w_Csx3d%d%2!$~wsF8NrePa9 zcI*5!$yWy8I}gRpn{-os8VD=1JnE8_CN8b(|M|Pzr(?Ij4LBwpTLKr@d%Pe!lFc15c%7 zqv*bll+e3LFUX4ZDZBPc${#XWcyhu+g3kI0-BKw<)c6+*sV;S4CQtnzZq&VK8LcXt zYF%(3-1)-k15ZrE`HxRro^nl)PyY8u<=pC*yEd^7Kf1A9{O8e6bBff6g}Fb!`54e2 zJ}_J4)~|fpdXcZs{#S2OvT@-r%zZ8v zvpvkq!(b)#`NNs3^5V_7ft{P61ypym*h-;2ra)b85VY;p1Svz;cK%baQ7GwZ=>u49yUd4{H5U3SdD-e__Fa2d^ zN&(3vVPTYv#pI37ldWCPq@42mv2l{R=@ko^tDnLmBIuscE31mjT~n;Am3@Nmo=&O| zQJI%-)wb|0Hv$VOAk=;?_{qxP>|Rr zL}QiKFLm7;Fi^VLKAZoLtZ}%Q@jii|eUtj+XT7uTlGzw_&iC*>^-P|l^4S^m^yOQW zxUGGpuraJ~Lm=O1*S@o^WFbEZLJCC0)+vZ?*U|JnQS7wodbdNWwWn0r3B4%G)#+!f za`AtSPHbFqa+k0Z`(hXAI`oDh-4g9jrTXJnJmAao|o68?X}jc zH2Hi>a=7NN&0?R)J4%Dsy{c~I2GWs6v--D|&$xgo#NDbmiF zGDWN2h;W3QXpjPV^CllZZu<4~I~lz|qW6?vJB*&qs1A))8T+I{rNcLaV&NG&)2H62 z>QK*-Qbls7kZir~*KUdPeD%B(iTBkK;o3RdZuad6UDNp_DXh6=dvv8tw1oeq7|-^H zByHi2z!yEKU#6ZqM9AkI5YxFlW|>h^{jNO4RCQzV-^wOpU&a?|os29oZ7)0QJ9Cpr zx93kif#p_nCtXZA8yEUnmeRLEf9f4E(cfElw-Q+*U#Bmg<68QSo;0`0WO?1K6L|+# zY}1v@Y+hXtne&WyW@R3?>Qn4&&v;e{*4okHr{9CDh5UwqpTRdA z>_tCI6bA45-q`1NyerbaA~SOHPFAYui?t#S_2;g8oOCGYSDl{_ zn6P_EQOIJY{4|fs7&m)vD99yl;$IwvP`?FGP(iR|gWi~d+w>@wnC{gBtt26hh(OJ> zWs%ZjrYMRnTF|h{^bwScj8R?$>{xPM&zjK{~KM=_ZHb&=AGLlq{rZA#&})?^e!RBlyANpOF8w1_n<>wLV?G#qt@i&|&EMIJ#~4YFSdI{aFDx*ecqrD>mP?o)m7SX%cgjg% z;U4v7P#+G75Dv?X1Z9WhbJSM%b~LuSzKY^5meg++zk${Me*U;@yS|d*fiof9Qv6>} z3X6k{hBe8qM|g^oK^qG$5D_Xv2rT=!V4<0n;$OCuL2jmxESAyKp(}~{UarfyZ5KC5 
z6>RmRUGMQYc8phqsg(d`n3Jzv^P>W>0jE_S#|9B z@!a#K$L-hab(lSspCkSJ@XZGrPu4}9So*%^(_N?=tdoD=Rr1J+>^KPWvKu%3%xfMJThIGs~Qp)7uOT_F2tAq+%sQ9;Q&vG z=uX{9>9Mt0MhiR_Xt)=y4GO==2;OcN7+B8ydEmC$cXL-}_kdh#c} z$TpU$y>VK0S|WXQwq(Y`sY%A21@8vle?HP(@g%>x)wb!`w2MwBOzvN~y@Hk>E|~3C z7nN{f?CiQdTgS)_JS`Wz>G2}X+3@5yfhmd6N4bHDoA~-KQDn4+YlDVvd&-})|P4zM&cH?X=}^uIpfXDJ5L`Zn}Ql#2TqxRv#s3=Qp8nJ{3q zy-Ga$Czv}uht=V`KPNUKZq;w_JQy^%h_jKq5LCJ}vRcni@0vQyGs{CgPN%MCn?^9kI@N%*eLviPB zefiTpQun7F`dQz#gzVH^P+hjtcMC)Ooy{(r$5#_n{1jF{<%R}a#7+|U1xI^gD8p#L z4km;=3Y4Ln`!u+@Pf?4q_;YnX3$E@9TPB&me6%t(EmQZP`AeFT_U{=~V zLVjFl#jY*065m@ysNHltcEC2`NcP&VP!RFJ44N1%&HRPZV#&6J);feP9dDNNvv~o= zse$5j)_K;7o#l3snmxwT{kAEZg)7T;5Zx!$<*t;r65H6Z(cI%Kk3#~tC3BPOz&$gDJ!f!<~q_eL|gN;LQTxa=H)2rE45CUdpD>HWM zsb6JHebGnO6&+#j-~W1-#FAg0u8g_u+wMro+_YRqO|BYP^K(hIh1pBb>Bzt^6-x74HEVV!4ffrxO>-o2M?<#SpYAHH{98rwLl zcLS05F_|88@5CpCTbG0vE>AC)YLa+R)R(NH>yx<1c5+?7!iI1BYpYjr@t{yaG(s$$ zkfPp|gO>6h_31RxfCFh0b(@T1>n+5YKByS&m5f=SUJCrgd zZU-~J;nQx2vbK)&ySudfX^3Rn&P&tYQxmFB>?WO6-+Z!ud2`xp#gqHrPCWZ+M>+3V z(q+CM4RW!HnLLaYLfdlr-nH^2DimwCs9Q_YvnY(S38gjSHin<3Uh!UHsUR-UuH@t# zFz(r{)VZtrCG;I_)I0jsdOnuO6DMi|K5>0aV8Sy?rAhU#093GL0*^NF4XhBLy}T!+fp{(|GLi+y{zLr$e6 zeeeC_H5(Sh5%rV0^*ysMEUsC+#r?*;x=Rf|Zp3X}y(0g?jrP4~=uP#nKYctmc5C{X z*Y7v~{?+|m)!oO(Ud(Il_VtZ@hVL&%H1Xe^CzWk)tgAjdX>POHCuz^jGtXsOwyP^? 
zJ6DA|xg~bUM4eeVHgVm%1+2ZlmfX)hF>!oGgp*4ffl?OMkmbvF%3QcE@~cUq$D7}U z($^L>y131`=26NwU3L8K`BkdClAabddxBO*@7{RGCdHSH(54Z$H(#U{s4PGlRe zR2Yc6zs9{?xJKFEl^aaC39|oC#KP|nL4pkT5dplA;{K?yH|gMDF>`^3lo?Hvc7NTlc5 zI3?P;9;tg1t*>8XtGRCdNuR|XyFdHXOv-E`N(b;<*HoQzA(#IYucTS5DAhC4Lm}4e zaEo8A;$8-xH)=npbZ&0iY}J2w);B6|R!qpj zF5aWNXH3XXRe4es#ifYFNLg+azCRL|fJ7n$3$a~Y5Rf?Z83{;)+AiQ4f=HeJulG+^ zcck}^{5LBeb+}UKAMYP^s8Hz4zurIUP=d0*zNf**K*(d)4uv2lthOY38Ia@-T-zi6 zWG`;0W$-ipPe?ciF;aBm40gQO@UIjo$w?n2W#0elb^IGq2!U9C;^}lD9+3S%J9!?w{?9K|2uQ!N=!$5|xOmG)tdx1Gwv5>+)wd_E` z8k88=zhD^-hlAQ60FVl^8GHtVDg@3Ae!-#$|CGN2$m;&7eg_dax_^q_0d#f$)V_o0 zB;7xy?*OK{;OZTG8|L$6n?l(!cUasY_<9F_!lD5OtyzP=!Cc|@q2EBx88oi2e}idL z|CE}?DJ$bB6A@X`XKTZ zR%LM1@&m1bLc)Tt1Y}GyL|sfo*;#w-iXisNMSVzm;19rVCENekTggFiqxiejK(Ik^ zuka7y9>q)%&!*v9w*4%{~W{6eQ(Z<1_N`s(qk{nTGVl3wGS{JDn%iw)3n&kg{VoviU-4zAzJf400pwTN!U(&mA(s9pmdiHs-NusaL z$o-5zCmpod( zCz6r$_Rbb!Ng?UesnjtEGjS zpL^I&8S}T}PZ{;3>L<_6 zxp%Z1cs(J@eHT%DceujhFQsa68k&Ubd>IQG&M)7+u(Lk(ef@{V)D^SKPIp7n8Z~~^ zeKX68?qxomuexJ#^4)=n4fK@_={0^4>(f6hR1xdhb=BX0|5W$FoyA3=A6FlyI{kDX zclC{7aBzR2QBH8-V?ih3eyewVsoFQqa>#fcR-B^0NX^~5bNcF^784C_{uC&@# zzHGM8JdY(E%9^YBI-?w>XB~To)ujr8{l~;Ibfb^T3Vc9MEnJJ zi|AJM6U|!5rgVBiR;EFrzqV~}u6y(c7a>Klirf`yH9swW`^U9ejcp5xcQF%r)f+;eQ#Qz(A7WV%dhG$<9()gI%iGqz8)f_Des?LkXn$n zJ#vMFYj`_fefV*~75cC3S-*r_zxzfCW>Smfk~~svPj_F8bvw3Ep`f^A$>yij(~j*^ zt-d<{uJUqeUT?B!$+!OU<+7~@7?kq6b2=jznO~UY@I-dmozSF>4poe?&%G;rr-^0H zoSda*P2RpovDI_`8i!u_{SC#eol30PW$lT%^Iy!%sc?I{XnVe0+iQo%755SjJf1nt z)B4AB@eGNQ)a>%fMtcsOlILUIu8*X5zmhoMVyM4Am?77o5Zway3%^;d0^adUm)pPwjaKm%bfr@JX0pb|!GLmDX6rI6DUei--ff0!mh2HvLN6 zG0#NGdQtMM?iJpld#1*Jm0O#&b8D8B{C)NI>{DOWwdT9de;`S!@%ycq%ZGw#+Qg@|j)uQvt(C5Rsw3K--g5uKk&7x$JZUP+^Mjto zF8*HMV|rC=JWat()+V>ax#F4dSL>sBKNIG=wT>5dA9#4{a%ZEh0;S4K;NQ2Ji2c5Ob^MRJAUXoj%?8mWW@)`^f5Pemk~QypR68_vw-! 
z*FJnQ{N^>zP08z#c@HBKyZk-$n0Z!Q#!oANGD!y z>hLA((>I|jy%*}rFWufJ6xlu9h8x~tg>QZ)g%e=kKnl&kb=B7!B7a4@Rc`Fi&ZkflUT`0E#kfB#9I1*$C=E^3@0fD z*B2*=Gm^%6x(FJ&@Fs6EocfhNdDB;(n#9!PN}dhm7_#XR{`&&XwiX$Xm4kSYU?83_ zbe~hG;K2`W;NYeg{=+5qgAdtd4>aEJZEma)1op*sMb(6Y~dHMn;gRniR4typ; z133%3Pe;&69&sciIiGdjk0diGgGH&R6RN^F^?{WDWR@#pJE$b7%MFZNZh$g=Q4vDc|F4K(W0M&tq}&p-pmW+3lB)Q1g?Dg+l^&%?-PtzFALr>Ui&?>C?~W*!jmC z|L8Q$i*o;EygcdrINND`gl~d26MH?j&DS$%G}W&4ic0zUYvvBaKK_1>n5MirNtq4P zGfy#iS7_QrHf9rC40R-VmCqjVO#Xf1ZS%<+&1q7W`&>I!-^*;@lAOj@Z+0}~KzM6= zNN$hdYODPU++fQkDv1L3t40gkiwCMf2A8fl($fL6Rd(lk)o zo@e8{lfq)sVUIsc?mFCh%$Kn4%|xB*B0m51whd9mj)(sT%koaJszMA=$Y_pGS&$Ruy^4WC6^mt{ z01>!G3O~eKEU)Q!2)l*!|;w@)vgWEX5QC@kWp%c@p2N?YPlq4_7yFJ3r#4yFJ zb}>psp#{_KO8%D0QX~MgQJn4`Ldx<4Qb4>PWlsG9b!NMuic}Q~QewY6vQjM}Z1g=)^9Thh0zXZm8?XyZeJF>2WuU9~P~`Q`XDkbsBa& z=bhQ}jmuiN7U=s2FHz^iR46nD!Dnl=4<$FLc`h-?H4x2ICyt%Ez+mzi(YP^VrsQ>R zR3D=}A+%A=LUl}qUx+}2arUo)oWtgU`vR{wor-&Zb%M;13Dv4Lr%3Te^3QMkhIYz* zkBTvTp0KQb)5$02tCjt>*QSo~o7WmuDv}`*dNr<{r{&s;9kRryA;0!0G#+Z4r#^G3 zfhEtj!rYei?MwX|LqFfUsutoMwIuAfnC+hXN^j{lXFtfupF9w}yT5gV=cAw7Gz^1m zD+`a@u)Nwdv)Db_Xa|qVioAYq@ZlD9^bgA3^9TBv7fq<^vfOm1E9b*&lZzXgt$nq7 zgg+AAwE>-m?6@_TO+PFR6sTBy=jW`b@NGPieRmHV5H{U9Y^?0|XT*J?8hSNDvxFM^OqK<#X|cd5)-Esv@rI_=ALOI_YQbIb|p&_kuGgwvyz zIh*9xADm{JIxR|S^V4$$`zAW6f0H)kop?&{9i_{^Vb#u61KB+)w}aa5mgT5^r*As+ zDB3p7*oRBfCMvru1)c!HC5_Qyy&qHy1BQha=8I}$R||_D<1uA|s?J_x(J7gl((G#9 zYqxmIUV42jPA$(u#y`nR)L-nVMiXU#Qu+R4wtJ@P5)JAvA6LCO4R5Nq%{)AgdOrQj zH`gb}wLVvFUuJv%x>kWw?>5#BQhV=@c-Lj_?@}|~KlD}NW3+{|pXCXBvDPW(hgg7d z_dW;Xnk(VHC41(q$&`24d;Hy%MK!1TEBJo%2Y6Zjj#ExOo6zhktNh`hvu#Wqb(OZ_ zwYeWxf7rkJgk04uwS(W|=4G2ny55y=lyld`n4TA8x5cZ$!DinE#wJ$spX}hTb|k1qD$BgAj!; z{Bz@F!ktW1n+_}pnqoD{qB*=WkoZf*m>h04z%nTAQVBOE*+<342xnScg>+!<1{zID zKzE5(C?N}i>ndc`gf9807%H}QnN3Qp(#JCCb_?JB*FXXqJZ9(U5ovj67A9Gv#|WGhzILTZAaRoN^Ql*iX6H&)#ykk=J#UsJso4-`*V`|=)1atH zgw-xXz9##$j;AoQODLE}Wq&ZYxI}gh860pa1}rF4ctx?yC3d9 zO%QrA?xSICf^c`*B!KOt@*fYsXa?+>w0#wA@f(d*_EJJeV|~9=XID)>WJ;NxWgawM 
zc02i6?7;xiSFMxnNqkZdTvirG4XVLq+>1t)x4x(u4l!!D@Z%SkjBY8IAM)bFBzscuodO^YB>LNT$LbD^S4SZ^oKRy##T`Q+6p%Z?{@P6vJykMand}f>N=yi)t zTs4e7^2{!lyu$y{;H;0fcA|vHvYZN3m@-{i<%Dy>8qrXwIdRdi; za-NaFfur$~(vcgIL>k+-0`49t9yauu(@P4)!+`&Tn+5Y*{eRu0RN_cCDdw#mDjr51 zuH^LBO*&l8X@r|}sHoEjH!1ruz(}|xNr$8-gU-m|Kf$fY;qWA4bDsfj7bav_!8|hH zew)E|FbzL}{~M}|kuggalRkqz9sKWu-~S9vhZ~dN4r9*1gg$obV9Vc$41XGIAAFoj zo&lAupkx?$fd@acy|3Un{_Fo_k3M|z{;U6UFmDQY-v0Olso?vBD=d&UW!V2Xcr){V z=S)F;ng1zg3a57y^Gw3sPA)D^b^?|EoJECQH;lp6*$FPrze_?(h{4Z)v%g@K#K7gr z7P}pCf8{43deF1;upuL&> z1-m*t?DvF|L>*BT3sDj4>XKW;_LjR~Z6*r|yP{l~?LxiLAa zf>AlDf(?pa*u3B#GV~nw!D0zW1{vf?=Ojn$*C1+HLghan(V0dbe(t^mxXEm3@9hkq zsO&UG@LuyxPCkUS?Day{8f=b)L5M+yA@;Yy1w$qw?Ayp);NT$tGT7iEAX0$+{=ve6 zP~vRx8{0DrK4AQ4>|`>-TmCPj4Q8kz3?`g`OhfEp22;gAiM_#Z=xA*4{FmV{dN&9< zHq5OEg9r$*2gt;^lms-g_qBKP*nsT@?9QLf7(om<>f=ENOTHR z*u=Hbs5A`wIr>o152XB(T_+YVH641M#RGa=Eub77-v5Q zJDLR7M!~}rL=iGZRyq1X^b3|2inEQ#grF48HX;L{k87hrjw_%b9M96o>|hR#HY$UT zDH0rQkj)ND<%DY^W9g(g+knjC@P!D0=jgLkCi*Oeoseqe`$%*s6^WlSi9x}#N{sAB zBr(WXf-ufD8VL)G;b?=4Z+Lvh&N>#*!qJZe<-qalg(HI{sp9Ad*o-C0;%tNb3P>9q zZ#)i>sSGSai1S$r6TAP!*~S3mMcRO5;M-_K*+X)I8PqaT?; z!6e9$ZLl9G>?wd7_-82;GIF+{Vmg*1ZRGoiu;o~GG+Y}M%h|@+Mq!6FakkNz2tGq_ z4wej!vmYdS!|yKz$U8CylU*y7^I19(V}KlOx^VRI`=-kU?bk4eDOhV**7Rn{#_XD!

Tm?iNFs6B=PtMJO$k&DhBr@L3^EF520NIK^L-4EWANj` z(MQ(Hq+qft=d(;2ItG)0g&lJAgYZI3DZsVC1|w~BgpNZPC4xf`K#7h)qCz+VK9$!GEM{pOEFihX(d{&o) z@LHgL2rUK(AhHit7Z^4EeE>kT4WZ*iDis7avCXhbYtn{&^x#jSzW=MyI1Nr87~u zFu3d+Ll+C^=A1JFr5kW8k@H3a#RqwoiSm80j&gYx<)L)o3wS)B6G2kM%?~6ML{Fgu z$-&bII#{@n_dy0nJPyGQVeU5$7|;}so_ATlnQiSTJUg^by< zINt|k5`TW_6qF~^f!rhG>SD>NaPNbB!^rzUj6l`_R|W9+Ob2ln=|^Q->Nw|21;UFT zmxj_h=!MtM;qbEWHE_O&g>-_scs6kixLEIS_O z{D3bY`^zLF@+6(fw*PTF3mOwbOF#lfa0s|6f-ejb18oD_10F66GH8joIWyp*BHE9E zoCOBpB=RiiQOL8P*W&St0SXNsR=`#f9AZHBXJiae)sSbw^olkV&SX{lF}M=K(NdL@s7RgdZ|4g@)`oa58jlpnf1Q0A&(w1IYvjKd_{sV{nm6 zfE*w;3cy@s&q0ks_z@FCWn?ZiAb|L}0G-62EhaD|1TLW3BQT}I>B7H{K}2+2CfLpJ zXCKU+=v;tK;&F`$n}o+n*hxemWP&LJH)pW1gVBg{e$Yn6%g>lC9!n@c5>LT`M@HXA z0{b@7kBQfrKxzj?58sbW#q$(Qy2sOE&{o09h<_GzKOFp^A02N4fSmmZ?4gazI-mvs zZZDx9SibS&g1V2kA-XPD+&~LMo(0N=Zv$H?ZZF}Ao-SgmC(=M#L7ruz&(g{0oat;g zB|}H^k-!4Pep6 z%>~S1cv%YiA-WJ$-ykC}AOpf6??ZJfcoq&Jeq1scFaMFqpnl+S12fGb&r*oUSs;UX z2!AfY?}E@FSOs2IflWjFLeR#(z|BF2Kte)j31|VxzJUoIFY5s}Vj%Q@45|yVUK+c4 z4ClDO_wo2l1~CVZSHSfVJ_uG2bl>QpG~xFHOr>}p3cek@+zHkzcIXG^{6H{9o&|vt ze=f=3-a_yNL^A{iOgNbM`2pR{8o27AS1GheIy%Y)wIjdk9!D7%jV2`<6k!N*5g2DF#)diU!h`ac+ zMS&ay$g^NHM_>xJG-O;Fupj)mG#awMz{&7BF<5C({tI42y!;IPnCSk(l#%&?0FA&3 zgiQQ;!5W6}eO)3Aultd7iKzSnW_i4fj>Tvoyi6C&Qh0g~(mg^;FqH&&cSGPFknNFHn74YZ4A5~4_JWeLR3IgWF0`VkvW4Li@*TfYIu4Dt{4P| zz+8gRM4(J~TM!uVm;hmaMf<@a#Nz=CV(#$!3l>mB&IVU6 za^AqpgpL8IhCd%*K0s(9c*@Z|2NHqDD;n6PkoPeVJ30y22XfvZR0y35utq#BfyhC; z51d2?@ee;Pup{JbfsYEIf0(-)f6l>5h~OF>LOSsKK?nOO@+=u3h&#Wa#Us3e4!$R} z9|O^yK?+0m5<>3Kw(ipyAsiMr> zT|D3$U^4rgF=4IVJaDi9<25#68bhUABO}s$Cesj<7=|H}Y-~8sXfEM@cUec6J6F`l Y*WTNg9p?%PJcCIVRac+C#6Proposed Solution + +Abandon the guarantee. You will see every record if no changes +occur during your traversal, otherwise you will see some subset. +You can prevent changes by using a transaction or the locking +API. + +2.3 Nesting of Transactions Is Fraught + +TDB has alternated between allowing nested transactions and not +allowing them. 
Various paths in the Samba codebase assume that +transactions will nest, and in a sense they can: the operation is +only committed to disk when the outer transaction is committed. +There are two problems, however: + +1. Canceling the inner transaction will cause the outer + transaction commit to fail, and will not undo any operations + since the inner transaction began. This problem is soluble with + some additional internal code. + +2. An inner transaction commit can be cancelled by the outer + transaction. This is desirable in the way which Samba's + database initialization code uses transactions, but could be a + surprise to any users expecting a successful transaction commit + to expose changes to others. + +The current solution is to specify the behavior at tdb_open(), +with the default currently that nested transactions are allowed. +This flag can also be changed at runtime. + +2.3.1 Proposed Solution + +Given the usage patterns, it seems that the “least-surprise” +behavior of disallowing nested transactions should become the +default. Additionally, it seems the outer transaction is the only +code which knows whether inner transactions should be allowed, so +a flag to indicate this could be added to tdb_transaction_start. +However, this behavior can be simulated with a wrapper which uses +tdb_add_flags() and tdb_remove_flags(), so the API should not be +expanded for this relatively-obscure case. + +2.4 Incorrect Hash Function is Not Detected + +tdb_open_ex() allows the calling code to specify a different hash +function to use, but does not check that all other processes +accessing this tdb are using the same hash function. The result +is that records are missing from tdb_fetch(). + +2.4.1 Proposed Solution + +The header should contain an example hash result (eg. the hash of +0xdeadbeef), and tdb_open_ex() should check that the given hash +function produces the same answer, or fail the tdb_open call. 
+ +2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation + +In response to scalability issues with the free list ([TDB-Freelist-Is] +) two API workarounds have been incorporated in TDB: +tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The +latter actually calls the former with an argument of “5”. + +This code allows deleted records to accumulate without putting +them in the free list. On delete we iterate through each chain +and free them in a batch if there are more than max_dead entries. +These are never otherwise recycled except as a side-effect of a +tdb_repack. + +2.5.1 Proposed Solution + +With the scalability problems of the freelist solved, this API +can be removed. The TDB_VOLATILE flag may still be useful as a +hint that store and delete of records will be at least as common +as fetch in order to allow some internal tuning, but initially +will become a no-op. + +2.6 TDB Files Cannot Be Opened Multiple Times + In The Same Process + +No process can open the same TDB twice; we check and disallow it. +This is an unfortunate side-effect of fcntl locks, which operate +on a per-file rather than per-file-descriptor basis, and do not +nest. Thus, closing any file descriptor on a file clears all the +locks obtained by this process, even if they were placed using a +different file descriptor! + +Note that even if this were solved, deadlock could occur if +operations were nested: this is a more manageable programming +error in most cases. + +2.6.1 Proposed Solution + +We could lobby POSIX to fix the perverse rules, or at least lobby +Linux to violate them so that the most common implementation does +not have this restriction. This would be a generally good idea +for other fcntl lock users. + +Samba uses a wrapper which hands out the same tdb_context to +multiple callers if this happens, and does simple reference +counting. 
We should do this inside the tdb library, which already +emulates lock nesting internally; it would need to recognize when +deadlock occurs within a single process. This would create a new +failure mode for tdb operations (while we currently handle +locking failures, they are impossible in normal use and a process +encountering them can do little but give up). + +I do not see benefit in an additional tdb_open flag to indicate +whether re-opening is allowed, though there may be some +benefit to adding a call to detect when a tdb_context is shared, +to allow others to create such an API. + +2.7 TDB API Is Not POSIX Thread-safe + +The TDB API uses an error code which can be queried after an +operation to determine what went wrong. This programming model +does not work with threads, unless specific additional guarantees +are given by the implementation. In addition, even +otherwise-independent threads cannot open the same TDB (as in [TDB-Files-Cannot] +). + +2.7.1 Proposed Solution + +Rearchitecting the API to include a tdb_errcode pointer would be a +great deal of churn; we are better to guarantee that the +tdb_errcode is per-thread so the current programming model can be +maintained. + +This requires dynamic per-thread allocations, which is awkward +with POSIX threads (pthread_key_create space is limited and we +cannot simply allocate a key for every TDB). + +Internal locking is required to make sure that fcntl locks do not +overlap between threads, and also that the global list of tdbs is +maintained. + +The aim is that building tdb with -DTDB_PTHREAD will result in a +pthread-safe version of the library, and otherwise no overhead +will exist. + +2.8 *_nonblock Functions And *_mark Functions Expose + Implementation + +CTDB[footnote: +Clustered TDB, see http://ctdb.samba.org +] wishes to operate on TDB in a non-blocking manner. This is +currently done as follows: + +1. Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). If this fails: + +2.
Fork a child process, and wait for it to call the normal + variant (eg. tdb_lockall). + +3. If the child succeeds, call the _mark variant to indicate we + already have the locks (eg. tdb_lockall_mark). + +4. Upon completion, tell the child to release the locks (eg. + tdb_unlockall). + +5. Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). + +There are several issues with this approach. Firstly, adding two +new variants of each function clutters the API for an obscure +use, and so not all functions have three variants. Secondly, it +assumes that all paths of the functions ask for the same locks, +otherwise the parent process will have to get a lock which the +child doesn't have under some circumstances. I don't believe this +is currently the case, but it constrains the implementation. + +2.8.1 Proposed Solution + +Implement a hook for locking methods, so that the caller can +control the calls to create and remove fcntl locks. In this +scenario, ctdbd would operate as follows: + +1. Call the normal API function, eg tdb_lockall(). + +2. When the lock callback comes in, check if the child has the + lock. Initially, this is always false. If so, return 0. + Otherwise, try to obtain it in non-blocking mode. If that + fails, return EWOULDBLOCK. + +3. Release locks in the unlock callback as normal. + +4. If tdb_lockall() fails, see if we recorded a lock failure; if + so, call the child to repeat the operation. + +5. The child records what locks it obtains, and returns that + information to the parent. + +6. When the child has succeeded, goto 1. + +This is flexible enough to handle any potential locking scenario, +even when lock requirements change. It can be optimized so that +the parent does not release locks, just tells the child which +locks it doesn't need to obtain. + +It also keeps the complexity out of the API, and in ctdbd where +it is needed. 
+ +2.9 tdb_chainlock Functions Expose Implementation + +tdb_chainlock locks some number of records, including the record +indicated by the given key. This gives atomicity guarantees; +no-one can start a transaction, alter, read or delete that key +while the lock is held. + +It also makes the same guarantee for any other key in the chain, +which is an internal implementation detail and potentially a +cause for deadlock. + +2.9.1 Proposed Solution + +None. It would be nice to have an explicit single entry lock +which affected no other keys. Unfortunately, this won't work for +an entry which doesn't exist. Thus while chainlock may be +implemented more efficiently for the existing case, it will still +have overlap issues with the non-existing case. So it is best to +keep the current (lack of) guarantee about which records will be +affected to avoid constraining our implementation. + +2.10 Signal Handling is Not Race-Free + +The tdb_setalarm_sigptr() call allows the caller's signal handler +to indicate that the tdb locking code should return with a +failure, rather than trying again when a signal is received (and +errno == EAGAIN). This is usually used to implement timeouts. + +Unfortunately, this does not work in the case where the signal is +received before the tdb code enters the fcntl() call to place the +lock: the code will sleep within the fcntl() code, unaware that +the signal wants it to exit. In the case of long timeouts, this +does not happen in practice. + +2.10.1 Proposed Solution + +The locking hooks proposed in [Proposed-Solution-locking-hook] +would allow the user to decide on whether to fail the lock +acquisition on a signal. This allows the caller to choose their +own compromise: they could narrow the race by checking +immediately before the fcntl call.[footnote: +It may be possible to make this race-free in some implementations +by having the signal handler alter the struct flock to make it +invalid.
This will cause the fcntl() lock call to fail with +EINVAL if the signal occurs before the kernel is entered, +otherwise EAGAIN. +] + +2.11 The API Uses Gratuitous Typedefs, Capitals + +typedefs are useful for providing source compatibility when types +can differ across implementations, or arguably in the case of +function pointer definitions which are hard for humans to parse. +Otherwise it is simply obfuscation and pollutes the namespace. + +Capitalization is usually reserved for compile-time constants and +macros. + + TDB_CONTEXT There is no reason to use this over 'struct + tdb_context'; the definition isn't visible to the API user + anyway. + + TDB_DATA There is no reason to use this over struct TDB_DATA; + the struct needs to be understood by the API user. + + struct TDB_DATA This would normally be called 'struct + tdb_data'. + + enum TDB_ERROR Similarly, this would normally be enum + tdb_error. + +2.11.1 Proposed Solution + +None. Introducing lower case variants would please pedants like +myself, but if it were done the existing ones should be kept. +There is little point forcing a purely cosmetic change upon tdb +users. + +2.12 tdb_log_func Doesn't Take The + Private Pointer + +For API compatibility reasons, the logging function needs to call +tdb_get_logging_private() to retrieve the pointer registered by +the tdb_open_ex for logging. + +2.12.1 Proposed Solution + +It should simply take an extra argument, since we are prepared to +break the API/ABI. + +2.13 Various Callback Functions Are Not Typesafe + +The callback functions in tdb_set_logging_function (after [tdb_log_func-Doesnt-Take] + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read +and tdb_check all take void * and must internally convert it to +the argument type they were expecting. + +If this type changes, the compiler will not produce warnings on +the callers, since it only sees void *. 
+ +2.13.1 Proposed Solution + +With careful use of macros, we can create callback functions +which give a warning when used on gcc and the types of the +callback and its private argument differ. Unsupported compilers +will not give a warning, which is no worse than now. In addition, +the callbacks become clearer, as they need not use void * for +their parameter. + +See CCAN's typesafe_cb module at +http://ccan.ozlabs.org/info/typesafe_cb.html + +2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, + tdb_reopen_all Problematic + +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB +file should be cleared if the caller discovers it is the only +process with the TDB open. However, if any caller does not +specify TDB_CLEAR_IF_FIRST it will not be detected, so will have +the TDB erased underneath them (usually resulting in a crash). + +There is a similar issue on fork(); if the parent exits (or +otherwise closes the tdb) before the child calls tdb_reopen_all() +to establish the lock used to indicate the TDB is opened by +someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe +it alone has opened the TDB and will erase it. + +2.14.1 Proposed Solution + +Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but +see [TDB_CLEAR_IF_FIRST-Imposes-Performance]. + +3 Performance And Scalability Issues + +3.1 TDB_CLEAR_IF_FIRST + Imposes Performance Penalty + +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is +placed at offset 4 (aka. the ACTIVE_LOCK). While these locks +never conflict in normal tdb usage, they do add substantial +overhead for most fcntl lock implementations when the kernel +scans to detect if a lock conflict exists. This is often a single +linked list, making the time to acquire and release a fcntl lock +O(N) where N is the number of processes with the TDB open, not +the number actually doing work. 
+ +In a Samba server it is common to have huge numbers of clients +sitting idle, and thus they have weaned themselves off the +TDB_CLEAR_IF_FIRST flag.[footnote: +There is a flag to tdb_reopen_all() which is used for this +optimization: if the parent process will outlive the child, the +child does not need the ACTIVE_LOCK. This is a workaround for +this very performance issue. +] + +3.1.1 Proposed Solution + +Remove the flag. It was a neat idea, but even trivial servers +tend to know when they are initializing for the first time and +can simply unlink the old tdb at that point. + +3.2 TDB Files Have a 4G Limit + +This seems to be becoming an issue (so much for “trivial”!), +particularly for ldb. + +3.2.1 Proposed Solution + +A new, incompatible TDB format which uses 64 bit offsets +internally rather than 32 bit as now. For simplicity of endian +conversion (which TDB does on the fly if required), all values +will be 64 bit on disk. In practice, some upper bits may be used +for other purposes, but at least 56 bits will be available for +file offsets. + +tdb_open() will automatically detect the old version, and even +create them if TDB_VERSION6 is specified to tdb_open. + +32 bit processes will still be able to access TDBs larger than 4G +(assuming that their off_t allows them to seek to 64 bits), they +will gracefully fall back as they fail to mmap. This can happen +already with large TDBs. + +Old versions of tdb will fail to open the new TDB files (since 28 +August 2009, commit 398d0c29290: prior to that any unrecognized +file format would be erased and initialized as a fresh tdb!) + +3.3 TDB Records Have a 4G Limit + +This has not been a reported problem, and the API uses size_t +which can be 64 bit on 64 bit platforms. However, other limits +may have made such an issue moot. 
+ +3.3.1 Proposed Solution + +Record sizes will be 64 bit, with an error returned on 32 bit +platforms which try to access such records (the current +implementation would return TDB_ERR_OOM in a similar case). It +seems unlikely that 32 bit keys will be a limitation, so the +implementation may not support this (see [sub:Records-Incur-A]). + +3.4 Hash Size Is Determined At TDB Creation Time + +TDB contains a number of hash chains in the header; the number is +specified at creation time, and defaults to 131. This is such a +bottleneck on large databases (as each hash chain gets quite +long), that LDB uses 10,000 for this hash. In general it is +impossible to know what the 'right' answer is at database +creation time. + +3.4.1 Proposed Solution + +After comprehensive performance testing on various scalable hash +variants[footnote: +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 +This was annoying because I was previously convinced that an +expanding tree of hashes would be very close to optimal. +], it became clear that it is hard to beat a straight linear hash +table which doubles in size when it reaches saturation. There are +three details which become important: + +1. On encountering a full bucket, we use the next bucket. + +2. Extra hash bits are stored with the offset, to reduce + comparisons. + +3. A marker entry is used on deleting an entry. + +The doubling of the table must be done under a transaction; we +will not reduce it on deletion, so it will be an unusual case. It +will either be placed at the head (other entries will be moved +out the way so we can expand). We could have a pointer in the +header to the current hashtable location, but that pointer would +have to be read frequently to check for hashtable moves. + +The locking for this is slightly more complex than the chained +case; we currently have one lock per bucket, and that means we +would need to expand the lock if we overflow to the next bucket. 
+The frequency of such collisions will affect our locking +heuristics: we can always lock more buckets than we need. + +One possible optimization is to only re-check the hash size on an +insert or a lookup miss. + +3.5 TDB Freelist Is Highly Contended + +TDB uses a single linked list for the free list. Allocation +occurs as follows, using heuristics which have evolved over time: + +1. Get the free list lock for this whole operation. + +2. Multiply length by 1.25, so we always over-allocate by 25%. + +3. Set the slack multiplier to 1. + +4. Examine the current freelist entry: if it is > length but < + the current best case, remember it as the best case. + +5. Multiply the slack multiplier by 1.05. + +6. If our best fit so far is less than length * slack multiplier, + return it. The slack will be turned into a new free record if + it's large enough. + +7. Otherwise, go onto the next freelist entry. + +Deleting a record occurs as follows: + +1. Lock the hash chain for this whole operation. + +2. Walk the chain to find the record, keeping the prev pointer + offset. + +3. If max_dead is non-zero: + + (a) Walk the hash chain again and count the dead records. + + (b) If it's more than max_dead, bulk free all the dead ones + (similar to steps 4 and below, but the lock is only obtained + once). + + (c) Simply mark this record as dead and return. + +4. Get the free list lock for the remainder of this operation. + +5. Examine the following block to see if it is + free; if so, enlarge the current block and remove that block + from the free list. This was disabled, as removal from the free + list was O(entries-in-free-list). + +6. Examine the preceding block to see if it is free: for this + reason, each block has a 32-bit tailer which indicates its + length. If it is free, expand it to cover our new block and + return. + +7. Otherwise, prepend ourselves to the free list.
+ +Disabling right-merging (step [right-merging]) causes +fragmentation; the other heuristics proved insufficient to +address this, so the final answer to this was that when we expand +the TDB file inside a transaction commit, we repack the entire +tdb. + +The single list lock limits our allocation rate; due to the other +issues this is not currently seen as a bottleneck. + +3.5.1 Proposed Solution + +The first step is to remove all the current heuristics, as they +obviously interact, then examine them once the lock contention is +addressed. + +The free list must be split to reduce contention. Assuming +perfect free merging, we can at most have 1 free list entry for +each entry. This implies that the number of free lists is related +to the size of the hash table, but as it is rare to walk a large +number of free list entries we can use far fewer, say 1/32 of the +number of hash buckets. + +There are various benefits in using per-size free lists (see [sub:TDB-Becomes-Fragmented] +) but it's not clear this would reduce contention in the common +case where all processes are allocating/freeing the same size. +Thus we almost certainly need to divide in other ways: the most +obvious is to divide the file into zones, and using a free list +(or set of free lists) for each. This approximates address +ordering. + +Note that this means we need to split the free lists when we +expand the file; this is probably acceptable when we double the +hash table size, since that is such an expensive operation +already. In the case of increasing the file size, there is an +optimization we can use: if we use M in the formula above as the +file size rounded up to the next power of 2, we only need +reshuffle free lists when the file size crosses a power of 2 +boundary, and reshuffling the free lists is trivial: we simply +merge every consecutive pair of free lists. + +The basic algorithm is as follows. Freeing is simple: + +1. Identify the correct zone. + +2. Lock the corresponding list. 
+ +3. Re-check the zone (we didn't have a lock, sizes could have + changed): relock if necessary. + +4. Place the freed entry in the list for that zone. + +Allocation is a little more complicated, as we perform delayed +coalescing at this point: + +1. Pick a zone: either the zone we last freed into, or based on a + “random” number. + +2. Lock the corresponding list. + +3. Re-check the zone: relock if necessary. + +4. If the top entry is large enough, remove it from the list and + return it. + +5. Otherwise, coalesce entries in the list. If there was no entry + large enough, unlock the list and try the next zone. + +6. If no zone satisfies, expand the file. + +This optimizes rapid insert/delete of free list entries by not +coalescing them all the time. First-fit address ordering +seems to be fairly good for keeping fragmentation low +(see [sub:TDB-Becomes-Fragmented]). Note that address ordering +does not need a tailer to coalesce, though if we needed one we +could have one cheaply: see [sub:Records-Incur-A]. + +I anticipate that the number of entries in each free zone would +be small, but it might be worth using one free entry to hold +pointers to the others for cache efficiency. + +3.6 TDB Becomes Fragmented + +Much of this is a result of allocation strategy[footnote: +The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 +ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps +] and deliberate hobbling of coalescing; internal fragmentation +(aka overallocation) is deliberately set at 25%, and external +fragmentation is only cured by the decision to repack the entire +db when a transaction commit needs to enlarge the file. + +3.6.1 Proposed Solution + +The 25% overhead on allocation works in practice for ldb because +indexes tend to expand by one record at a time. This internal +fragmentation can be resolved by having an “expanded” bit in the +header to note entries that have previously expanded, and +allocating more space for them. 
+ +There is a spectrum of possible solutions for external +fragmentation: one is to use a fragmentation-avoiding allocation +strategy such as best-fit address-order allocator. The other end +of the spectrum would be to use a bump allocator (very fast and +simple) and simply repack the file when we reach the end. + +There are three problems with efficient fragmentation-avoiding +allocators: they are non-trivial, they tend to use a single free +list for each size, and there's no evidence that tdb allocation +patterns will match those recorded for general allocators (though +it seems likely). + +Thus we don't spend too much effort on external fragmentation; we +will be no worse than the current code if we need to repack on +occasion. More effort is spent on reducing freelist contention, +and reducing overhead. + +3.7 Records Incur A 28-Byte Overhead + +Each TDB record has a header as follows: + +struct tdb_record { + + tdb_off_t next; /* offset of the next record in the list +*/ + + tdb_len_t rec_len; /* total byte length of record */ + + tdb_len_t key_len; /* byte length of key */ + + tdb_len_t data_len; /* byte length of data */ + + uint32_t full_hash; /* the full 32 bit hash of the key */ + + uint32_t magic; /* try to catch errors */ + + /* the following union is implied: + + union { + + char record[rec_len]; + + struct { + + char key[key_len]; + + char data[data_len]; + + } + + uint32_t totalsize; (tailer) + + } + + */ + +}; + +Naively, this would double to a 56-byte overhead on a 64 bit +implementation. + +3.7.1 Proposed Solution + +We can use various techniques to reduce this for an allocated +block: + +1. The 'next' pointer is not required, as we are using a flat + hash table. + +2. 'rec_len' can instead be expressed as an addition to key_len + and data_len (it accounts for wasted or overallocated length in + the record). Since the record length is always a multiple of 8, + we can conveniently fit it in 32 bits (representing up to 35 + bits). + +3. 
'key_len' and 'data_len' can be reduced. I'm unwilling to + restrict 'data_len' to 32 bits, but instead we can combine the + two into one 64-bit field and using a 5 bit value which + indicates at what bit to divide the two. Keys are unlikely to + scale as fast as data, so I'm assuming a maximum key size of 32 + bits. + +4. 'full_hash' is used to avoid a memcmp on the “miss” case, but + this is diminishing returns after a handful of bits (at 10 + bits, it reduces 99.9% of false memcmp). As an aside, as the + lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. + +5. 'magic' does not need to be enlarged: it currently reflects + one of 5 values (used, free, dead, recovery, and + unused_recovery). It is useful for quick sanity checking + however, and should not be eliminated. + +6. 'tailer' is only used to coalesce free blocks (so a block to + the right can find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of + the following block (and the tailer only exists in free + blocks).[footnote: +This technique from Thomas Standish. Data Structure Techniques. +Addison-Wesley, Reading, Massachusetts, 1980. +] The current proposed coalescing algorithm doesn't need this, + however. + +This produces a 16 byte used header like this: + +struct tdb_used_record { + + uint32_t magic : 16, + + prev_is_free: 1, + + key_data_divide: 5, + + top_hash: 10; + + uint32_t extra_octets; + + uint64_t key_and_data_len; + +}; + +And a free record like this: + +struct tdb_free_record { + + uint32_t free_magic; + + uint64_t total_length; + + ... + + uint64_t tailer; + +}; + + + +3.8 Transaction Commit Requires 4 fdatasync + +The current transaction algorithm is: + +1. write_recovery_data(); + +2. sync(); + +3. write_recovery_header(); + +4. sync(); + +5. overwrite_with_new_data(); + +6. sync(); + +7. remove_recovery_header(); + +8. 
sync(); + +On current ext3, each sync flushes all data to disk, so the next +3 syncs are relatively expensive. But this could become a +performance bottleneck on other filesystems such as ext4. + +3.8.1 Proposed Solution + +Neil Brown points out that this is overzealous, and only one sync +is needed: + +1. Bundle the recovery data, a transaction counter and a strong + checksum of the new data. + +2. Strong checksum that whole bundle. + +3. Store the bundle in the database. + +4. Overwrite the oldest of the two recovery pointers in the + header (identified using the transaction counter) with the + offset of this bundle. + +5. sync. + +6. Write the new data to the file. + +Checking for recovery means identifying the latest bundle with a +valid checksum and using the new data checksum to ensure that it +has been applied. This is more expensive than the current check, +but need only be done at open. For running databases, a separate +header field can be used to indicate a transaction in progress; +we need only check for recovery if this is set. + +3.9 TDB Does Not Have Snapshot Support + +3.9.1 Proposed Solution + +None. At some point you say “use a real database”. + +But as a thought experiment, if we implemented transactions to +only overwrite free entries (this is tricky: there must not be a +header in each entry which indicates whether it is free, but use +of presence in metadata elsewhere), and a pointer to the hash +table, we could create an entirely new commit without destroying +existing data. Then it would be easy to implement snapshots in a +similar way. + +This would not allow arbitrary changes to the database, such as +tdb_repack does, and would require more space (since we have to +preserve the current and future entries at once). If we used hash +trees rather than one big hash table, we might only have to +rewrite some sections of the hash, too. + +We could then implement snapshots using a similar method, using +multiple different hash tables/free tables. 
+ +3.10 Transactions Cannot Operate in Parallel + +This would be useless for ldb, as it hits the index records with +just about every update. It would add significant complexity in +resolving clashes, and cause all the transaction callers to write +their code to loop in the case where the transactions spuriously +failed. + +3.10.1 Proposed Solution + +We could solve a small part of the problem by providing read-only +transactions. These would allow one write transaction to begin, +but it could not commit until all r/o transactions are done. This +would require a new RO_TRANSACTION_LOCK, which would be upgraded +on commit. + +3.11 Default Hash Function Is Suboptimal + +The Knuth-inspired multiplicative hash used by tdb is fairly slow +(especially if we expand it to 64 bits), and works best when the +hash bucket size is a prime number (which also means a slow +modulus). In addition, it is highly predictable which could +potentially lead to a Denial of Service attack in some TDB uses. + +3.11.1 Proposed Solution + +The Jenkins lookup3 hash[footnote: +http://burtleburtle.net/bob/c/lookup3.c +] is a fast and superbly-mixing hash. It's used by the Linux +kernel and almost everything else. This has the particular +properties that it takes an initial seed, and produces two 32 bit +hash numbers, which we can combine into a 64-bit hash. + +The seed should be created at tdb-creation time from some random +source, and placed in the header. This is far from foolproof, but +adds a little bit of protection against hash bombing. + +3.12 Reliable Traversal Adds Complexity + +We lock a record during traversal iteration, and try to grab that +lock in the delete code. If that grab on delete fails, we simply +mark it deleted and continue onwards; traversal checks for this +condition and does the delete when it moves off the record. + +If traversal terminates, the dead record may be left +indefinitely. 
+ +3.12.1 Proposed Solution + +Remove reliability guarantees; see [traverse-Proposed-Solution]. + +3.13 Fcntl Locking Adds Overhead + +Placing a fcntl lock means a system call, as does removing one. +This is actually one reason why transactions can be faster +(everything is locked once at transaction start). In the +uncontended case, this overhead can theoretically be eliminated. + +3.13.1 Proposed Solution + +None. + +We tried this before with spinlock support, in the early days of +TDB, and it didn't make much difference except in manufactured +benchmarks. + +We could use spinlocks (with futex kernel support under Linux), +but it means that we lose automatic cleanup when a process dies +with a lock. There is a method of auto-cleanup under Linux, but +it's not supported by other operating systems. We could +reintroduce a clear-if-first-style lock and sweep for dead +futexes on open, but that wouldn't help the normal case of one +concurrent opener dying. Increasingly elaborate repair schemes +could be considered, but they require an ABI change (everyone +must use them) anyway, so there's no need to do this at the same +time as everything else. + +3.14 Some Transactions Don't Require Durability + +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for +normal (fast) usage, and occasionally empties the results into a +transactional TDB. This kind of usage prioritizes performance +over durability: as long as we are consistent, data can be lost. + +This would be more neatly implemented inside tdb: a “soft” +transaction commit (ie. syncless) which meant that data may be +reverted on a crash. + +3.14.1 Proposed Solution + +None. + +Unfortunately any transaction scheme which overwrites old data +requires a sync before that overwrite to avoid the possibility of +corruption. 
+ +It seems possible to use a scheme similar to that described in [sub:TDB-Does-Not] +,where transactions are committed without overwriting existing +data, and an array of top-level pointers were available in the +header. If the transaction is “soft” then we would not need a +sync at all: existing processes would pick up the new hash table +and free list and work with that. + +At some later point, a sync would allow recovery of the old data +into the free lists (perhaps when the array of top-level pointers +filled). On crash, tdb_open() would examine the array of top +levels, and apply the transactions until it encountered an +invalid checksum. + diff --git a/ccan/tdb2/free.c b/ccan/tdb2/free.c new file mode 100644 index 00000000..83d916fc --- /dev/null +++ b/ccan/tdb2/free.c @@ -0,0 +1,710 @@ + /* + Trivial Database 2: free list/block handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include +#include +#include +#include + +/* We have to be able to fit a free record here. */ +#define MIN_DATA_LEN \ + (sizeof(struct tdb_free_record) - sizeof(struct tdb_used_record)) + +/* We have a series of free lists, each one covering a "zone" of the file. + * + * For each zone we have a series of per-size buckets, and a final bucket for + * "too big". + * + * It's possible to move the free_list_head, but *only* under the allrecord + * lock. 
*/ +static tdb_off_t free_list_off(struct tdb_context *tdb, unsigned int list) +{ + return tdb->header.v.free_off + list * sizeof(tdb_off_t); +} + +/* We're a library: playing with srandom() is unfriendly. srandom_r + * probably lacks portability. We don't need very random here. */ +static unsigned int quick_random(struct tdb_context *tdb) +{ + return getpid() + time(NULL) + (unsigned long)tdb; +} + +/* Start by using a random zone to spread the load. */ +uint64_t random_free_zone(struct tdb_context *tdb) +{ + /* num_zones might be out of date, but can only increase */ + return quick_random(tdb) % tdb->header.v.num_zones; +} + +static unsigned fls64(uint64_t val) +{ +#if HAVE_BUILTIN_CLZL + if (val <= ULONG_MAX) { + /* This is significantly faster! */ + return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0; + } else { +#endif + uint64_t r = 64; + + if (!val) + return 0; + if (!(val & 0xffffffff00000000ull)) { + val <<= 32; + r -= 32; + } + if (!(val & 0xffff000000000000ull)) { + val <<= 16; + r -= 16; + } + if (!(val & 0xff00000000000000ull)) { + val <<= 8; + r -= 8; + } + if (!(val & 0xf000000000000000ull)) { + val <<= 4; + r -= 4; + } + if (!(val & 0xc000000000000000ull)) { + val <<= 2; + r -= 2; + } + if (!(val & 0x8000000000000000ull)) { + val <<= 1; + r -= 1; + } + return r; +#if HAVE_BUILTIN_CLZL + } +#endif +} + +/* In which bucket would we find a particular record size? (ignoring header) */ +unsigned int size_to_bucket(struct tdb_context *tdb, tdb_len_t data_len) +{ + unsigned int bucket; + + /* We can't have records smaller than this. */ + assert(data_len >= MIN_DATA_LEN); + + /* Ignoring the header... */ + if (data_len - MIN_DATA_LEN <= 64) { + /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 6. */ + bucket = (data_len - MIN_DATA_LEN) / 8; + } else { + /* After that we go power of 2. 
*/ + bucket = fls64(data_len - MIN_DATA_LEN) + 2; + } + + if (unlikely(bucket > tdb->header.v.free_buckets)) + bucket = tdb->header.v.free_buckets; + return bucket; +} + +/* What zone does a block belong in? */ +tdb_off_t zone_of(struct tdb_context *tdb, tdb_off_t off) +{ + assert(tdb->header_uptodate); + + return off >> tdb->header.v.zone_bits; +} + +/* Returns fl->max_bucket + 1, or list number to search. */ +static tdb_off_t find_free_head(struct tdb_context *tdb, tdb_off_t bucket) +{ + tdb_off_t first, off; + + /* Speculatively search for a non-zero bucket. */ + first = tdb->last_zone * (tdb->header.v.free_buckets+1) + bucket; + off = tdb_find_nonzero_off(tdb, free_list_off(tdb, first), + tdb->header.v.free_buckets - bucket); + return bucket + off; +} + +static int remove_from_list(struct tdb_context *tdb, + tdb_off_t list, struct tdb_free_record *r) +{ + tdb_off_t off; + + /* Front of list? */ + if (r->prev == 0) { + off = free_list_off(tdb, list); + } else { + off = r->prev + offsetof(struct tdb_free_record, next); + } + /* r->prev->next = r->next */ + if (tdb_write_off(tdb, off, r->next)) { + return -1; + } + + if (r->next != 0) { + off = r->next + offsetof(struct tdb_free_record, prev); + /* r->next->prev = r->prev */ + if (tdb_write_off(tdb, off, r->prev)) { + return -1; + } + } + return 0; +} + +/* Enqueue in this free list. */ +static int enqueue_in_free(struct tdb_context *tdb, + tdb_off_t list, + tdb_off_t off, + struct tdb_free_record *new) +{ + new->prev = 0; + /* new->next = head. */ + new->next = tdb_read_off(tdb, free_list_off(tdb, list)); + if (new->next == TDB_OFF_ERR) + return -1; + + if (new->next) { + /* next->prev = new. */ + if (tdb_write_off(tdb, new->next + + offsetof(struct tdb_free_record, prev), + off) != 0) + return -1; + } + /* head = new */ + if (tdb_write_off(tdb, free_list_off(tdb, list), off) != 0) + return -1; + + return tdb_write_convert(tdb, off, new, sizeof(*new)); +} + +/* List isn't locked. 
*/ +int add_free_record(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len_with_header) +{ + struct tdb_free_record new; + tdb_off_t list; + int ret; + + assert(len_with_header >= sizeof(new)); + + new.magic = TDB_FREE_MAGIC; + new.data_len = len_with_header - sizeof(struct tdb_used_record); + + tdb->last_zone = zone_of(tdb, off); + list = tdb->last_zone * (tdb->header.v.free_buckets+1) + + size_to_bucket(tdb, new.data_len); + + if (tdb_lock_free_list(tdb, list, TDB_LOCK_WAIT) != 0) + return -1; + + ret = enqueue_in_free(tdb, list, off, &new); + tdb_unlock_free_list(tdb, list); + return ret; +} + +/* If we have enough left over to be useful, split that off. */ +static int to_used_record(struct tdb_context *tdb, + tdb_off_t off, + tdb_len_t needed, + tdb_len_t total_len, + tdb_len_t *actual) +{ + struct tdb_used_record used; + tdb_len_t leftover; + + leftover = total_len - needed; + if (leftover < sizeof(struct tdb_free_record)) + leftover = 0; + + *actual = total_len - leftover; + + if (leftover) { + if (add_free_record(tdb, off + sizeof(used) + *actual, + total_len - needed)) + return -1; + } + return 0; +} + +/* Note: we unlock the current list if we coalesce or fail. */ +static int coalesce(struct tdb_context *tdb, tdb_off_t off, + tdb_off_t list, tdb_len_t data_len) +{ + struct tdb_free_record pad, *r; + tdb_off_t end = off + sizeof(struct tdb_used_record) + data_len; + + while (!tdb->methods->oob(tdb, end + sizeof(*r), 1)) { + tdb_off_t nlist; + + r = tdb_get(tdb, end, &pad, sizeof(pad)); + if (!r) + goto err; + + if (r->magic != TDB_FREE_MAGIC) + break; + + nlist = zone_of(tdb, end) * (tdb->header.v.free_buckets+1) + + size_to_bucket(tdb, r->data_len); + + /* We may be violating lock order here, so best effort. */ + if (tdb_lock_free_list(tdb, nlist, TDB_LOCK_NOWAIT) == -1) + break; + + /* Now we have lock, re-check. 
*/ + r = tdb_get(tdb, end, &pad, sizeof(pad)); + if (!r) { + tdb_unlock_free_list(tdb, nlist); + goto err; + } + + if (unlikely(r->magic != TDB_FREE_MAGIC)) { + tdb_unlock_free_list(tdb, nlist); + break; + } + + if (remove_from_list(tdb, list, r) == -1) { + tdb_unlock_free_list(tdb, nlist); + goto err; + } + + end += sizeof(struct tdb_used_record) + r->data_len; + tdb_unlock_free_list(tdb, nlist); + } + + /* Didn't find any adjacent free? */ + if (end == off + sizeof(struct tdb_used_record) + data_len) + return 0; + + /* OK, expand record */ + r = tdb_get(tdb, off, &pad, sizeof(pad)); + if (!r) + goto err; + + if (remove_from_list(tdb, list, r) == -1) + goto err; + + /* We have to drop this to avoid deadlocks. */ + tdb_unlock_free_list(tdb, list); + + if (add_free_record(tdb, off, end - off) == -1) + return -1; + return 1; + +err: + /* To unify error paths, we *always* unlock list. */ + tdb_unlock_free_list(tdb, list); + return -1; +} + +/* We need size bytes to put our key and data in. */ +static tdb_off_t lock_and_alloc(struct tdb_context *tdb, + tdb_off_t bucket, size_t size, + tdb_len_t *actual) +{ + tdb_off_t list; + tdb_off_t off, prev, best_off; + struct tdb_free_record pad, best = { 0 }, *r; + double multiplier; + +again: + list = tdb->last_zone * (tdb->header.v.free_buckets+1) + bucket; + + /* Lock this list. */ + if (tdb_lock_free_list(tdb, list, TDB_LOCK_WAIT) == -1) { + return TDB_OFF_ERR; + } + + prev = free_list_off(tdb, list); + off = tdb_read_off(tdb, prev); + + if (unlikely(off == TDB_OFF_ERR)) + goto unlock_err; + + best.data_len = -1ULL; + best_off = 0; + multiplier = 1.0; + + /* Walk the list to see if any are large enough, getting less fussy + * as we go. 
*/ + while (off) { + prev = off; + off = tdb_read_off(tdb, prev); + if (unlikely(off == TDB_OFF_ERR)) + goto unlock_err; + + r = tdb_get(tdb, off, &pad, sizeof(*r)); + if (!r) + goto unlock_err; + if (r->magic != TDB_FREE_MAGIC) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "lock_and_alloc: %llu non-free 0x%llx\n", + (long long)off, (long long)r->magic); + goto unlock_err; + } + + if (r->data_len >= size && r->data_len < best.data_len) { + best_off = off; + best = *r; + } + + if (best.data_len < size * multiplier && best_off) { + /* We're happy with this size: take it. */ + if (remove_from_list(tdb, list, &best) != 0) + goto unlock_err; + tdb_unlock_free_list(tdb, list); + + if (to_used_record(tdb, best_off, size, best.data_len, + actual)) { + return -1; + } + return best_off; + } + multiplier *= 1.01; + + /* Since we're going slow anyway, try coalescing here. */ + switch (coalesce(tdb, off, list, r->data_len)) { + case -1: + /* This has already unlocked on error. */ + return -1; + case 1: + /* This has unlocked list, restart. */ + goto again; + } + } + + tdb_unlock_free_list(tdb, list); + return 0; + +unlock_err: + tdb_unlock_free_list(tdb, list); + return TDB_OFF_ERR; +} + +/* We want a really big chunk. Look through every zone's oversize bucket */ +static tdb_off_t huge_alloc(struct tdb_context *tdb, size_t size, + tdb_len_t *actual) +{ + tdb_off_t i, off; + + do { + for (i = 0; i < tdb->header.v.num_zones; i++) { + /* Try getting one from list. */ + off = lock_and_alloc(tdb, tdb->header.v.free_buckets, + size, actual); + if (off == TDB_OFF_ERR) + return TDB_OFF_ERR; + if (off != 0) + return off; + /* FIXME: Coalesce! 
*/ + } + } while (tdb_expand(tdb, 0, size, false) == 0); + + return TDB_OFF_ERR; +} + +static tdb_off_t get_free(struct tdb_context *tdb, size_t size, + tdb_len_t *actual) +{ + tdb_off_t off, bucket; + unsigned int num_empty, step = 0; + + bucket = size_to_bucket(tdb, size); + + /* If we're after something bigger than a single zone, handle + * specially. */ + if (unlikely(sizeof(struct tdb_used_record) + size + >= (1ULL << tdb->header.v.zone_bits))) { + return huge_alloc(tdb, size, actual); + } + + /* Number of zones we search is proportional to the log of them. */ + for (num_empty = 0; num_empty < fls64(tdb->header.v.num_zones); + num_empty++) { + tdb_off_t b; + + /* Start at exact size bucket, and search up... */ + for (b = bucket; b <= tdb->header.v.num_zones; b++) { + b = find_free_head(tdb, b); + + /* Non-empty list? Try getting block. */ + if (b <= tdb->header.v.num_zones) { + /* Try getting one from list. */ + off = lock_and_alloc(tdb, b, size, actual); + if (off == TDB_OFF_ERR) + return TDB_OFF_ERR; + if (off != 0) + return off; + /* Didn't work. Try next bucket. */ + } + } + + /* Try another zone, at pseudo random. Avoid duplicates by + using an odd step. */ + if (step == 0) + step = ((quick_random(tdb)) % 65536) * 2 + 1; + tdb->last_zone = (tdb->last_zone + step) + % tdb->header.v.num_zones; + } + return 0; +} + +int set_header(struct tdb_context *tdb, + struct tdb_used_record *rec, + uint64_t keylen, uint64_t datalen, + uint64_t actuallen, uint64_t hash) +{ + uint64_t keybits = (fls64(keylen) + 1) / 2; + + /* Use top bits of hash, so it's independent of hash table size. */ + rec->magic_and_meta + = (actuallen - (keylen + datalen)) + | ((hash >> 53) << 32) + | (keybits << 43) + | (TDB_MAGIC << 48); + rec->key_and_data_len = (keylen | (datalen << (keybits*2))); + + /* Encoding can fail on big values. 
*/ + if (rec_key_length(rec) != keylen + || rec_data_length(rec) != datalen + || rec_extra_padding(rec) != actuallen - (keylen + datalen)) { + tdb->ecode = TDB_ERR_IO; + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "Could not encode k=%llu,d=%llu,a=%llu\n", + (long long)keylen, (long long)datalen, + (long long)actuallen); + return -1; + } + return 0; +} + +static tdb_len_t adjust_size(size_t keylen, size_t datalen, bool growing) +{ + tdb_len_t size = keylen + datalen; + + if (size < MIN_DATA_LEN) + size = MIN_DATA_LEN; + + /* Overallocate if this is coming from an enlarging store. */ + if (growing) + size += datalen / 2; + + /* Round to next uint64_t boundary. */ + return (size + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL); +} + +/* If this fails, try tdb_expand. */ +tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, + uint64_t hash, bool growing) +{ + tdb_off_t off; + tdb_len_t size, actual; + struct tdb_used_record rec; + + /* We don't want header to change during this! */ + assert(tdb->header_uptodate); + + size = adjust_size(keylen, datalen, growing); + + off = get_free(tdb, size, &actual); + if (unlikely(off == TDB_OFF_ERR || off == 0)) + return off; + + /* Some supergiant values can't be encoded. */ + if (set_header(tdb, &rec, keylen, datalen, actual, hash) != 0) { + add_free_record(tdb, off, sizeof(rec) + actual); + return TDB_OFF_ERR; + } + + if (tdb_write_convert(tdb, off, &rec, sizeof(rec)) != 0) + return TDB_OFF_ERR; + + return off; +} + +static bool larger_buckets_might_help(struct tdb_context *tdb) +{ + /* If our buckets are already covering 1/8 of a zone, don't + * bother (note: might become an 1/16 of a zone if we double + * zone size). */ + tdb_len_t size = (1ULL << tdb->header.v.zone_bits) / 8; + + if (size >= MIN_DATA_LEN + && size_to_bucket(tdb, size) < tdb->header.v.free_buckets) { + return false; + } + + /* FIXME: Put stats in tdb_context or examine db itself! 
*/
+	/* It's fairly cheap to do as we expand database. */
+	return true;
+}
+
+/* Report whether the existing zones are working well enough that simply
+ * appending one more zone is acceptable.  Currently a stub: always true. */
+static bool zones_happy(struct tdb_context *tdb)
+{
+	/* FIXME: look at distribution of zones. */
+	return true;
+}
+
+/* Expand the database.
+ *
+ * Grows the file so that a record with a klen-byte key and dlen-byte data
+ * (sized via adjust_size(), honouring "growing") can be allocated.
+ *
+ * Holds the allrecord write lock for the duration.  Fast path: if the
+ * zones already cover more than the file, just extend the file to match.
+ * Slow path: add a zone or enlarge the zones, write a new free-bucket
+ * array at the start of the new space, re-queue every old free record
+ * into it, then free the old array and the remainder of the new space.
+ *
+ * Returns 0 on success (including when another process already expanded
+ * for us), -1 on failure. */
+int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen,
+	       bool growing)
+{
+	uint64_t new_num_buckets, new_num_zones, new_zone_bits;
+	uint64_t old_num_total, i;
+	tdb_len_t add, freebucket_size, needed;
+	tdb_off_t off, old_free_off;
+	const tdb_off_t *oldf;
+	struct tdb_used_record fhdr;
+
+	/* We need room for the record header too. */
+	needed = sizeof(struct tdb_used_record)
+		+ adjust_size(klen, dlen, growing);
+
+	/* FIXME: this is overkill.  An expand lock? */
+	if (tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false) == -1)
+		return -1;
+
+	/* Someone may have expanded for us. */
+	if (update_header(tdb))
+		goto success;
+
+	/* Make sure we have the latest size. */
+	tdb->methods->oob(tdb, tdb->map_size + 1, true);
+
+	/* Did we enlarge zones without enlarging file? */
+	if (tdb->map_size
+	    < tdb->header.v.num_zones << tdb->header.v.zone_bits) {
+		add = (tdb->header.v.num_zones << tdb->header.v.zone_bits)
+			- tdb->map_size;
+		/* Updates tdb->map_size. */
+		if (tdb->methods->expand_file(tdb, tdb->map_size, add) == -1)
+			goto fail;
+		if (add_free_record(tdb, tdb->map_size - add, add) == -1)
+			goto fail;
+		if (add >= needed) {
+			/* Allocate from this zone. */
+			tdb->last_zone = zone_of(tdb, tdb->map_size - add);
+			goto success;
+		}
+	}
+
+	/* Slow path.  Should we increase the number of buckets? */
+	new_num_buckets = tdb->header.v.free_buckets;
+	if (larger_buckets_might_help(tdb))
+		new_num_buckets++;
+
+	/* Now we'll need room for the new free buckets, too.  Assume
+	 * worst case (zones expand). */
+	needed += sizeof(fhdr)
+		+ ((tdb->header.v.num_zones+1)
+		   * (new_num_buckets+1) * sizeof(tdb_off_t));
+
+	/* If we need less than one zone, and they're working well, just add
+	 * another one.  (1ULL: zone_bits may exceed the width of long.) */
+	if (needed < (1ULL << tdb->header.v.zone_bits) && zones_happy(tdb)) {
+		new_num_zones = tdb->header.v.num_zones+1;
+		new_zone_bits = tdb->header.v.zone_bits;
+		add = 1ULL << tdb->header.v.zone_bits;
+	} else {
+		/* Increase the zone size. */
+		new_num_zones = tdb->header.v.num_zones;
+		new_zone_bits = tdb->header.v.zone_bits+1;
+		while ((new_num_zones << new_zone_bits) - tdb->map_size
+		       < needed) {
+			new_zone_bits++;
+		}
+
+		/* We expand by enough zones to meet the need. */
+		add = (needed + (1ULL << new_zone_bits)-1)
+			& ~((1ULL << new_zone_bits)-1);
+	}
+
+	/* Updates tdb->map_size. */
+	if (tdb->methods->expand_file(tdb, tdb->map_size, add) == -1)
+		goto fail;
+
+	/* Use first part as new free bucket array. */
+	off = tdb->map_size - add;
+	freebucket_size = new_num_zones
+		* (new_num_buckets + 1) * sizeof(tdb_off_t);
+
+	/* Write header. */
+	if (set_header(tdb, &fhdr, 0, freebucket_size, freebucket_size, 0))
+		goto fail;
+	if (tdb_write_convert(tdb, off, &fhdr, sizeof(fhdr)) == -1)
+		goto fail;
+
+	/* Adjust off to point to start of buckets, add to be remainder. */
+	add -= freebucket_size + sizeof(fhdr);
+	off += sizeof(fhdr);
+
+	/* Access the old zones. */
+	old_num_total = tdb->header.v.num_zones*(tdb->header.v.free_buckets+1);
+	old_free_off = tdb->header.v.free_off;
+	oldf = tdb_access_read(tdb, old_free_off,
+			       old_num_total * sizeof(tdb_off_t));
+	if (!oldf)
+		goto fail;
+
+	/* Switch to using our new zone.  zero_out() takes a length in
+	 * bytes, so clear the whole array, not just one byte per entry. */
+	if (zero_out(tdb, off, freebucket_size) == -1)
+		goto fail_release;
+	tdb->header.v.free_off = off;
+	tdb->header.v.num_zones = new_num_zones;
+	/* Record the (possibly larger) zone size before re-queueing:
+	 * presumably zone_of() below reads it from the header. */
+	tdb->header.v.zone_bits = new_zone_bits;
+	tdb->header.v.free_buckets = new_num_buckets;
+
+	/* FIXME: If zone size hasn't changed, can simply copy pointers. */
+	/* FIXME: Coalesce? */
+	for (i = 0; i < old_num_total; i++) {
+		tdb_off_t next;
+		struct tdb_free_record rec;
+		tdb_off_t list;
+
+		/* Walk old bucket i, moving each free record to the list
+		 * it belongs to under the new layout. */
+		for (off = oldf[i]; off; off = next) {
+			if (tdb_read_convert(tdb, off, &rec, sizeof(rec)))
+				goto fail_release;
+
+			list = zone_of(tdb, off)
+				* (tdb->header.v.free_buckets+1)
+				+ size_to_bucket(tdb, rec.data_len);
+			/* Save the link first: enqueueing rewrites rec. */
+			next = rec.next;
+
+			if (enqueue_in_free(tdb, list, off, &rec) == -1)
+				goto fail_release;
+		}
+	}
+
+
+	/* Free up the old free buckets. */
+	old_free_off -= sizeof(fhdr);
+	if (tdb_read_convert(tdb, old_free_off, &fhdr, sizeof(fhdr)) == -1)
+		goto fail_release;
+	if (add_free_record(tdb, old_free_off,
+			    rec_data_length(&fhdr)+rec_extra_padding(&fhdr)))
+		goto fail_release;
+
+	/* Add the rest as a new free record. */
+	if (add_free_record(tdb, tdb->map_size - add, add) == -1)
+		goto fail_release;
+
+	/* Start allocating from where the new space is. */
+	tdb->last_zone = zone_of(tdb, tdb->map_size - add);
+	tdb_access_release(tdb, oldf);
+success:
+	tdb_allrecord_unlock(tdb, F_WRLCK);
+	return 0;
+
+fail_release:
+	tdb_access_release(tdb, oldf);
+fail:
+	tdb_allrecord_unlock(tdb, F_WRLCK);
+	return -1;
+}
diff --git a/ccan/tdb2/io.c b/ccan/tdb2/io.c
new file mode 100644
index 00000000..5910fc54
--- /dev/null
+++ b/ccan/tdb2/io.c
@@ -0,0 +1,662 @@
+ /*
+   Unix SMB/CIFS implementation.
+
+   trivial database library
+
+   Copyright (C) Andrew Tridgell              1999-2005
+   Copyright (C) Paul `Rusty' Russell		   2000
+   Copyright (C) Jeremy Allison			   2000-2003
+   Copyright (C) Rusty Russell			   2010
+
+     ** NOTE! The following LGPL license applies to the tdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ +#include "private.h" +#include + +void tdb_munmap(struct tdb_context *tdb) +{ + if (tdb->flags & TDB_INTERNAL) + return; + + if (tdb->map_ptr) { + munmap(tdb->map_ptr, tdb->map_size); + tdb->map_ptr = NULL; + } +} + +void tdb_mmap(struct tdb_context *tdb) +{ + if (tdb->flags & TDB_INTERNAL) + return; + + if (tdb->flags & TDB_NOMMAP) + return; + + tdb->map_ptr = mmap(NULL, tdb->map_size, + PROT_READ|(tdb->read_only? 0:PROT_WRITE), + MAP_SHARED, tdb->fd, 0); + + /* + * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! + */ + if (tdb->map_ptr == MAP_FAILED) { + tdb->map_ptr = NULL; + tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv, + "tdb_mmap failed for size %lld (%s)\n", + (long long)tdb->map_size, strerror(errno)); + } +} + +/* check for an out of bounds access - if it is out of bounds then + see if the database has been expanded by someone else and expand + if necessary + note that "len" is the minimum length needed for the db +*/ +static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, bool probe) +{ + struct stat st; + if (len <= tdb->map_size) + return 0; + if (tdb->flags & TDB_INTERNAL) { + if (!probe) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "tdb_oob len %lld beyond internal" + " malloc size %lld\n", + (long long)len, + (long long)tdb->map_size); + } + return -1; + } + + if (fstat(tdb->fd, &st) == -1) { + tdb->ecode = TDB_ERR_IO; + return -1; + } + + if (st.st_size < (size_t)len) { + if (!probe) { + /* Ensure ecode is set for log fn. 
*/ + tdb->ecode = TDB_ERR_IO; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "tdb_oob len %lld beyond eof at %lld\n", + (long long)len, (long long)st.st_size); + } + return -1; + } + + /* Unmap, update size, remap */ + tdb_munmap(tdb); + tdb->map_size = st.st_size; + tdb_mmap(tdb); + return 0; +} + +static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len) +{ + if (unlikely(!tdb->map_ptr)) + return NULL; + + /* FIXME: We can do a subset of this! */ + if (tdb->transaction) + return NULL; + + if (unlikely(tdb_oob(tdb, off + len, true) == -1)) + return NULL; + return (char *)tdb->map_ptr + off; +} + +/* Either make a copy into pad and return that, or return ptr into mmap. */ +/* Note: pad has to be a real object, so we can't get here if len + * overflows size_t */ +/* FIXME: Transaction */ +void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len) +{ + ssize_t r; + + if (likely(!(tdb->flags & TDB_CONVERT))) { + void *ret = tdb_direct(tdb, off, len); + if (ret) + return ret; + } + + if (unlikely(tdb_oob(tdb, off + len, false) == -1)) + return NULL; + + r = pread(tdb->fd, pad, len, off); + if (r != (ssize_t)len) { + /* Ensure ecode is set for log fn. */ + tdb->ecode = TDB_ERR_IO; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "tdb_read failed at %llu " + "len=%lld ret=%lld (%s) map_size=%lld\n", + (long long)off, (long long)len, + (long long)r, strerror(errno), + (long long)tdb->map_size); + return NULL; + } + return tdb_convert(tdb, pad, len); +} + +/* Endian conversion: we only ever deal with 8 byte quantities */ +void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size) +{ + if (unlikely((tdb->flags & TDB_CONVERT))) { + uint64_t i, *p = (uint64_t *)buf; + for (i = 0; i < size / 8; i++) + p[i] = bswap_64(p[i]); + } + return buf; +} + +/* Return first non-zero offset in num offset array, or num. */ +/* FIXME: Return the off? 
*/
+uint64_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off,
+			      uint64_t num)
+{
+	uint64_t i, *val;
+	bool alloc = false;
+
+	/* Prefer a direct pointer; otherwise read into a heap copy. */
+	val = tdb_direct(tdb, off, num * sizeof(tdb_off_t));
+	if (unlikely(!val)) {
+		val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t));
+		if (!val)
+			return num;
+		alloc = true;
+	}
+
+	for (i = 0; i < num; i++) {
+		if (val[i])
+			break;
+	}
+	if (unlikely(alloc))
+		free(val);
+	return i;
+}
+
+/* Return first zero offset in num offset array, or num. */
+/* Note: a failed read is also reported as num. */
+uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
+			   uint64_t num)
+{
+	uint64_t i, *val;
+	bool alloc = false;
+
+	/* Prefer a direct pointer; otherwise read into a heap copy. */
+	val = tdb_direct(tdb, off, num * sizeof(tdb_off_t));
+	if (unlikely(!val)) {
+		val = tdb_alloc_read(tdb, off, num * sizeof(tdb_off_t));
+		if (!val)
+			return num;
+		alloc = true;
+	}
+
+	for (i = 0; i < num; i++) {
+		if (!val[i])
+			break;
+	}
+	if (unlikely(alloc))
+		free(val);
+	return i;
+}
+
+/* Write len bytes starting at file offset off by repeating the
+ * size-byte pattern in buf (the last repetition may be partial).
+ * Returns 0 on success; on write failure sets TDB_ERR_IO, logs, and
+ * returns -1. */
+static int fill(struct tdb_context *tdb,
+		const void *buf, size_t size,
+		tdb_off_t off, tdb_len_t len)
+{
+	while (len) {
+		size_t n = len > size ? size : len;
+
+		if (!tdb_pwrite_all(tdb->fd, buf, n, off)) {
+			tdb->ecode = TDB_ERR_IO;
+			tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+				 "fill write failed: giving up!\n");
+			return -1;
+		}
+		len -= n;
+		off += n;
+	}
+	return 0;
+}
+
+/* Zero len bytes at offset off, through the direct pointer when one is
+ * available and via fill() otherwise.  Returns 0 on success, -1 on
+ * failure. */
+int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
+{
+	void *p = tdb_direct(tdb, off, len);
+	if (p) {
+		memset(p, 0, len);
+		return 0;
+	} else {
+		char buf[8192] = { 0 };
+		/* fill() takes (off, len), in that order. */
+		return fill(tdb, buf, sizeof(buf), off, len);
+	}
+}
+
+/* Read the tdb_off_t stored at offset off.  Returns TDB_OFF_ERR if the
+ * read fails. */
+tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
+{
+	tdb_off_t pad, *ret;
+
+	/* Size of the value, not of the pointer to it. */
+	ret = tdb_get(tdb, off, &pad, sizeof(pad));
+	if (!ret) {
+		return TDB_OFF_ERR;
+	}
+	return *ret;
+}
+
+/* Even on files, we can get partial writes due to signals. 
*/
+/* Write all len bytes of buf at file offset off, looping over short
+ * writes.  Returns true on success.  Returns false if pwrite() fails
+ * (errno as set by pwrite) or makes no progress (errno = ENOSPC). */
+bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off)
+{
+	while (len) {
+		/* pwrite() signals failure with -1, so this must be signed. */
+		ssize_t ret;
+		ret = pwrite(fd, buf, len, off);
+		if (ret < 0)
+			return false;
+		if (ret == 0) {
+			errno = ENOSPC;
+			return false;
+		}
+		buf = (const char *)buf + ret;
+		off += ret;
+		len -= ret;
+	}
+	return true;
+}
+
+/* write a lump of data at a specified offset */
+/* Returns 0 on success (or when len is 0), -1 with tdb->ecode set when
+ * the database is read-only or the write fails, and -1 when the bounds
+ * check fails. */
+static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
+		     const void *buf, tdb_len_t len)
+{
+	if (len == 0) {
+		return 0;
+	}
+
+	if (tdb->read_only) {
+		tdb->ecode = TDB_ERR_RDONLY;
+		return -1;
+	}
+
+	if (tdb->methods->oob(tdb, off + len, 0) != 0)
+		return -1;
+
+	if (tdb->map_ptr) {
+		memcpy(off + (char *)tdb->map_ptr, buf, len);
+	} else {
+		if (!tdb_pwrite_all(tdb->fd, buf, len, off)) {
+			tdb->ecode = TDB_ERR_IO;
+			/* Cast so the arguments match %llu on every ABI. */
+			tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+				 "tdb_write failed at %llu len=%llu (%s)\n",
+				 (unsigned long long)off,
+				 (unsigned long long)len, strerror(errno));
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* read a lump of data at a specified offset */
+/* Returns 0 on success, -1 if the range is out of bounds or the read
+ * is short (TDB_ERR_IO is set and logged for a short read). */
+static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
+		    tdb_len_t len)
+{
+	if (tdb->methods->oob(tdb, off + len, 0) != 0) {
+		return -1;
+	}
+
+	if (tdb->map_ptr) {
+		memcpy(buf, off + (char *)tdb->map_ptr, len);
+	} else {
+		ssize_t ret = pread(tdb->fd, buf, len, off);
+		if (ret != (ssize_t)len) {
+			/* Ensure ecode is set for log fn. */
+			tdb->ecode = TDB_ERR_IO;
+			tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+				 "tdb_read failed at %lld "
+				 "len=%lld ret=%lld (%s) map_size=%lld\n",
+				 (long long)off, (long long)len,
+				 (long long)ret, strerror(errno),
+				 (long long)tdb->map_size);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Write len bytes of rec at off through the methods table.  Note that
+ * rec is passed through tdb_convert() in place before it is written. */
+int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
+		      void *rec, size_t len)
+{
+	return tdb->methods->write(tdb, off, tdb_convert(tdb, rec, len), len);
+}
+
+/* Read len bytes at off into rec through the methods table, then pass
+ * rec through tdb_convert().  Returns the read method's result. */
+int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
+		     void *rec, size_t len)
+{
+	int ret = tdb->methods->read(tdb, off, rec, len);
+	tdb_convert(tdb, rec, len);
+	return ret;
+}
+
+/* Store the single offset val at off. */
+int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val)
+{
+	return tdb_write_convert(tdb, off, &val, sizeof(val));
+}
+
+/* read a lump of data, allocating the space for it */
+/* Returns a malloc'd buffer the caller must free(), or NULL on
+ * allocation failure (TDB_ERR_OOM, logged) or read failure. */
+void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
+{
+	void *buf;
+
+	/* some systems don't like zero length malloc */
+	buf = malloc(len ? len : 1);
+	if (unlikely(!buf)) {
+		tdb->ecode = TDB_ERR_OOM;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_alloc_read malloc failed len=%lld\n",
+			 (long long)len);
+	} else if (unlikely(tdb->methods->read(tdb, offset, buf, len))) {
+		free(buf);
+		buf = NULL;
+	}
+	return buf;
+}
+
+/* Hash the key of the used record at off.  Returns 0 if the record
+ * header or the key cannot be read, so 0 is ambiguous with a real
+ * hash value (see FIXME). */
+uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off)
+{
+	struct tdb_used_record pad, *r;
+	void *key;
+	uint64_t klen, hash;
+
+	r = tdb_get(tdb, off, &pad, sizeof(*r));
+	if (!r)
+		/* FIXME */
+		return 0;
+
+	/* The key bytes follow the record header. */
+	klen = rec_key_length(r);
+	key = tdb_direct(tdb, off + sizeof(*r), klen);
+	if (likely(key))
+		return tdb_hash(tdb, key, klen);
+
+	key = tdb_alloc_read(tdb, off + sizeof(*r), klen);
+	if (unlikely(!key))
+		return 0;
+	hash = tdb_hash(tdb, key, klen);
+	free(key);
+	return hash;
+}
+
+/* Give a piece of tdb data to a parser */
+/* The data is handed over directly when tdb_direct() succeeds and from
+ * a temporary heap copy otherwise.  Returns the parser's result, or -1
+ * if the data cannot be read. */
+int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
+		   tdb_off_t offset, tdb_len_t len,
+		   int (*parser)(TDB_DATA key, TDB_DATA data,
+				 void *private_data),
+		   void *private_data)
+{
+	TDB_DATA data;
+	int result;
+	bool allocated = false;
+
+	data.dsize = len;
+	data.dptr = tdb_direct(tdb, offset, len);
+	if (unlikely(!data.dptr)) {
+		if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
+			return -1;
+		}
+		allocated = true;
+	}
+	result = parser(key, data, private_data);
+	if (unlikely(allocated))
+		free(data.dptr);
+	return result;
+}
+
+/* expand a file.  we prefer to use ftruncate, as that is what posix
+  says to use for mmap expansion */
+/* size is the current length and addition the number of bytes to add.
+ * On success the new space is filled, tdb->map_size becomes
+ * size + addition (callers use the new map_size straight away) and the
+ * file is remapped.  Returns 0 on success, -1 on failure. */
+static int tdb_expand_file(struct tdb_context *tdb,
+			   tdb_len_t size, tdb_len_t addition)
+{
+	char buf[8192];
+
+	if (tdb->read_only) {
+		tdb->ecode = TDB_ERR_RDONLY;
+		return -1;
+	}
+
+	/* If this fails, we try to fill anyway. */
+	if (ftruncate(tdb->fd, size+addition))
+		;
+
+	/* now fill the file with something. This ensures that the
+	   file isn't sparse, which would be very bad if we ran out of
+	   disk. This must be done with write, not via mmap */
+	memset(buf, 0x43, sizeof(buf));
+	/* fill() takes (off, len): start at the old end of file. */
+	if (fill(tdb, buf, sizeof(buf), size, addition) == -1)
+		return -1;
+
+	/* Unmap with the old size, then map the enlarged file. */
+	tdb_munmap(tdb);
+	tdb->map_size = size + addition;
+	tdb_mmap(tdb);
+	return 0;
+}
+
+/* Get read access to len bytes at off: a direct pointer if available,
+ * else a heap copy.  Hand the result to tdb_access_release() when done.
+ * Returns NULL on failure. */
+const void *tdb_access_read(struct tdb_context *tdb,
+			    tdb_off_t off, tdb_len_t len)
+{
+	const void *ret = tdb_direct(tdb, off, len);
+
+	if (!ret)
+		ret = tdb_alloc_read(tdb, off, len);
+	return ret;
+}
+
+/* Release a pointer from tdb_access_read(): it is freed only when it
+ * does not point inside the current mapping. */
+void tdb_access_release(struct tdb_context *tdb, const void *p)
+{
+	if (!tdb->map_ptr
+	    || (char *)p < (char *)tdb->map_ptr
+	    || (char *)p >= (char *)tdb->map_ptr + tdb->map_size)
+		free((void *)p);
+}
+
+#if 0
+/* write a lump of data at a specified offset */
+static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
+		     const void *buf, tdb_len_t len)
+{
+	if (len == 0) {
+		return 0;
+	}
+
+	if (tdb->read_only || tdb->traverse_read) {
+		tdb->ecode = TDB_ERR_RDONLY;
+		return -1;
+	}
+
+	if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
+		return -1;
+
+	if (tdb->map_ptr) {
+		memcpy(off + (char *)tdb->map_ptr, buf, len);
+	} else {
+		ssize_t written = pwrite(tdb->fd, buf, len, off);
+		if ((written != (ssize_t)len) && (written != -1)) {
+			/* try once more */
+			tdb->ecode = TDB_ERR_IO;
+			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
+				 "%d of %d bytes at %d, trying once more\n",
+				 (int)written, len, off));
+			written = pwrite(tdb->fd, (const char *)buf+written,
+					 len-written,
+					 off+written);
+		}
+		if (written == -1) {
+			/* Ensure ecode is set for log fn. */
+			tdb->ecode = TDB_ERR_IO;
+			TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
+				 "len=%d (%s)\n", off, len, strerror(errno)));
+			return -1;
+		} else if (written != (ssize_t)len) {
+			tdb->ecode = TDB_ERR_IO;
+			TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
+				 "write %d bytes at %d in two attempts\n",
+				 len, off));
+			return -1;
+		}
+	}
+	return 0;
+}
+
+
+
+/*
+  do an unlocked scan of the hash table heads to find the next non-zero head. 
The value + will then be confirmed with the lock held +*/ +static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain) +{ + uint32_t h = *chain; + if (tdb->map_ptr) { + for (;h < tdb->header.hash_size;h++) { + if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) { + break; + } + } + } else { + uint32_t off=0; + for (;h < tdb->header.hash_size;h++) { + if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) { + break; + } + } + } + (*chain) = h; +} + + +/* expand the database by expanding the underlying file and doing the + mmap again if necessary */ +int tdb_expand(struct tdb_context *tdb) +{ + struct tdb_record rec; + tdb_off_t offset, new_size; + + /* We have to lock every hash bucket and every free list. */ + do { + + + if (tdb_lock(tdb, -1, F_WRLCK) == -1) { + TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n")); + return -1; + } + + /* must know about any previous expansions by another process */ + tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); + + /* always make room for at least 100 more records, and at + least 25% more space. Round the database up to a multiple + of the page size */ + new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25); + size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size; + + if (!(tdb->flags & TDB_INTERNAL)) + tdb_munmap(tdb); + + /* + * We must ensure the file is unmapped before doing this + * to ensure consistency with systems like OpenBSD where + * writes and mmaps are not consistent. 
+ */ + + /* expand the file itself */ + if (!(tdb->flags & TDB_INTERNAL)) { + if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0) + goto fail; + } + + tdb->map_size += size; + + if (tdb->flags & TDB_INTERNAL) { + char *new_map_ptr = (char *)realloc(tdb->map_ptr, + tdb->map_size); + if (!new_map_ptr) { + tdb->map_size -= size; + goto fail; + } + tdb->map_ptr = new_map_ptr; + } else { + /* + * We must ensure the file is remapped before adding the space + * to ensure consistency with systems like OpenBSD where + * writes and mmaps are not consistent. + */ + + /* We're ok if the mmap fails as we'll fallback to read/write */ + tdb_mmap(tdb); + } + + /* form a new freelist record */ + memset(&rec,'\0',sizeof(rec)); + rec.rec_len = size - sizeof(rec); + + /* link it into the free list */ + offset = tdb->map_size - size; + if (tdb_free(tdb, offset, &rec) == -1) + goto fail; + + tdb_unlock(tdb, -1, F_WRLCK); + return 0; + fail: + tdb_unlock(tdb, -1, F_WRLCK); + return -1; +} + +/* read/write a tdb_off_t */ +int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d) +{ + return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV()); +} + +int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d) +{ + tdb_off_t off = *d; + return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d)); +} + + +/* read/write a record */ +int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec) +{ + if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1) + return -1; + if (TDB_BAD_MAGIC(rec)) { + /* Ensure ecode is set for log fn. 
*/ + tdb->ecode = TDB_ERR_CORRUPT; + TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset)); + return -1; + } + return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0); +} + +int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec) +{ + struct tdb_record r = *rec; + return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r)); +} +#endif + +static const struct tdb_methods io_methods = { + tdb_read, + tdb_write, + tdb_oob, + tdb_expand_file, +}; + +/* + initialise the default methods table +*/ +void tdb_io_init(struct tdb_context *tdb) +{ + tdb->methods = &io_methods; +} diff --git a/ccan/tdb2/lock.c b/ccan/tdb2/lock.c new file mode 100644 index 00000000..dca526ce --- /dev/null +++ b/ccan/tdb2/lock.c @@ -0,0 +1,848 @@ + /* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Andrew Tridgell 1999-2005 + Copyright (C) Paul `Rusty' Russell 2000 + Copyright (C) Jeremy Allison 2000-2003 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . 
+*/ + +#include "private.h" + +static int fcntl_lock(struct tdb_context *tdb, + int rw, off_t off, off_t len, bool waitflag) +{ + struct flock fl; + + fl.l_type = rw; + fl.l_whence = SEEK_SET; + fl.l_start = off; + fl.l_len = len; + fl.l_pid = 0; + + if (waitflag) + return fcntl(tdb->fd, F_SETLKW, &fl); + else + return fcntl(tdb->fd, F_SETLK, &fl); +} + +static int fcntl_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len) +{ + struct flock fl; +#if 0 /* Check they matched up locks and unlocks correctly. */ + char line[80]; + FILE *locks; + bool found = false; + + locks = fopen("/proc/locks", "r"); + + while (fgets(line, 80, locks)) { + char *p; + int type, start, l; + + /* eg. 1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */ + p = strchr(line, ':') + 1; + if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY "))) + continue; + p += strlen(" FLOCK ADVISORY "); + if (strncmp(p, "READ ", strlen("READ ")) == 0) + type = F_RDLCK; + else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0) + type = F_WRLCK; + else + abort(); + p += 6; + if (atoi(p) != getpid()) + continue; + p = strchr(strchr(p, ' ') + 1, ' ') + 1; + start = atoi(p); + p = strchr(p, ' ') + 1; + if (strncmp(p, "EOF", 3) == 0) + l = 0; + else + l = atoi(p) - start + 1; + + if (off == start) { + if (len != l) { + fprintf(stderr, "Len %u should be %u: %s", + (int)len, l, line); + abort(); + } + if (type != rw) { + fprintf(stderr, "Type %s wrong: %s", + rw == F_RDLCK ? "READ" : "WRITE", line); + abort(); + } + found = true; + break; + } + } + + if (!found) { + fprintf(stderr, "Unlock on %u@%u not found!\n", + (int)off, (int)len); + abort(); + } + + fclose(locks); +#endif + + fl.l_type = F_UNLCK; + fl.l_whence = SEEK_SET; + fl.l_start = off; + fl.l_len = len; + fl.l_pid = 0; + + return fcntl(tdb->fd, F_SETLKW, &fl); +} + +/* a byte range locking function - return 0 on success + this functions locks/unlocks 1 byte at the specified offset. 
+ + note that a len of zero means lock to end of file +*/ +static int tdb_brlock(struct tdb_context *tdb, + int rw_type, tdb_off_t offset, tdb_off_t len, + enum tdb_lock_flags flags) +{ + int ret; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + if (rw_type == F_WRLCK && tdb->read_only) { + tdb->ecode = TDB_ERR_RDONLY; + return -1; + } + + /* A 32 bit system cannot open a 64-bit file, but it could have + * expanded since then: check here. */ + if ((size_t)(offset + len) != offset + len) { + tdb->ecode = TDB_ERR_IO; + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_brlock: lock on giant offset %llu\n", + (long long)(offset + len)); + return -1; + } + + do { + ret = fcntl_lock(tdb, rw_type, offset, len, + flags & TDB_LOCK_WAIT); + } while (ret == -1 && errno == EINTR); + + if (ret == -1) { + tdb->ecode = TDB_ERR_LOCK; + /* Generic lock error. errno set by fcntl. + * EAGAIN is an expected return from non-blocking + * locks. */ + if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_brlock failed (fd=%d) at" + " offset %llu rw_type=%d flags=%d len=%llu\n", + tdb->fd, (long long)offset, rw_type, + flags, (long long)len); + } + return -1; + } + return 0; +} + +static int tdb_brunlock(struct tdb_context *tdb, + int rw_type, tdb_off_t offset, size_t len) +{ + int ret; + + if (tdb->flags & TDB_NOLOCK) { + return 0; + } + + do { + ret = fcntl_unlock(tdb, rw_type, offset, len); + } while (ret == -1 && errno == EINTR); + + if (ret == -1) { + tdb->log(tdb, TDB_DEBUG_TRACE, tdb->log_priv, + "tdb_brunlock failed (fd=%d) at offset %llu" + " rw_type=%d len=%llu\n", + tdb->fd, (long long)offset, rw_type, (long long)len); + } + return ret; +} + +#if 0 +/* + upgrade a read lock to a write lock. This needs to be handled in a + special way as some OSes (such as solaris) have too conservative + deadlock detection and claim a deadlock when progress can be + made. For those OSes we may loop for a while. 
+*/
+int tdb_allrecord_upgrade(struct tdb_context *tdb)
+{
+	int count = 1000;
+
+	if (tdb->allrecord_lock.count != 1) {
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_upgrade failed: count %u too high\n",
+			 tdb->allrecord_lock.count);
+		return -1;
+	}
+
+	if (tdb->allrecord_lock.off != 1) {
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_upgrade failed: already upgraded?\n");
+		return -1;
+	}
+
+	while (count--) {
+		struct timeval tv;
+		if (tdb_brlock(tdb, F_WRLCK,
+			       TDB_HASH_LOCK_START
+			       + (1ULL << tdb->header.v.hash_bits), 0,
+			       TDB_LOCK_WAIT|TDB_LOCK_PROBE) == 0) {
+			tdb->allrecord_lock.ltype = F_WRLCK;
+			tdb->allrecord_lock.off = 0;
+			return 0;
+		}
+		if (errno != EDEADLK) {
+			break;
+		}
+		/* sleep for as short a time as we can - more portable than usleep() */
+		tv.tv_sec = 0;
+		tv.tv_usec = 1;
+		select(0, NULL, NULL, NULL, &tv);
+	}
+	tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv,
+		 "tdb_allrecord_upgrade failed\n");
+	return -1;
+}
+#endif
+
+/* Find the in-memory record for a nested lock held at this offset, or
+ * return NULL if no such lock is recorded. */
+static struct tdb_lock_type *find_nestlock(struct tdb_context *tdb,
+					   tdb_off_t offset)
+{
+	unsigned int i;
+
+	for (i = 0; i < tdb->num_lockrecs; i++) {
+		if (tdb->lockrecs[i].off == offset) {
+			return &tdb->lockrecs[i];
+		}
+	}
+	return NULL;
+}
+
+/* lock an offset in the database. */
+/* A lock already recorded for this offset only has its count bumped;
+ * otherwise a one-byte fcntl lock is taken and recorded in
+ * tdb->lockrecs.  Returns 0 on success (always, under TDB_NOLOCK),
+ * -1 on an invalid offset, allocation failure or lock failure. */
+static int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype,
+			 enum tdb_lock_flags flags)
+{
+	struct tdb_lock_type *new_lck;
+
+	/* Reject offsets past the hash locks plus the free-list locks. */
+	if (offset >= TDB_HASH_LOCK_START + (1ULL << tdb->header.v.hash_bits)
+	    + (tdb->header.v.num_zones * (tdb->header.v.free_buckets+1))) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+			 "tdb_lock: invalid offset %llu for ltype=%d\n",
+			 (long long)offset, ltype);
+		return -1;
+	}
+	if (tdb->flags & TDB_NOLOCK)
+		return 0;
+
+	new_lck = find_nestlock(tdb, offset);
+	if (new_lck) {
+		/*
+		 * Just increment the in-memory struct, posix locks
+		 * don't stack.
+		 */
+		new_lck->count++;
+		return 0;
+	}
+
+	/* Grow the array first so recording the lock cannot fail later. */
+	new_lck = (struct tdb_lock_type *)realloc(
+		tdb->lockrecs,
+		sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1));
+	if (new_lck == NULL) {
+		tdb->ecode = TDB_ERR_OOM;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_lock: unable to allocate %llu lock structure",
+			 (long long)(tdb->num_lockrecs + 1));
+		errno = ENOMEM;
+		return -1;
+	}
+	tdb->lockrecs = new_lck;
+
+	/* Since fcntl locks don't nest, we do a lock for the first one,
+	   and simply bump the count for future ones */
+	if (tdb_brlock(tdb, ltype, offset, 1, flags)) {
+		return -1;
+	}
+
+	tdb->lockrecs[tdb->num_lockrecs].off = offset;
+	tdb->lockrecs[tdb->num_lockrecs].count = 1;
+	tdb->lockrecs[tdb->num_lockrecs].ltype = ltype;
+	tdb->num_lockrecs++;
+
+	return 0;
+}
+
+/* Take the locks needed for recovery and run it.  Not implemented yet:
+ * the live branch aborts. */
+static int tdb_lock_and_recover(struct tdb_context *tdb)
+{
+#if 0 /* FIXME */
+
+	int ret;
+
+	/* We need to match locking order in transaction commit. */
+	if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, TDB_LOCK_WAIT)) {
+		return -1;
+	}
+
+	if (tdb_brlock(tdb, F_WRLCK, OPEN_LOCK, 1, TDB_LOCK_WAIT)) {
+		tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+		return -1;
+	}
+
+	ret = tdb_transaction_recover(tdb);
+
+	tdb_brunlock(tdb, F_WRLCK, OPEN_LOCK, 1);
+	tdb_brunlock(tdb, F_WRLCK, FREELIST_TOP, 0);
+
+	return ret;
+#else
+	abort();
+	return -1;
+#endif
+}
+
+/* Report whether the database needs recovery.  Stub: always false. */
+static bool tdb_needs_recovery(struct tdb_context *tdb)
+{
+	/* FIXME */
+	return false;
+}
+
+/* Drop one level of the nested lock at off.  The fcntl lock is released
+ * only when the count reaches zero.  Returns 0 on success (always,
+ * under TDB_NOLOCK), -1 if no such lock is held, or the result of the
+ * kernel unlock. */
+static int tdb_nest_unlock(struct tdb_context *tdb, tdb_off_t off, int ltype)
+{
+	int ret = -1;
+	struct tdb_lock_type *lck;
+
+	if (tdb->flags & TDB_NOLOCK)
+		return 0;
+
+	lck = find_nestlock(tdb, off);
+	if ((lck == NULL) || (lck->count == 0)) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_unlock: no lock for %llu\n", (long long)off);
+		return -1;
+	}
+
+	if (lck->count > 1) {
+		lck->count--;
+		return 0;
+	}
+
+	/*
+	 * This lock has count==1 left, so we need to unlock it in the
+	 * kernel. We don't bother with decrementing the in-memory array
+	 * element, we're about to overwrite it with the last array element
+	 * anyway.
+	 */
+	ret = tdb_brunlock(tdb, ltype, off, 1);
+
+	/*
+	 * Shrink the array by overwriting the element just unlocked with the
+	 * last array element.
+	 */
+	*lck = tdb->lockrecs[--tdb->num_lockrecs];
+
+	if (tdb->num_lockrecs == 0) {
+		/* If we're not holding any locks, header can change. */
+		tdb->header_uptodate = false;
+	}
+
+	return ret;
+}
+
+#if 0
+/*
+  get the transaction lock
+ */
+int tdb_transaction_lock(struct tdb_context *tdb, int ltype,
+			 enum tdb_lock_flags lockflags)
+{
+	return tdb_nest_lock(tdb, TRANSACTION_LOCK, ltype, lockflags);
+}
+
+/*
+  release the transaction lock
+ */
+int tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
+{
+	return tdb_nest_unlock(tdb, TRANSACTION_LOCK, ltype, false);
+}
+#endif
+
+/* We only need to lock individual bytes, but Linux merges consecutive locks
+ * so we lock in contiguous ranges. */
+/* Locks [off, off + len): tries the whole range without blocking and,
+ * if that fails, recurses on each half, blocking only on ranges of four
+ * bytes or fewer.  Returns 0 on success, -1 on failure (a first half
+ * already locked at this level is released again). */
+static int tdb_lock_gradual(struct tdb_context *tdb,
+			    int ltype, enum tdb_lock_flags flags,
+			    tdb_off_t off, tdb_off_t len)
+{
+	int ret;
+	enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
+
+	if (len <= 4) {
+		/* Single record.  Just do blocking lock. */
+		return tdb_brlock(tdb, ltype, off, len, flags);
+	}
+
+	/* First we try non-blocking. */
+	ret = tdb_brlock(tdb, ltype, off, len, nb_flags);
+	if (ret == 0) {
+		return 0;
+	}
+
+	/* Try locking first half, then second. */
+	ret = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
+	if (ret == -1)
+		return -1;
+
+	ret = tdb_lock_gradual(tdb, ltype, flags,
+			       off + len / 2, len - len / 2);
+	if (ret == -1) {
+		tdb_brunlock(tdb, ltype, off, len / 2);
+		return -1;
+	}
+	return 0;
+}
+
+/* lock/unlock entire database.  It can only be upgradable if you have some
+ * other way of guaranteeing exclusivity (ie. transaction write lock). 
+ * Note that we don't lock the free chains: no one can get those locks
+ * without a hash chain lock first. */
+/* Returns 0 on success.  Returns -1 with TDB_ERR_LOCK set if the
+ * database is read-only, a conflicting allrecord or chain lock is
+ * already held, or upgradable is requested with a non-read lock, and
+ * -1 if the hash locks cannot be taken.  A repeated request of the
+ * same type only bumps the count. */
+int tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
+		       enum tdb_lock_flags flags, bool upgradable)
+{
+	tdb_off_t hash_size;
+
+	/* FIXME: There are no locks on read-only dbs */
+	if (tdb->read_only) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_lock: read-only\n");
+		return -1;
+	}
+
+	if (tdb->allrecord_lock.count && tdb->allrecord_lock.ltype == ltype) {
+		tdb->allrecord_lock.count++;
+		return 0;
+	}
+
+	if (tdb->allrecord_lock.count) {
+		/* a global lock of a different type exists */
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_lock: already have %s lock\n",
+			 tdb->allrecord_lock.ltype == F_RDLCK
+			 ? "read" : "write");
+		return -1;
+	}
+
+	if (tdb_has_locks(tdb)) {
+		/* can't combine global and chain locks */
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_lock: already have chain lock\n");
+		return -1;
+	}
+
+	if (upgradable && ltype != F_RDLCK) {
+		/* tdb error: you can't upgrade a write lock! */
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_lock: can't upgrade a write lock\n");
+		return -1;
+	}
+
+	/* Lock all the hash buckets. */
+again:
+	/* Remember what we locked: the header may change on re-check. */
+	hash_size = (1ULL << tdb->header.v.hash_bits);
+	/* tdb_lock_gradual() takes (tdb, ltype, flags, off, len). */
+	if (tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
+			     hash_size)) {
+		if (!(flags & TDB_LOCK_PROBE)) {
+			tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+				 "tdb_lockall hashes failed (%s)\n",
+				 strerror(errno));
+		}
+		return -1;
+	}
+
+	/* Now we re-check header, holding lock. */
+	if (unlikely(update_header(tdb))) {
+		tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
+		goto again;
+	}
+
+	/* Now check for needing recovery. */
+	if (unlikely(tdb_needs_recovery(tdb))) {
+		tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
+		if (tdb_lock_and_recover(tdb) == -1) {
+			return -1;
+		}
+		goto again;
+	}
+
+
+	tdb->allrecord_lock.count = 1;
+	/* If it's upgradable, it's actually exclusive so we can treat
+	 * it as a write lock. */
+	tdb->allrecord_lock.ltype = upgradable ? F_WRLCK : ltype;
+	tdb->allrecord_lock.off = upgradable;
+	return 0;
+}
+
+/* Take the open lock (a blocking nested write lock on TDB_OPEN_LOCK). */
+int tdb_lock_open(struct tdb_context *tdb)
+{
+	return tdb_nest_lock(tdb, TDB_OPEN_LOCK, F_WRLCK, TDB_LOCK_WAIT);
+}
+
+/* Release the open lock; the result of the unlock is ignored. */
+void tdb_unlock_open(struct tdb_context *tdb)
+{
+	tdb_nest_unlock(tdb, TDB_OPEN_LOCK, F_WRLCK);
+}
+
+/* unlock entire db */
+/* Drops one level of the allrecord lock; the hash-range fcntl lock is
+ * released only when the count reaches zero.  Returns 0 on success,
+ * -1 if the database is read-only, not locked, or locked with a
+ * different type. */
+int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype)
+{
+	tdb_off_t hash_size;
+
+	/* FIXME: There are no locks on read-only dbs */
+	if (tdb->read_only) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_unlock: read-only\n");
+		return -1;
+	}
+
+	if (tdb->allrecord_lock.count == 0) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_unlock: not locked!\n");
+		return -1;
+	}
+
+	/* Upgradable locks are marked as write locks. */
+	if (tdb->allrecord_lock.ltype != ltype
+	    && (!tdb->allrecord_lock.off || ltype != F_RDLCK)) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_allrecord_unlock: have %s lock\n",
+			 tdb->allrecord_lock.ltype == F_RDLCK
+			 ? "read" : "write");
+		return -1;
+	}
+
+	if (tdb->allrecord_lock.count > 1) {
+		tdb->allrecord_lock.count--;
+		return 0;
+	}
+
+	tdb->allrecord_lock.count = 0;
+	tdb->allrecord_lock.ltype = 0;
+
+	hash_size = (1ULL << tdb->header.v.hash_bits);
+
+	return tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, hash_size);
+}
+
+/* True if an allrecord lock or any nested lock is currently held. */
+bool tdb_has_locks(struct tdb_context *tdb)
+{
+	return tdb->allrecord_lock.count || tdb->num_lockrecs;
+}
+
+#if 0
+/* lock entire database with write lock */
+int tdb_lockall(struct tdb_context *tdb)
+{
+	tdb_trace(tdb, "tdb_lockall");
+	return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false);
+}
+
+/* lock entire database with write lock - nonblocking varient */
+int tdb_lockall_nonblock(struct tdb_context *tdb)
+{
+	int ret = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_NOWAIT, false);
+	tdb_trace_ret(tdb, "tdb_lockall_nonblock", ret);
+	return ret;
+}
+
+/* unlock entire database with write lock */
+int tdb_unlockall(struct tdb_context *tdb)
+{
+	tdb_trace(tdb, "tdb_unlockall");
+	return tdb_allrecord_unlock(tdb, F_WRLCK);
+}
+
+/* lock entire database with read lock */
+int tdb_lockall_read(struct tdb_context *tdb)
+{
+	tdb_trace(tdb, "tdb_lockall_read");
+	return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false);
+}
+
+/* lock entire database with read lock - nonblock varient */
+int tdb_lockall_read_nonblock(struct tdb_context *tdb)
+{
+	int ret = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_NOWAIT, false);
+	tdb_trace_ret(tdb, "tdb_lockall_read_nonblock", ret);
+	return ret;
+}
+
+/* unlock entire database with read lock */
+int tdb_unlockall_read(struct tdb_context *tdb)
+{
+	tdb_trace(tdb, "tdb_unlockall_read");
+	return tdb_allrecord_unlock(tdb, F_RDLCK);
+}
+#endif
+
+/* Lock hash list "list".  A held allrecord lock of the same type (or
+ * any allrecord lock, for a read request) already covers it and makes
+ * this a no-op; a write request under a read allrecord lock fails. */
+int tdb_lock_list(struct tdb_context *tdb, tdb_off_t list,
+		  int ltype, enum tdb_lock_flags waitflag)
+{
+	/* a allrecord lock allows us to avoid per chain locks */
+	if (tdb->allrecord_lock.count &&
+	    (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {
+		return 0;
+	}
+
+	if (tdb->allrecord_lock.count) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv,
+			 "tdb_lock_list: have %s allrecordlock\n",
+			 tdb->allrecord_lock.ltype == F_RDLCK
+			 ? "read" : "write");
+		return -1;
+	}
+
+	/* FIXME: Should we do header_uptodate and return retry here? */
+	return tdb_nest_lock(tdb, TDB_HASH_LOCK_START + list, ltype, waitflag);
+}
+
+/* Unlock hash list "list".  A no-op while an allrecord lock is held,
+ * except that a write unlock under a read allrecord lock is an error. */
+int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype)
+{
+	/* a allrecord lock allows us to avoid per chain locks */
+	if (tdb->allrecord_lock.count) {
+		if (tdb->allrecord_lock.ltype == F_RDLCK
+		    && ltype == F_WRLCK) {
+			tdb->ecode = TDB_ERR_LOCK;
+			tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+				 "tdb_unlock_list RO allrecord!\n");
+			return -1;
+		}
+		return 0;
+	} else {
+		return tdb_nest_unlock(tdb, TDB_HASH_LOCK_START + list, ltype);
+	}
+}
+
+/* Free list locks come after hash locks */
+/* Write-locks free list "flist".  The caller must already hold some
+ * lock; a write allrecord lock already covers it, a read one is an
+ * error. */
+int tdb_lock_free_list(struct tdb_context *tdb, tdb_off_t flist,
+		       enum tdb_lock_flags waitflag)
+{
+	/* You're supposed to have a hash lock first! */
+	if (!tdb_has_locks(tdb)) {
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+			 "tdb_lock_free_list without lock!\n");
+		return -1;
+	}
+
+	/* a allrecord lock allows us to avoid per chain locks */
+	if (tdb->allrecord_lock.count) {
+		if (tdb->allrecord_lock.ltype == F_WRLCK)
+			return 0;
+		tdb->ecode = TDB_ERR_LOCK;
+		tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv,
+			 "tdb_lock_free_list with RO allrecordlock!\n");
+		return -1;
+	}
+
+	/* Free-list lock bytes sit directly after the hash lock bytes. */
+	return tdb_nest_lock(tdb, TDB_HASH_LOCK_START
+			     + (1ULL << tdb->header.v.hash_bits)
+			     + flist, F_WRLCK, waitflag);
+}
+
+/* Release free list "flist"; a no-op while an allrecord lock is held. */
+void tdb_unlock_free_list(struct tdb_context *tdb, tdb_off_t flist)
+{
+	if (tdb->allrecord_lock.count)
+		return;
+
+	tdb_nest_unlock(tdb, TDB_HASH_LOCK_START
+			+ (1ULL << tdb->header.v.hash_bits)
+			+ flist, F_WRLCK);
+}
+
+#if 0
+static int chainlock_loop(struct tdb_context *tdb, const TDB_DATA *key,
+			  int ltype, enum tdb_lock_flags waitflag,
+			  const char *func)
+{
+	int ret;
+	uint64_t h = tdb_hash(tdb, key->dptr, key->dsize);
+
+again:
+	ret = tdb_lock_list(tdb,
+			    h & ((1ULL << tdb->header.v.hash_bits) - 1),
+			    ltype, waitflag);
+	if (likely(ret == 0) && unlikely(update_header(tdb))) {
+		tdb_unlock_list(tdb, h & ((1ULL << tdb->header.v.hash_bits)-1),
+				ltype);
+		goto again;
+	}
+
+	tdb_trace_1rec(tdb, func, *key);
+	return ret;
+}
+
+/* lock/unlock one hash chain. This is meant to be used to reduce
+   contention - it cannot guarantee how many records will be locked */
+int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
+{
+	return chainlock_loop(tdb, &key, F_WRLCK, TDB_LOCK_WAIT,
+			      "tdb_chainlock");
+}
+
+/* lock/unlock one hash chain, non-blocking. 
This is meant to be used + to reduce contention - it cannot guarantee how many records will be + locked */ +int tdb_chainlock_nonblock(struct tdb_context *tdb, TDB_DATA key) +{ + return chainlock_loop(tdb, &key, F_WRLCK, TDB_LOCK_NOWAIT, + "tdb_chainlock_nonblock"); +} + +int tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key) +{ + uint64_t h = tdb_hash(tdb, key.dptr, key.dsize); + tdb_trace_1rec(tdb, "tdb_chainunlock", key); + return tdb_unlock_list(tdb, h & ((1ULL << tdb->header.v.hash_bits)-1), + F_WRLCK); +} + +int tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key) +{ + return chainlock_loop(tdb, &key, F_RDLCK, TDB_LOCK_WAIT, + "tdb_chainlock_read"); +} + +int tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key) +{ + uint64_t h = tdb_hash(tdb, key.dptr, key.dsize); + tdb_trace_1rec(tdb, "tdb_chainunlock_read", key); + return tdb_unlock_list(tdb, h & ((1ULL << tdb->header.v.hash_bits)-1), + F_RDLCK); +} + +/* record lock stops delete underneath */ +int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off) +{ + if (tdb->allrecord_lock.count) { + return 0; + } + return off ? tdb_brlock(tdb, F_RDLCK, off, 1, TDB_LOCK_WAIT) : 0; +} + +/* + Write locks override our own fcntl readlocks, so check it here. + Note this is meant to be F_SETLK, *not* F_SETLKW, as it's not + an error to fail to get the lock here. 
+*/ +int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off) +{ + struct tdb_traverse_lock *i; + for (i = &tdb->travlocks; i; i = i->next) + if (i->off == off) + return -1; + if (tdb->allrecord_lock.count) { + if (tdb->allrecord_lock.ltype == F_WRLCK) { + return 0; + } + return -1; + } + return tdb_brlock(tdb, F_WRLCK, off, 1, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); +} + +int tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off) +{ + if (tdb->allrecord_lock.count) { + return 0; + } + return tdb_brunlock(tdb, F_WRLCK, off, 1); +} + +/* fcntl locks don't stack: avoid unlocking someone else's */ +int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off) +{ + struct tdb_traverse_lock *i; + uint32_t count = 0; + + if (tdb->allrecord_lock.count) { + return 0; + } + + if (off == 0) + return 0; + for (i = &tdb->travlocks; i; i = i->next) + if (i->off == off) + count++; + return (count == 1 ? tdb_brunlock(tdb, F_RDLCK, off, 1) : 0); +} + +/* The transaction code uses this to remove all locks. 
*/ +void tdb_release_transaction_locks(struct tdb_context *tdb) +{ + unsigned int i; + + if (tdb->allrecord_lock.count != 0) { + tdb_off_t hash_size, free_size; + + hash_size = (1ULL << tdb->header.v.hash_bits) + * sizeof(tdb_off_t); + free_size = tdb->header.v.free_zones + * (tdb->header.v.free_buckets + 1) * sizeof(tdb_off_t); + + tdb_brunlock(tdb, tdb->allrecord_lock.ltype, + tdb->header.v.hash_off, hash_size); + tdb_brunlock(tdb, tdb->allrecord_lock.ltype, + tdb->header.v.free_off, free_size); + tdb->allrecord_lock.count = 0; + tdb->allrecord_lock.ltype = 0; + } + + for (i = 0; inum_lockrecs; i++) { + struct tdb_lock_type *lck = &tdb->lockrecs[i]; + + tdb_brunlock(tdb, lck->ltype, lck->off, 1); + } + tdb->num_lockrecs = 0; + SAFE_FREE(tdb->lockrecs); + tdb->header_uptodate = false; +} +#endif diff --git a/ccan/tdb2/private.h b/ccan/tdb2/private.h new file mode 100644 index 00000000..1fe15635 --- /dev/null +++ b/ccan/tdb2/private.h @@ -0,0 +1,456 @@ +#ifndef TDB_PRIVATE_H +#define TDB_PRIVATE_H + /* + Trivial Database 2: private types and prototypes + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . 
+*/ + +#define _XOPEN_SOURCE 500 +#define _FILE_OFFSET_BITS 64 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "config.h" +#include +#include +#ifdef HAVE_BYTESWAP_H +#include +#endif + +#ifndef TEST_IT +#define TEST_IT(cond) +#endif + +/* #define TDB_TRACE 1 */ + +#ifndef __STRING +#define __STRING(x) #x +#endif + +#ifndef __STRINGSTRING +#define __STRINGSTRING(x) __STRING(x) +#endif + +#ifndef __location__ +#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__) +#endif + +typedef uint64_t tdb_len_t; +typedef uint64_t tdb_off_t; + +#ifndef offsetof +#define offsetof(t,f) ((unsigned int)&((t *)0)->f) +#endif + +#define TDB_MAGIC_FOOD "TDB file\n" +#define TDB_VERSION ((uint64_t)(0x26011967 + 7)) +#define TDB_MAGIC ((uint64_t)0x1999) +#define TDB_FREE_MAGIC (~(uint64_t)TDB_MAGIC) +#define TDB_HASH_MAGIC (0xA1ABE11A01092008ULL) +#define TDB_RECOVERY_MAGIC (0xf53bc0e7U) +#define TDB_RECOVERY_INVALID_MAGIC (0x0) +#define TDB_EXTRA_HASHBITS (11) /* We steal 11 bits to stash hash info. */ +#define TDB_EXTRA_HASHBITS_NUM (3) + +#define TDB_OFF_ERR ((tdb_off_t)-1) + +/* Prevent others from opening the file. */ +#define TDB_OPEN_LOCK 0 +/* Doing a transaction. */ +#define TDB_TRANSACTION_LOCK 1 +/* Hash chain locks. */ +#define TDB_HASH_LOCK_START 2 + +/* We start wih 256 hash buckets, 10 free buckets. A 1k-sized zone. 
*/ +#define INITIAL_HASH_BITS 8 +#define INITIAL_FREE_BUCKETS 10 +#define INITIAL_ZONE_BITS 10 + +#if !HAVE_BSWAP_64 +static inline uint64_t bswap_64(uint64_t x) +{ + return (((x&0x000000FFULL)<<56) + | ((x&0x0000FF00ULL)<<48) + | ((x&0x00FF0000ULL)<<40) + | ((x&0xFF000000ULL)<<32) + | ((x>>8)&0xFF000000ULL) + | ((x>>16)&0x00FF0000ULL) + | ((x>>24)&0x0000FF00ULL) + | ((x>>32)&0x000000FFULL)); +} +#endif + +struct tdb_used_record { + /* For on-disk compatibility, we avoid bitfields: + magic: 16, (highest) + key_len_bits: 5, + hash:11, + extra_padding: 32 (lowest) + */ + uint64_t magic_and_meta; + /* The bottom key_len_bits*2 are key length, rest is data length. */ + uint64_t key_and_data_len; +}; + +static inline unsigned rec_key_bits(const struct tdb_used_record *r) +{ + return ((r->magic_and_meta >> 43) & ((1 << 5)-1)) * 2; +} + +static inline uint64_t rec_key_length(const struct tdb_used_record *r) +{ + return r->key_and_data_len & ((1ULL << rec_key_bits(r)) - 1); +} + +static inline uint64_t rec_data_length(const struct tdb_used_record *r) +{ + return r->key_and_data_len >> rec_key_bits(r); +} + +static inline uint64_t rec_extra_padding(const struct tdb_used_record *r) +{ + return r->magic_and_meta & 0xFFFFFFFF; +} + +static inline uint64_t rec_hash(const struct tdb_used_record *r) +{ + return ((r->magic_and_meta >> 32) & ((1ULL << 11) - 1)) << (64 - 11); +} + +static inline uint16_t rec_magic(const struct tdb_used_record *r) +{ + return (r->magic_and_meta >> 48); +} + +struct tdb_free_record { + uint64_t magic; + uint64_t data_len; /* Not counting these two fields. */ + /* This is why the minimum record size is 16 bytes. */ + uint64_t next, prev; +}; + +/* These parts can change while we have db open. */ +struct tdb_header_volatile { + uint64_t generation; /* Makes sure it changes on every update. */ + uint64_t hash_bits; /* Entries in hash table. */ + uint64_t hash_off; /* Offset of hash table. */ + uint64_t num_zones; /* How many zones in the file. 
*/ + uint64_t zone_bits; /* Size of zones. */ + uint64_t free_buckets; /* How many buckets in each zone. */ + uint64_t free_off; /* Arrays of free entries. */ +}; + +/* this is stored at the front of every database */ +struct tdb_header { + char magic_food[32]; /* for /etc/magic */ + uint64_t version; /* version of the code */ + uint64_t hash_test; /* result of hashing HASH_MAGIC. */ + uint64_t hash_seed; /* "random" seed written at creation time. */ + + struct tdb_header_volatile v; + + tdb_off_t reserved[19]; +}; + +enum tdb_lock_flags { + /* WAIT == F_SETLKW, NOWAIT == F_SETLK */ + TDB_LOCK_NOWAIT = 0, + TDB_LOCK_WAIT = 1, + /* If set, don't log an error on failure. */ + TDB_LOCK_PROBE = 2, +}; + +struct tdb_lock_type { + uint32_t off; + uint32_t count; + uint32_t ltype; +}; + +struct tdb_context { + /* Filename of the database. */ + const char *name; + + /* Mmap (if any), or malloc (for TDB_INTERNAL). */ + void *map_ptr; + + /* Open file descriptor (undefined for TDB_INTERNAL). */ + int fd; + + /* How much space has been mapped (<= current file size) */ + tdb_len_t map_size; + + /* Opened read-only? */ + bool read_only; + + /* Error code for last tdb error. */ + enum TDB_ERROR ecode; + + /* A cached copy of the header */ + struct tdb_header header; + /* (for debugging). */ + bool header_uptodate; + + /* the flags passed to tdb_open, for tdb_reopen. */ + uint32_t flags; + + /* Logging function */ + tdb_logfn_t log; + void *log_priv; + + /* Hash function. */ + tdb_hashfn_t khash; + void *hash_priv; + + /* What zone of the tdb to use, for spreading load. */ + uint64_t last_zone; + + /* IO methods: changes for transactions. */ + const struct tdb_methods *methods; + + /* Lock information */ + struct tdb_lock_type allrecord_lock; + uint64_t num_lockrecs; + struct tdb_lock_type *lockrecs; + + /* Set if we are in a transaction. */ + struct tdb_transaction *transaction; + + /* Single list of all TDBs, to avoid multiple opens. 
*/ + struct tdb_context *next; + dev_t device; + ino_t inode; +}; + +struct tdb_methods { + int (*read)(struct tdb_context *, tdb_off_t, void *, tdb_len_t); + int (*write)(struct tdb_context *, tdb_off_t, const void *, tdb_len_t); + int (*oob)(struct tdb_context *, tdb_off_t, bool); + int (*expand_file)(struct tdb_context *, tdb_len_t, tdb_len_t); +}; + +/* + internal prototypes +*/ +/* tdb.c: */ +/* Returns true if header changed. */ +bool update_header(struct tdb_context *tdb); + +/* Hash random memory. */ +uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len); + + +/* free.c: */ +uint64_t random_free_zone(struct tdb_context *tdb); + +/* If this fails, try tdb_expand. */ +tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, + uint64_t hash, bool growing); + +/* Put this record in a free list. */ +int add_free_record(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len_with_header); + +/* Set up header for a used record. */ +int set_header(struct tdb_context *tdb, + struct tdb_used_record *rec, + uint64_t keylen, uint64_t datalen, + uint64_t actuallen, uint64_t hash); + +/* Used by tdb_check to verify. */ +unsigned int size_to_bucket(struct tdb_context *tdb, tdb_len_t data_len); +tdb_off_t zone_of(struct tdb_context *tdb, tdb_off_t off); + +/* io.c: */ +/* Initialize tdb->methods. */ +void tdb_io_init(struct tdb_context *tdb); + +/* Convert endian of the buffer if required. */ +void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size); + +/* Unmap and try to map the tdb. */ +void tdb_munmap(struct tdb_context *tdb); +void tdb_mmap(struct tdb_context *tdb); + +/* Hand data to a function, direct if possible */ +int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key, + tdb_off_t offset, tdb_len_t len, + int (*parser)(TDB_DATA key, TDB_DATA data, + void *private_data), + void *private_data); + +/* Either make a copy into pad and return that, or return ptr into mmap. + * Converts endian (ie. 
will use pad in that case). */ +void *tdb_get(struct tdb_context *tdb, tdb_off_t off, void *pad, size_t len); + +/* Either alloc a copy, or give direct access. Release frees or noop. */ +const void *tdb_access_read(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len); +void tdb_access_release(struct tdb_context *tdb, const void *p); + +/* Convenience routine to get an offset. */ +tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off); + +/* Write an offset at an offset. */ +int tdb_write_off(struct tdb_context *tdb, tdb_off_t off, tdb_off_t val); + +/* Clear an ondisk area. */ +int zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len); + +/* Return a non-zero offset in this array, or num. */ +tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb, tdb_off_t off, + uint64_t num); + +/* Return a zero offset in this array, or num. */ +tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off, + uint64_t num); + +/* Even on files, we can get partial writes due to signals. */ +bool tdb_pwrite_all(int fd, const void *buf, size_t len, tdb_off_t off); + +/* Allocate and make a copy of some offset. */ +void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len); + +/* Munges record and writes it */ +int tdb_write_convert(struct tdb_context *tdb, tdb_off_t off, + void *rec, size_t len); + +/* Reads record and converts it */ +int tdb_read_convert(struct tdb_context *tdb, tdb_off_t off, + void *rec, size_t len); + +/* Hash on disk. */ +uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off); + +/* lock.c: */ +/* Lock/unlock a particular hash list. */ +int tdb_lock_list(struct tdb_context *tdb, tdb_off_t list, + int ltype, enum tdb_lock_flags waitflag); +int tdb_unlock_list(struct tdb_context *tdb, tdb_off_t list, int ltype); + +/* Lock/unlock a particular free list. 
*/ +int tdb_lock_free_list(struct tdb_context *tdb, tdb_off_t flist, + enum tdb_lock_flags waitflag); +void tdb_unlock_free_list(struct tdb_context *tdb, tdb_off_t flist); + +/* Do we have any locks? */ +bool tdb_has_locks(struct tdb_context *tdb); + +/* Lock entire database. */ +int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags, bool upgradable); +int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype); + +/* Serialize db open. */ +int tdb_lock_open(struct tdb_context *tdb); +void tdb_unlock_open(struct tdb_context *tdb); +/* Expand the file. */ +int tdb_expand(struct tdb_context *tdb, tdb_len_t klen, tdb_len_t dlen, + bool growing); + +#if 0 +/* Low-level locking primitives. */ +int tdb_nest_lock(struct tdb_context *tdb, tdb_off_t offset, int ltype, + enum tdb_lock_flags flags); +int tdb_nest_unlock(struct tdb_context *tdb, tdb_off_t offset, int ltype); + +int tdb_munmap(struct tdb_context *tdb); +void tdb_mmap(struct tdb_context *tdb); +int tdb_lock(struct tdb_context *tdb, int list, int ltype); +int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype); +bool tdb_have_locks(struct tdb_context *tdb); +int tdb_unlock(struct tdb_context *tdb, int list, int ltype); +int tdb_brlock(struct tdb_context *tdb, + int rw_type, tdb_off_t offset, size_t len, + enum tdb_lock_flags flags); +int tdb_brunlock(struct tdb_context *tdb, + int rw_type, tdb_off_t offset, size_t len); +bool tdb_have_extra_locks(struct tdb_context *tdb); +void tdb_release_extra_locks(struct tdb_context *tdb); +int tdb_transaction_lock(struct tdb_context *tdb, int ltype); +int tdb_transaction_unlock(struct tdb_context *tdb, int ltype); +int tdb_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags, bool upgradable); +int tdb_allrecord_unlock(struct tdb_context *tdb, int ltype); +int tdb_allrecord_upgrade(struct tdb_context *tdb); +int tdb_write_lock_record(struct tdb_context *tdb, tdb_off_t off); +int 
tdb_write_unlock_record(struct tdb_context *tdb, tdb_off_t off); +int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec); +tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec); +int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); +int tdb_lock_record(struct tdb_context *tdb, tdb_off_t off); +int tdb_unlock_record(struct tdb_context *tdb, tdb_off_t off); +bool tdb_needs_recovery(struct tdb_context *tdb); +int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec); +int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec); +int tdb_do_delete(struct tdb_context *tdb, tdb_off_t rec_ptr, struct tdb_record *rec); +unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len); +int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key, + tdb_off_t offset, tdb_len_t len, + int (*parser)(TDB_DATA key, TDB_DATA data, + void *private_data), + void *private_data); +tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype, + struct tdb_record *rec); +void tdb_io_init(struct tdb_context *tdb); +int tdb_expand(struct tdb_context *tdb, tdb_off_t size); +int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, + struct tdb_record *rec); +#endif + +#ifdef TDB_TRACE +void tdb_trace(struct tdb_context *tdb, const char *op); +void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op); +void tdb_trace_open(struct tdb_context *tdb, const char *op, + unsigned hash_size, unsigned tdb_flags, unsigned open_flags); +void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret); +void tdb_trace_retrec(struct tdb_context *tdb, const char 
*op, TDB_DATA ret); +void tdb_trace_1rec(struct tdb_context *tdb, const char *op, + TDB_DATA rec); +void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op, + TDB_DATA rec, int ret); +void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op, + TDB_DATA rec, TDB_DATA ret); +void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op, + TDB_DATA rec1, TDB_DATA rec2, unsigned flag, + int ret); +void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op, + TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret); +#else +#define tdb_trace(tdb, op) +#define tdb_trace_seqnum(tdb, seqnum, op) +#define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags) +#define tdb_trace_ret(tdb, op, ret) +#define tdb_trace_retrec(tdb, op, ret) +#define tdb_trace_1rec(tdb, op, rec) +#define tdb_trace_1rec_ret(tdb, op, rec, ret) +#define tdb_trace_1rec_retrec(tdb, op, rec, ret) +#define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret) +#define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret) +#endif /* !TDB_TRACE */ + +#endif diff --git a/ccan/tdb2/tdb.c b/ccan/tdb2/tdb.c new file mode 100644 index 00000000..3cee472c --- /dev/null +++ b/ccan/tdb2/tdb.c @@ -0,0 +1,875 @@ +#include "private.h" +#include +#include +#include +#include + +/* The null return. */ +struct tdb_data tdb_null = { .dptr = NULL, .dsize = 0 }; + +/* all contexts, to ensure no double-opens (fcntl locks don't nest!) */ +static struct tdb_context *tdbs = NULL; + +PRINTF_ATTRIBUTE(4, 5) static void +null_log_fn(struct tdb_context *tdb, + enum tdb_debug_level level, void *priv, + const char *fmt, ...) +{ +} + +/* We do a lot of work assuming our copy of the header volatile area + * is uptodate, and usually it is. However, once we grab a lock, we have to + * re-check it. 
*/ +bool update_header(struct tdb_context *tdb) +{ + struct tdb_header_volatile pad, *v; + + if (tdb->header_uptodate) { + tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv, + "warning: header uptodate already\n"); + } + + /* We could get a partial update if we're not holding any locks. */ + assert(tdb_has_locks(tdb)); + + v = tdb_get(tdb, offsetof(struct tdb_header, v), &pad, sizeof(*v)); + if (!v) { + /* On failure, imply we updated header so they retry. */ + return true; + } + tdb->header_uptodate = true; + if (likely(memcmp(&tdb->header.v, v, sizeof(*v)) == 0)) { + return false; + } + tdb->header.v = *v; + return true; +} + +static uint64_t jenkins_hash(const void *key, size_t length, uint64_t seed, + void *arg) +{ + return hash64_any(key, length, seed); +} + +uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len) +{ + return tdb->khash(ptr, len, tdb->header.hash_seed, tdb->hash_priv); +} + +static bool tdb_already_open(dev_t device, ino_t ino) +{ + struct tdb_context *i; + + for (i = tdbs; i; i = i->next) { + if (i->device == device && i->inode == ino) { + return true; + } + } + + return false; +} + +static uint64_t random_number(struct tdb_context *tdb) +{ + int fd; + uint64_t ret = 0; + struct timeval now; + + fd = open("/dev/urandom", O_RDONLY); + if (fd >= 0) { + if (read(fd, &ret, sizeof(ret)) == sizeof(ret)) { + tdb->log(tdb, TDB_DEBUG_TRACE, tdb->log_priv, + "tdb_open: random from /dev/urandom\n"); + close(fd); + return ret; + } + close(fd); + } + /* FIXME: Untested! Based on Wikipedia protocol description! */ + fd = open("/dev/egd-pool", O_RDWR); + if (fd >= 0) { + /* Command is 1, next byte is size we want to read. 
*/ + char cmd[2] = { 1, sizeof(uint64_t) }; + if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) { + char reply[1 + sizeof(uint64_t)]; + int r = read(fd, reply, sizeof(reply)); + if (r > 1) { + tdb->log(tdb, TDB_DEBUG_TRACE, tdb->log_priv, + "tdb_open: %u random bytes from" + " /dev/egd-pool\n", r-1); + /* Copy at least some bytes. */ + memcpy(&ret, reply+1, r - 1); + if (reply[0] == sizeof(uint64_t) + && r == sizeof(reply)) { + close(fd); + return ret; + } + } + } + close(fd); + } + + /* Fallback: pid and time. */ + gettimeofday(&now, NULL); + ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec; + tdb->log(tdb, TDB_DEBUG_TRACE, tdb->log_priv, + "tdb_open: random from getpid and time\n"); + return ret; +} + +struct new_database { + struct tdb_header hdr; + struct tdb_used_record hrec; + tdb_off_t hash[1ULL << INITIAL_HASH_BITS]; + struct tdb_used_record frec; + tdb_off_t free[INITIAL_FREE_BUCKETS + 1]; /* One overflow bucket */ +}; + +/* initialise a new database */ +static int tdb_new_database(struct tdb_context *tdb) +{ + /* We make it up in memory, then write it out if not internal */ + struct new_database newdb; + + /* Fill in the header */ + newdb.hdr.version = TDB_VERSION; + newdb.hdr.hash_seed = random_number(tdb); + newdb.hdr.hash_test = TDB_HASH_MAGIC; + newdb.hdr.hash_test = tdb->khash(&newdb.hdr.hash_test, + sizeof(newdb.hdr.hash_test), + newdb.hdr.hash_seed, + tdb->hash_priv); + + newdb.hdr.v.generation = 0; + + /* Free array has 1 zone, 10 buckets. All buckets empty. */ + newdb.hdr.v.num_zones = 1; + newdb.hdr.v.zone_bits = INITIAL_ZONE_BITS; + newdb.hdr.v.free_buckets = INITIAL_FREE_BUCKETS; + newdb.hdr.v.free_off = offsetof(struct new_database, free); + set_header(tdb, &newdb.frec, 0, + sizeof(newdb.free), sizeof(newdb.free), 0); + memset(newdb.free, 0, sizeof(newdb.free)); + + /* Initial hashes are empty. 
*/ + newdb.hdr.v.hash_bits = INITIAL_HASH_BITS; + newdb.hdr.v.hash_off = offsetof(struct new_database, hash); + set_header(tdb, &newdb.hrec, 0, + sizeof(newdb.hash), sizeof(newdb.hash), 0); + memset(newdb.hash, 0, sizeof(newdb.hash)); + + if (tdb->flags & TDB_INTERNAL) { + tdb->map_size = sizeof(newdb); + tdb->map_ptr = malloc(tdb->map_size); + if (!tdb->map_ptr) { + tdb->ecode = TDB_ERR_OOM; + return -1; + } + memcpy(tdb->map_ptr, &newdb, tdb->map_size); + tdb->header = newdb.hdr; + /* Convert the `ondisk' version if asked. */ + tdb_convert(tdb, tdb->map_ptr, sizeof(newdb)); + return 0; + } + if (lseek(tdb->fd, 0, SEEK_SET) == -1) + return -1; + + if (ftruncate(tdb->fd, 0) == -1) + return -1; + + /* This creates an endian-converted header, as if read from disk */ + tdb->header = newdb.hdr; + tdb_convert(tdb, &tdb->header, sizeof(tdb->header)); + + /* Don't endian-convert the magic food! */ + memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food)); + strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD); + + if (!tdb_pwrite_all(tdb->fd, &newdb, sizeof(newdb), 0)) { + tdb->ecode = TDB_ERR_IO; + return -1; + } + return 0; +} + +struct tdb_context *tdb_open(const char *name, int tdb_flags, + int open_flags, mode_t mode, + union tdb_attribute *attr) +{ + struct tdb_context *tdb; + struct stat st; + int save_errno; + uint64_t hash_test; + unsigned v; + + if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { + /* Can't log this */ + errno = ENOMEM; + goto fail; + } + tdb->fd = -1; + tdb->name = NULL; + tdb->map_ptr = NULL; + tdb->flags = tdb_flags; + tdb->log = null_log_fn; + tdb->log_priv = NULL; + tdb->khash = jenkins_hash; + tdb->hash_priv = NULL; + + /* FIXME */ + if (attr) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: attributes not yet supported\n"); + errno = EINVAL; + goto fail; + } + + if ((open_flags & O_ACCMODE) == O_WRONLY) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: can't open tdb %s write-only\n", name); + errno = 
EINVAL; + goto fail; + } + + if ((open_flags & O_ACCMODE) == O_RDONLY) { + tdb->read_only = 1; + /* read only databases don't do locking */ + tdb->flags |= TDB_NOLOCK; + } + + /* internal databases don't mmap or lock */ + if (tdb->flags & TDB_INTERNAL) { + tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP); + if (tdb_new_database(tdb) != 0) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: tdb_new_database failed!"); + goto fail; + } + TEST_IT(tdb->flags & TDB_CONVERT); + goto internal; + } + + if ((tdb->fd = open(name, open_flags, mode)) == -1) { + tdb->log(tdb, TDB_DEBUG_WARNING, tdb->log_priv, + "tdb_open: could not open file %s: %s\n", + name, strerror(errno)); + goto fail; /* errno set by open(2) */ + } + + /* on exec, don't inherit the fd */ + v = fcntl(tdb->fd, F_GETFD, 0); + fcntl(tdb->fd, F_SETFD, v | FD_CLOEXEC); + + /* ensure there is only one process initialising at once */ + if (tdb_lock_open(tdb) == -1) { + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: failed to get open lock on %s: %s\n", + name, strerror(errno)); + goto fail; /* errno set by tdb_brlock */ + } + + errno = 0; + if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) + || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) { + if (!(open_flags & O_CREAT) || tdb_new_database(tdb) == -1) { + if (errno == 0) { + errno = EIO; /* ie bad format or something */ + } + goto fail; + } + } else if (tdb->header.version != TDB_VERSION) { + if (tdb->header.version == bswap_64(TDB_VERSION)) + tdb->flags |= TDB_CONVERT; + else { + /* wrong version */ + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: %s is unknown version 0x%llx\n", + name, (long long)tdb->header.version); + errno = EIO; + goto fail; + } + } + + tdb_convert(tdb, &tdb->header, sizeof(tdb->header)); + hash_test = TDB_HASH_MAGIC; + hash_test = tdb->khash(&hash_test, sizeof(hash_test), + tdb->header.hash_seed, tdb->hash_priv); + if (tdb->header.hash_test != hash_test) { + /* wrong hash variant 
*/ + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: %s uses a different hash function\n", + name); + errno = EIO; + goto fail; + } + + if (fstat(tdb->fd, &st) == -1) + goto fail; + + /* Is it already in the open list? If so, fail. */ + if (tdb_already_open(st.st_dev, st.st_ino)) { + /* FIXME */ + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: %s (%d,%d) is already open in this process\n", + name, (int)st.st_dev, (int)st.st_ino); + errno = EBUSY; + goto fail; + } + + tdb->name = strdup(name); + if (!tdb->name) { + errno = ENOMEM; + goto fail; + } + + tdb->map_size = st.st_size; + tdb->device = st.st_dev; + tdb->inode = st.st_ino; + tdb_io_init(tdb); + tdb_mmap(tdb); + + internal: + /* Internal (memory-only) databases skip all the code above to + * do with disk files, and resume here by releasing their + * open lock and hooking into the active list. */ + tdb_unlock_open(tdb); + tdb->last_zone = random_free_zone(tdb); + tdb->next = tdbs; + tdbs = tdb; + return tdb; + + fail: + save_errno = errno; + + if (!tdb) + return NULL; + +#ifdef TDB_TRACE + close(tdb->tracefd); +#endif + if (tdb->map_ptr) { + if (tdb->flags & TDB_INTERNAL) { + free(tdb->map_ptr); + } else + tdb_munmap(tdb); + } + free((char *)tdb->name); + if (tdb->fd != -1) + if (close(tdb->fd) != 0) + tdb->log(tdb, TDB_DEBUG_ERROR, tdb->log_priv, + "tdb_open: failed to close tdb->fd" + " on error!\n"); + free(tdb); + errno = save_errno; + return NULL; +} + +static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data) +{ + return memcmp(data.dptr, key.dptr, data.dsize) == 0; +} + +static void unlock_lists(struct tdb_context *tdb, + uint64_t start, uint64_t end, int ltype) +{ + do { + tdb_unlock_list(tdb, start, ltype); + start = (start + ((1ULL << tdb->header.v.hash_bits) - 1)) + & ((1ULL << tdb->header.v.hash_bits) - 1); + } while (start != end); +} + +/* FIXME: Return header copy? */ +/* Returns -1 or offset of entry (0 if not found). 
+ * Locks hash entried from *start to *end (where the entry was found). */ +static tdb_off_t find_bucket_and_lock(struct tdb_context *tdb, + const struct tdb_data *key, + uint64_t hash, + uint64_t *start, + uint64_t *end, + uint64_t *room, + int ltype) +{ + uint64_t hextra; + tdb_off_t off; + + /* hash_bits might be out of date... */ +again: + *start = *end = hash & ((1ULL << tdb->header.v.hash_bits) - 1); + hextra = hash >> tdb->header.v.hash_bits; + + /* FIXME: can we avoid locks for some fast paths? */ + if (tdb_lock_list(tdb, *end, ltype, TDB_LOCK_WAIT) == -1) + return TDB_OFF_ERR; + + /* We only need to check this for first lock. */ + if (unlikely(update_header(tdb))) { + tdb_unlock_list(tdb, *end, ltype); + goto again; + } + + while ((off = tdb_read_off(tdb, tdb->header.v.hash_off + + *end * sizeof(tdb_off_t))) + != TDB_OFF_ERR) { + struct tdb_used_record pad, *r; + uint64_t keylen, next; + + /* Didn't find it? */ + if (!off) + return 0; + +#if 0 /* FIXME: Check other bits. */ + unsigned int bits, bitmask, hoffextra; + /* Bottom three bits show how many extra hash bits. */ + bits = (off & ((1 << TDB_EXTRA_HASHBITS_NUM) - 1)) + 1; + bitmask = (1 << bits)-1; + hoffextra = ((off >> TDB_EXTRA_HASHBITS_NUM) & bitmask); + if ((hextra & bitmask) != hoffextra) + goto lock_next; +#endif + + r = tdb_get(tdb, off, &pad, sizeof(*r)); + if (!r) + goto unlock_err; + + if (rec_magic(r) != TDB_MAGIC) { + tdb->ecode = TDB_ERR_CORRUPT; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "find_bucket_and_lock: bad magic 0x%llx" + " at offset %llu!\n", + (long long)rec_magic(r), (long long)off); + goto unlock_err; + } + + /* FIXME: check extra bits in header! */ + keylen = rec_key_length(r); + if (keylen != key->dsize) + goto lock_next; + + switch (tdb_parse_data(tdb, *key, off + sizeof(*r), key->dsize, + tdb_key_compare, NULL)) { + case 1: + /* Match! 
*/ + *room = rec_data_length(r) + rec_extra_padding(r); + return off >> TDB_EXTRA_HASHBITS_NUM; + case 0: + break; + default: + goto unlock_err; + } + + lock_next: + /* Lock next bucket. */ + /* FIXME: We can deadlock if this wraps! */ + next = (*end + 1) & ((1ULL << tdb->header.v.hash_bits) - 1); + if (next == *start) { + tdb->ecode = TDB_ERR_CORRUPT; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "find_bucket_and_lock: full hash table!\n"); + goto unlock_err; + } + if (tdb_lock_list(tdb, next, ltype, TDB_LOCK_WAIT) == -1) + goto unlock_err; + *end = next; + } + +unlock_err: + TEST_IT(*end < *start); + unlock_lists(tdb, *start, *end, ltype); + return TDB_OFF_ERR; +} + +static int update_rec_hdr(struct tdb_context *tdb, + tdb_off_t off, + tdb_len_t keylen, + tdb_len_t datalen, + tdb_len_t room, + uint64_t h) +{ + struct tdb_used_record rec; + + if (set_header(tdb, &rec, keylen, datalen, room - datalen, h)) + return -1; + + return tdb_write_convert(tdb, off, &rec, sizeof(rec)); +} + +/* If we fail, others will try after us. */ +static void enlarge_hash(struct tdb_context *tdb) +{ + tdb_off_t newoff, i; + uint64_t h, num = 1ULL << tdb->header.v.hash_bits; + struct tdb_used_record pad, *r; + + /* FIXME: We should do this without holding locks throughout. */ + if (tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false) == -1) + return; + + if (unlikely(update_header(tdb))) { + /* Someone else enlarged for us? Nothing to do. */ + if ((1ULL << tdb->header.v.hash_bits) != num) + goto unlock; + } + + newoff = alloc(tdb, 0, num * 2, 0, false); + if (unlikely(newoff == TDB_OFF_ERR)) + goto unlock; + if (unlikely(newoff == 0)) { + if (tdb_expand(tdb, 0, num * 2, false) == -1) + goto unlock; + newoff = alloc(tdb, 0, num * 2, 0, false); + if (newoff == TDB_OFF_ERR || newoff == 0) + goto unlock; + } + + /* FIXME: If the space before is empty, we know this is in its ideal + * location. We can steal a bit from the pointer to avoid rehash. 
*/ + for (i = tdb_find_nonzero_off(tdb, tdb->header.v.hash_off, num); + i < num; + i += tdb_find_nonzero_off(tdb, tdb->header.v.hash_off + + i*sizeof(tdb_off_t), num - i)) { + tdb_off_t off; + off = tdb_read_off(tdb, tdb->header.v.hash_off + + i*sizeof(tdb_off_t)); + if (unlikely(off == TDB_OFF_ERR)) + goto unlock; + if (unlikely(!off)) { + tdb->ecode = TDB_ERR_CORRUPT; + tdb->log(tdb, TDB_DEBUG_FATAL, tdb->log_priv, + "find_bucket_and_lock: zero hash bucket!\n"); + goto unlock; + } + h = hash_record(tdb, off); + /* FIXME: Encode extra hash bits! */ + if (tdb_write_off(tdb, newoff + + (h & ((num * 2) - 1)) * sizeof(uint64_t), + off) == -1) + goto unlock; + } + + /* Free up old hash. */ + r = tdb_get(tdb, tdb->header.v.hash_off, &pad, sizeof(*r)); + if (!r) + goto unlock; + add_free_record(tdb, tdb->header.v.hash_off, + rec_data_length(r) + rec_extra_padding(r)); + + /* Now we write the modified header. */ + tdb->header.v.generation++; + tdb->header.v.hash_bits++; + tdb->header.v.hash_off = newoff; + tdb_write_convert(tdb, offsetof(struct tdb_header, v), + &tdb->header.v, sizeof(tdb->header.v)); +unlock: + tdb_allrecord_unlock(tdb, F_WRLCK); +} + +int tdb_store(struct tdb_context *tdb, + struct tdb_data key, struct tdb_data dbuf, int flag) +{ + tdb_off_t new_off, off, start, end, room; + uint64_t h; + bool growing = false; + + h = tdb_hash(tdb, key.dptr, key.dsize); + off = find_bucket_and_lock(tdb, &key, h, &start, &end, &room, F_WRLCK); + if (off == TDB_OFF_ERR) + return -1; + + /* Now we have lock on this hash bucket. */ + if (flag == TDB_INSERT) { + if (off) { + tdb->ecode = TDB_ERR_EXISTS; + goto fail; + } + } else { + if (off) { + if (room >= key.dsize + dbuf.dsize) { + new_off = off; + if (update_rec_hdr(tdb, off, + key.dsize, dbuf.dsize, + room, h)) + goto fail; + goto write; + } + /* FIXME: See if right record is free? */ + /* Hint to allocator that we've realloced. 
*/ + growing = true; + } else { + if (flag == TDB_MODIFY) { + /* if the record doesn't exist and we + are in TDB_MODIFY mode then we should fail + the store */ + tdb->ecode = TDB_ERR_NOEXIST; + goto fail; + } + } + } + + /* Allocate a new record. */ + new_off = alloc(tdb, key.dsize, dbuf.dsize, h, growing); + if (new_off == 0) { + unlock_lists(tdb, start, end, F_WRLCK); + /* Expand, then try again... */ + if (tdb_expand(tdb, key.dsize, dbuf.dsize, growing) == -1) + return -1; + return tdb_store(tdb, key, dbuf, flag); + } + + /* We didn't like the existing one: remove it. */ + if (off) { + add_free_record(tdb, off, sizeof(struct tdb_used_record) + + key.dsize + room); + } + +write: + off = tdb->header.v.hash_off + end * sizeof(tdb_off_t); + /* FIXME: Encode extra hash bits! */ + if (tdb_write_off(tdb, off, new_off) == -1) + goto fail; + + off = new_off + sizeof(struct tdb_used_record); + if (tdb->methods->write(tdb, off, key.dptr, key.dsize) == -1) + goto fail; + off += key.dsize; + if (tdb->methods->write(tdb, off, dbuf.dptr, dbuf.dsize) == -1) + goto fail; + + /* FIXME: tdb_increment_seqnum(tdb); */ + unlock_lists(tdb, start, end, F_WRLCK); + + /* By simple trial and error, this roughly approximates a 60% + * full measure. 
*/ + if (unlikely(end - start > 4 * tdb->header.v.hash_bits - 32)) + enlarge_hash(tdb); + + return 0; + +fail: + unlock_lists(tdb, start, end, F_WRLCK); + return -1; +} + +struct tdb_data tdb_fetch(struct tdb_context *tdb, struct tdb_data key) +{ + tdb_off_t off, start, end, room; + uint64_t h; + struct tdb_used_record pad, *r; + struct tdb_data ret; + + h = tdb_hash(tdb, key.dptr, key.dsize); + off = find_bucket_and_lock(tdb, &key, h, &start, &end, &room, F_RDLCK); + if (off == TDB_OFF_ERR) + return tdb_null; + + if (!off) { + unlock_lists(tdb, start, end, F_RDLCK); + tdb->ecode = TDB_SUCCESS; + return tdb_null; + } + + r = tdb_get(tdb, off, &pad, sizeof(*r)); + if (!r) { + unlock_lists(tdb, start, end, F_RDLCK); + return tdb_null; + } + + ret.dsize = rec_data_length(r); + ret.dptr = tdb_alloc_read(tdb, off + sizeof(*r) + key.dsize, + ret.dsize); + unlock_lists(tdb, start, end, F_RDLCK); + return ret; +} + +static int hash_add(struct tdb_context *tdb, uint64_t h, tdb_off_t off) +{ + tdb_off_t i, hoff, len, num; + + i = (h & ((1ULL << tdb->header.v.hash_bits) - 1)); + hoff = tdb->header.v.hash_off + i * sizeof(tdb_off_t); + len = (1ULL << tdb->header.v.hash_bits) - i; + + /* Look for next space. */ + num = tdb_find_zero_off(tdb, hoff, len); + if (unlikely(num == len)) { + hoff = tdb->header.v.hash_off; + len = (1ULL << tdb->header.v.hash_bits); + num = tdb_find_zero_off(tdb, hoff, len); + if (i == len) + return -1; + } + /* FIXME: Encode extra hash bits! */ + return tdb_write_off(tdb, hoff + num * sizeof(tdb_off_t), off); +} + +static int unlink_used_record(struct tdb_context *tdb, tdb_off_t chain, + uint64_t *extra_locks) +{ + tdb_off_t num, len, i, hoff; + + /* FIXME: Maybe lock more in search? Maybe don't lock if scan + * finds none? */ +again: + len = (1ULL << tdb->header.v.hash_bits) - (chain + 1); + hoff = tdb->header.v.hash_off + (chain + 1) * sizeof(tdb_off_t); + num = tdb_find_zero_off(tdb, hoff, len); + + /* We want to lock the zero entry, too. 
In the wrap case, + * this locks one extra. That's harmless. */ + num++; + + for (i = chain + 1; i < chain + 1 + num; i++) { + if (tdb_lock_list(tdb, i, F_WRLCK, TDB_LOCK_WAIT) == -1) { + if (i != chain + 1) + unlock_lists(tdb, chain + 1, i-1, F_WRLCK); + return -1; + } + } + + /* The wrap case: we need those locks out of order! */ + if (unlikely(num == len + 1)) { + *extra_locks = tdb_find_zero_off(tdb, tdb->header.v.hash_off, + 1ULL << tdb->header.v.hash_bits); + (*extra_locks)++; + for (i = 0; i < *extra_locks; i++) { + if (tdb_lock_list(tdb, i, F_WRLCK, TDB_LOCK_NOWAIT)) { + /* Failed. Caller must lock in order. */ + if (i) + unlock_lists(tdb, 0, i-1, F_WRLCK); + unlock_lists(tdb, chain + 1, chain + num, + F_WRLCK); + return 1; + } + } + num += *extra_locks; + } + + /* Now we have the locks, be certain that offset is still 0! */ + hoff = tdb->header.v.hash_off + + (((chain + num) * sizeof(tdb_off_t)) + & ((1ULL << tdb->header.v.hash_bits) - 1)); + + if (unlikely(tdb_read_off(tdb, hoff) != 0)) { + unlock_lists(tdb, chain + 1, chain + num, F_WRLCK); + goto again; + } + + /* OK, all locked. Unlink first one. */ + hoff = tdb->header.v.hash_off + chain * sizeof(tdb_off_t); + if (tdb_write_off(tdb, hoff, 0) == -1) + goto unlock_err; + + /* Rehash the rest. */ + for (i = 1; i < num; i++) { + tdb_off_t off; + uint64_t h; + + hoff = tdb->header.v.hash_off + + (((chain + i) * sizeof(tdb_off_t)) + & ((1ULL << tdb->header.v.hash_bits) - 1)); + off = tdb_read_off(tdb, hoff); + if (unlikely(off == TDB_OFF_ERR)) + goto unlock_err; + + /* Maybe use a bit to indicate it is in ideal place? */ + h = hash_record(tdb, off); + /* Is it happy where it is? */ + if ((h & ((1ULL << tdb->header.v.hash_bits)-1)) == (chain + i)) + continue; + + /* Remove it. */ + if (tdb_write_off(tdb, hoff, 0) == -1) + goto unlock_err; + + /* Rehash it. 
*/ + if (hash_add(tdb, h, off) == -1) + goto unlock_err; + } + unlock_lists(tdb, chain + 1, chain + num, F_WRLCK); + return 0; + +unlock_err: + unlock_lists(tdb, chain + 1, chain + num, F_WRLCK); + return -1; +} + +int tdb_delete(struct tdb_context *tdb, struct tdb_data key) +{ + tdb_off_t off, start, end, room, extra_locks = 0; + uint64_t h; + int ret; + + h = tdb_hash(tdb, key.dptr, key.dsize); + off = find_bucket_and_lock(tdb, &key, h, &start, &end, &room, F_WRLCK); + if (off == TDB_OFF_ERR) + return -1; + + if (off == 0) { + unlock_lists(tdb, start, end, F_WRLCK); + tdb->ecode = TDB_ERR_NOEXIST; + return -1; + } + + ret = unlink_used_record(tdb, end, &extra_locks); + if (unlikely(ret == 1)) { + unsigned int i; + + unlock_lists(tdb, start, end, F_WRLCK); + + /* We need extra locks at the start. */ + for (i = 0; i < extra_locks; i++) { + if (tdb_lock_list(tdb, i, F_WRLCK, TDB_LOCK_WAIT)) { + if (i) + unlock_lists(tdb, 0, i-1, F_WRLCK); + return -1; + } + } + /* Try again now we're holding more locks. 
*/ + ret = tdb_delete(tdb, key); + unlock_lists(tdb, 0, i, F_WRLCK); + return ret; + } + unlock_lists(tdb, start, end, F_WRLCK); + return ret; +} + +int tdb_close(struct tdb_context *tdb) +{ + struct tdb_context **i; + int ret = 0; + + /* FIXME: + if (tdb->transaction) { + tdb_transaction_cancel(tdb); + } + */ + tdb_trace(tdb, "tdb_close"); + + if (tdb->map_ptr) { + if (tdb->flags & TDB_INTERNAL) + free(tdb->map_ptr); + else + tdb_munmap(tdb); + } + free((char *)tdb->name); + if (tdb->fd != -1) { + ret = close(tdb->fd); + tdb->fd = -1; + } + free(tdb->lockrecs); + + /* Remove from contexts list */ + for (i = &tdbs; *i; i = &(*i)->next) { + if (*i == tdb) { + *i = tdb->next; + break; + } + } + +#ifdef TDB_TRACE + close(tdb->tracefd); +#endif + free(tdb); + + return ret; +} diff --git a/ccan/tdb2/tdb2.h b/ccan/tdb2/tdb2.h new file mode 100644 index 00000000..48c5ba65 --- /dev/null +++ b/ccan/tdb2/tdb2.h @@ -0,0 +1,143 @@ +#ifndef CCAN_TDB2_H +#define CCAN_TDB2_H + +/* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Andrew Tridgell 1999-2004 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _SAMBA_BUILD_ +/* For mode_t */ +#include +/* For O_* flags. 
*/
+#include
+/* For sig_atomic_t. */
+#include
+/* For uint64_t */
+#include
+#endif
+
+/* flags to tdb_store() */
+#define TDB_REPLACE 1		/* Unused: tdb_store() only tests for the two values below */
+#define TDB_INSERT 2 		/* Don't overwrite an existing entry */
+#define TDB_MODIFY 3		/* Don't create a new entry: fail if the key does not exist */
+
+/* flags for tdb_open() */
+#define TDB_DEFAULT 0 /* just a readability place holder */
+#define TDB_CLEAR_IF_FIRST 1
+#define TDB_INTERNAL 2 /* don't store on disk */
+#define TDB_NOLOCK   4 /* don't do any locking */
+#define TDB_NOMMAP   8 /* don't use mmap */
+#define TDB_CONVERT 16 /* convert endian (internal use) */
+#define TDB_BIGENDIAN 32 /* header is big-endian (internal use) */
+#define TDB_NOSYNC   64 /* don't use synchronous transactions */
+#define TDB_SEQNUM   128 /* maintain a sequence number */
+#define TDB_VOLATILE   256 /* Activate the per-hashchain freelist, default 5 */
+#define TDB_ALLOW_NESTING 512 /* Allow transactions to nest */
+#define TDB_DISALLOW_NESTING 1024 /* Disallow transactions to nest */
+
+/* error codes (stored in the context's ecode field by the library) */
+enum TDB_ERROR {TDB_SUCCESS=0, TDB_ERR_CORRUPT, TDB_ERR_IO, TDB_ERR_LOCK,
+		TDB_ERR_OOM, TDB_ERR_EXISTS, TDB_ERR_NOLOCK, TDB_ERR_LOCK_TIMEOUT,
+		TDB_ERR_NOEXIST, TDB_ERR_EINVAL, TDB_ERR_RDONLY,
+		TDB_ERR_NESTING};
+
+/* debugging uses one of the following levels */
+enum tdb_debug_level {TDB_DEBUG_FATAL = 0, TDB_DEBUG_ERROR,
+		      TDB_DEBUG_WARNING, TDB_DEBUG_TRACE};
+
+typedef struct tdb_data {
+	unsigned char *dptr;	/* the bytes of a key or a value */
+	size_t dsize;		/* number of bytes at dptr */
+} TDB_DATA;
+
+#ifndef PRINTF_ATTRIBUTE
+#if (__GNUC__ >= 3)
+/** Use gcc attribute to check printf fns.  a1 is the 1-based index of
+ * the parameter containing the format, and a2 the index of the first
+ * argument.
Note that some gcc 2.x versions don't handle this
+ * properly **/
+#define PRINTF_ATTRIBUTE(a1, a2) __attribute__ ((format (__printf__, a1, a2)))
+#else
+#define PRINTF_ATTRIBUTE(a1, a2)
+#endif
+#endif
+
+struct tdb_context;
+
+/* FIXME: Make typesafe
+ *
+ * tdb_logfn_t: printf-style logging callback; the format string is the
+ * fourth parameter and the varargs start at the fifth, which is what
+ * PRINTF_ATTRIBUTE(4, 5) tells gcc to check.
+ * tdb_hashfn_t: hashes len bytes at key and returns a 64-bit value.
+ * In both, priv is the private pointer supplied with the callback. */
+typedef void (*tdb_logfn_t)(struct tdb_context *, enum tdb_debug_level, void *priv, const char *, ...) PRINTF_ATTRIBUTE(4, 5);
+typedef uint64_t (*tdb_hashfn_t)(const void *key, size_t len, uint64_t seed,
+				 void *priv);
+
+enum tdb_attribute_type {
+	TDB_ATTRIBUTE_LOG = 0,	/* union member in use: log */
+	TDB_ATTRIBUTE_HASH = 1	/* union member in use: hash */
+};
+
+struct tdb_attribute_base {
+	enum tdb_attribute_type attr;	/* selects the member of union tdb_attribute */
+	union tdb_attribute *next;	/* next attribute in the list */
+};
+
+struct tdb_attribute_log {
+	struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */
+	tdb_logfn_t log_fn;
+	void *log_private;
+};
+
+struct tdb_attribute_hash {
+	struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */
+	tdb_hashfn_t hash_fn;
+	void *hash_private;
+};
+
+union tdb_attribute {
+	struct tdb_attribute_base base;
+	struct tdb_attribute_log log;
+	struct tdb_attribute_hash hash;
+};
+
+struct tdb_context *tdb_open(const char *name, int tdb_flags,
+			     int open_flags, mode_t mode,
+			     union tdb_attribute *attributes);
+
+struct tdb_data tdb_fetch(struct tdb_context *tdb, struct tdb_data key);
+int tdb_delete(struct tdb_context *tdb, struct tdb_data key);
+int tdb_store(struct tdb_context *tdb, struct tdb_data key, struct tdb_data dbuf, int flag);
+int tdb_close(struct tdb_context *tdb);
+int tdb_check(struct tdb_context *tdb,
+	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
+	      void *private_data);
+
+extern struct tdb_data tdb_null;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* tdb2.h */
diff --git a/ccan/tdb2/test/run-encode.c b/ccan/tdb2/test/run-encode.c
new file mode 100644
index 00000000..a6253fe4
--- /dev/null
+++ b/ccan/tdb2/test/run-encode.c
@@ -0,0 +1,40 @@
+#include
+#include
+#include
+#include
+#include
+
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct tdb_used_record rec;
+	struct tdb_context tdb = { .log = null_log_fn, .log_priv = NULL };
+
+	plan_tests(64 + 32 + 48*6); /* one per first-loop pass, one per second, six per third */
+
+	/* We should be able to encode any data value.
+	 * (No key: data length and total length are both 1 << i.) */
+	for (i = 0; i < 64; i++)
+		ok1(set_header(&tdb, &rec, 0, 1ULL << i, 1ULL << i, 0) == 0);
+
+	/* And any key and data with < 64 bits between them.
+	 * NOTE(review): "1ULL >> (63 - i)" is 0 for every i in 0..31, so
+	 * this loop only ever exercises dlen == 0; a left shift may have
+	 * been intended - confirm. */
+	for (i = 0; i < 32; i++) {
+		tdb_len_t dlen = 1ULL >> (63 - i), klen = 1ULL << i;
+		ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen, 0) == 0);
+	}
+
+	/* We should neatly encode all values.
+	 * The fifth argument to set_header() is key + data + padding, and
+	 * each field is then read back through its accessor. */
+	for (i = 0; i < 48; i++) {
+		uint64_t h = 1ULL << (i < 11 ? 63 - i : 63 - 10);
+		uint64_t klen = 1ULL << (i < 16 ? i : 15);
+		uint64_t dlen = 1ULL << i;
+		uint64_t xlen = 1ULL << (i < 32 ? i : 31);
+		ok1(set_header(&tdb, &rec, klen, dlen, klen + dlen + xlen, h)
+		    == 0);
+		ok1(rec_key_length(&rec) == klen);
+		ok1(rec_data_length(&rec) == dlen);
+		ok1(rec_extra_padding(&rec) == xlen);
+		ok1(rec_hash(&rec) == h);
+		ok1(rec_magic(&rec) == TDB_MAGIC);
+	}
+	return exit_status();
+}
diff --git a/ccan/tdb2/test/run-fls.c b/ccan/tdb2/test/run-fls.c
new file mode 100644
index 00000000..4ecc6f85
--- /dev/null
+++ b/ccan/tdb2/test/run-fls.c
@@ -0,0 +1,36 @@
+#include
+#include
+#include
+#include
+#include
+
+static unsigned int dumb_fls(uint64_t num) /* reference: 1-based index of the highest set bit, 0 if num == 0 */
+{
+	int i;
+
+	for (i = 63; i >= 0; i--) {
+		if (num & (1ULL << i))
+			break;
+	}
+	return i + 1; /* i is -1 here when no bit was set */
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned int i, j;
+
+	plan_tests(64 * 64 + 2); /* every (i, j) pair plus the two zero checks */
+
+	ok1(fls64(0) == 0);
+	ok1(dumb_fls(0) == 0);
+
+	for (i = 0; i < 64; i++) {
+		for (j = 0; j < 64; j++) {
+			uint64_t val = (1ULL << i) | (1ULL << j); /* one or two bits set */
+			ok(fls64(val) == dumb_fls(val),
+			   "%llu -> %u should be %u", (long long)val,
+			   fls64(val), dumb_fls(val));
+		}
+	}
+	return exit_status();
+}
-- 
2.39.2